aboutsummaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
author Yulian Kuncheff <670212+daegalus@users.noreply.github.com> 2025-03-22 23:44:49 +0100
committer GitHub <noreply@github.com> 2025-03-22 18:44:49 -0400
commit 6156d3d7293a1757725b1d36a89a61ede1ffe850 (patch)
tree 770109d49181f582fed54ca787ebf959791f1b56 /data
parent af6f05554fe8da112599f30d32524c28a4078cac (diff)
download anubis-6156d3d7293a1757725b1d36a89a61ede1ffe850.tar.xz
anubis-6156d3d7293a1757725b1d36a89a61ede1ffe850.zip
Refactor and split out things into cmd and lib (#77)
* Refactor anubis to split business logic into a lib, and cmd to just be direct usage.
* Post-rebase fixes.
* Update changelog, remove unnecessary one.
* lib: refactor this
  This is mostly based on my personal preferences for how Go code should be laid out. I'm not sold on the package name "lib" (I'd call it anubis but that would stutter), but people are probably gonna import it as libanubis so it's likely fine. Packages have been "flattened" to centralize implementation with area of concern. This goes against the Java-esque style that many people like, but I think this helps make things simple. Most notably: the dnsbl client (which is a hack) is an internal package until it's made more generic. Then it can be made external. I also fixed the logic such that `go generate` works and rebased on main.
* internal/test: run tests iff npx exists and DONT_USE_NETWORK is not set
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: install deps
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* .github/workflows: verbose go tests?
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: sleep 2
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: nix this test so CI works
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: warmup per browser?
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: disable for now :(
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* lib/anubis: do not apply bot rules if address check fails
  Closes #83
---------
Signed-off-by: Xe Iaso <me@xeiaso.net>
Co-authored-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'data')
-rw-r--r-- data/botPolicies.json 398
-rw-r--r-- data/embed.go 8
2 files changed, 406 insertions, 0 deletions
diff --git a/data/botPolicies.json b/data/botPolicies.json
new file mode 100644
index 0000000..0e33706
--- /dev/null
+++ b/data/botPolicies.json
@@ -0,0 +1,398 @@
+{
+ "bots": [
+ {
+ "name": "amazonbot",
+ "user_agent_regex": "Amazonbot",
+ "action": "DENY"
+ },
+ {
+ "name": "googlebot",
+ "user_agent_regex": "\\+http\\://www\\.google\\.com/bot\\.html",
+ "action": "ALLOW",
+ "remote_addresses": [
+ "2001:4860:4801:10::/64",
+ "2001:4860:4801:11::/64",
+ "2001:4860:4801:12::/64",
+ "2001:4860:4801:13::/64",
+ "2001:4860:4801:14::/64",
+ "2001:4860:4801:15::/64",
+ "2001:4860:4801:16::/64",
+ "2001:4860:4801:17::/64",
+ "2001:4860:4801:18::/64",
+ "2001:4860:4801:19::/64",
+ "2001:4860:4801:1a::/64",
+ "2001:4860:4801:1b::/64",
+ "2001:4860:4801:1c::/64",
+ "2001:4860:4801:1d::/64",
+ "2001:4860:4801:1e::/64",
+ "2001:4860:4801:1f::/64",
+ "2001:4860:4801:20::/64",
+ "2001:4860:4801:21::/64",
+ "2001:4860:4801:22::/64",
+ "2001:4860:4801:23::/64",
+ "2001:4860:4801:24::/64",
+ "2001:4860:4801:25::/64",
+ "2001:4860:4801:26::/64",
+ "2001:4860:4801:27::/64",
+ "2001:4860:4801:28::/64",
+ "2001:4860:4801:29::/64",
+ "2001:4860:4801:2::/64",
+ "2001:4860:4801:2a::/64",
+ "2001:4860:4801:2b::/64",
+ "2001:4860:4801:2c::/64",
+ "2001:4860:4801:2d::/64",
+ "2001:4860:4801:2e::/64",
+ "2001:4860:4801:2f::/64",
+ "2001:4860:4801:31::/64",
+ "2001:4860:4801:32::/64",
+ "2001:4860:4801:33::/64",
+ "2001:4860:4801:34::/64",
+ "2001:4860:4801:35::/64",
+ "2001:4860:4801:36::/64",
+ "2001:4860:4801:37::/64",
+ "2001:4860:4801:38::/64",
+ "2001:4860:4801:39::/64",
+ "2001:4860:4801:3a::/64",
+ "2001:4860:4801:3b::/64",
+ "2001:4860:4801:3c::/64",
+ "2001:4860:4801:3d::/64",
+ "2001:4860:4801:3e::/64",
+ "2001:4860:4801:40::/64",
+ "2001:4860:4801:41::/64",
+ "2001:4860:4801:42::/64",
+ "2001:4860:4801:43::/64",
+ "2001:4860:4801:44::/64",
+ "2001:4860:4801:45::/64",
+ "2001:4860:4801:46::/64",
+ "2001:4860:4801:47::/64",
+ "2001:4860:4801:48::/64",
+ "2001:4860:4801:49::/64",
+ "2001:4860:4801:4a::/64",
+ "2001:4860:4801:4b::/64",
+ "2001:4860:4801:4c::/64",
+ "2001:4860:4801:50::/64",
+ "2001:4860:4801:51::/64",
+ "2001:4860:4801:52::/64",
+ "2001:4860:4801:53::/64",
+ "2001:4860:4801:54::/64",
+ "2001:4860:4801:55::/64",
+ "2001:4860:4801:56::/64",
+ "2001:4860:4801:60::/64",
+ "2001:4860:4801:61::/64",
+ "2001:4860:4801:62::/64",
+ "2001:4860:4801:63::/64",
+ "2001:4860:4801:64::/64",
+ "2001:4860:4801:65::/64",
+ "2001:4860:4801:66::/64",
+ "2001:4860:4801:67::/64",
+ "2001:4860:4801:68::/64",
+ "2001:4860:4801:69::/64",
+ "2001:4860:4801:6a::/64",
+ "2001:4860:4801:6b::/64",
+ "2001:4860:4801:6c::/64",
+ "2001:4860:4801:6d::/64",
+ "2001:4860:4801:6e::/64",
+ "2001:4860:4801:6f::/64",
+ "2001:4860:4801:70::/64",
+ "2001:4860:4801:71::/64",
+ "2001:4860:4801:72::/64",
+ "2001:4860:4801:73::/64",
+ "2001:4860:4801:74::/64",
+ "2001:4860:4801:75::/64",
+ "2001:4860:4801:76::/64",
+ "2001:4860:4801:77::/64",
+ "2001:4860:4801:78::/64",
+ "2001:4860:4801:79::/64",
+ "2001:4860:4801:80::/64",
+ "2001:4860:4801:81::/64",
+ "2001:4860:4801:82::/64",
+ "2001:4860:4801:83::/64",
+ "2001:4860:4801:84::/64",
+ "2001:4860:4801:85::/64",
+ "2001:4860:4801:86::/64",
+ "2001:4860:4801:87::/64",
+ "2001:4860:4801:88::/64",
+ "2001:4860:4801:90::/64",
+ "2001:4860:4801:91::/64",
+ "2001:4860:4801:92::/64",
+ "2001:4860:4801:93::/64",
+ "2001:4860:4801:94::/64",
+ "2001:4860:4801:95::/64",
+ "2001:4860:4801:96::/64",
+ "2001:4860:4801:a0::/64",
+ "2001:4860:4801:a1::/64",
+ "2001:4860:4801:a2::/64",
+ "2001:4860:4801:a3::/64",
+ "2001:4860:4801:a4::/64",
+ "2001:4860:4801:a5::/64",
+ "2001:4860:4801:c::/64",
+ "2001:4860:4801:f::/64",
+ "192.178.5.0/27",
+ "192.178.6.0/27",
+ "192.178.6.128/27",
+ "192.178.6.160/27",
+ "192.178.6.192/27",
+ "192.178.6.32/27",
+ "192.178.6.64/27",
+ "192.178.6.96/27",
+ "34.100.182.96/28",
+ "34.101.50.144/28",
+ "34.118.254.0/28",
+ "34.118.66.0/28",
+ "34.126.178.96/28",
+ "34.146.150.144/28",
+ "34.147.110.144/28",
+ "34.151.74.144/28",
+ "34.152.50.64/28",
+ "34.154.114.144/28",
+ "34.155.98.32/28",
+ "34.165.18.176/28",
+ "34.175.160.64/28",
+ "34.176.130.16/28",
+ "34.22.85.0/27",
+ "34.64.82.64/28",
+ "34.65.242.112/28",
+ "34.80.50.80/28",
+ "34.88.194.0/28",
+ "34.89.10.80/28",
+ "34.89.198.80/28",
+ "34.96.162.48/28",
+ "35.247.243.240/28",
+ "66.249.64.0/27",
+ "66.249.64.128/27",
+ "66.249.64.160/27",
+ "66.249.64.224/27",
+ "66.249.64.32/27",
+ "66.249.64.64/27",
+ "66.249.64.96/27",
+ "66.249.65.0/27",
+ "66.249.65.128/27",
+ "66.249.65.160/27",
+ "66.249.65.192/27",
+ "66.249.65.224/27",
+ "66.249.65.32/27",
+ "66.249.65.64/27",
+ "66.249.65.96/27",
+ "66.249.66.0/27",
+ "66.249.66.128/27",
+ "66.249.66.160/27",
+ "66.249.66.192/27",
+ "66.249.66.224/27",
+ "66.249.66.32/27",
+ "66.249.66.64/27",
+ "66.249.66.96/27",
+ "66.249.68.0/27",
+ "66.249.68.128/27",
+ "66.249.68.32/27",
+ "66.249.68.64/27",
+ "66.249.68.96/27",
+ "66.249.69.0/27",
+ "66.249.69.128/27",
+ "66.249.69.160/27",
+ "66.249.69.192/27",
+ "66.249.69.224/27",
+ "66.249.69.32/27",
+ "66.249.69.64/27",
+ "66.249.69.96/27",
+ "66.249.70.0/27",
+ "66.249.70.128/27",
+ "66.249.70.160/27",
+ "66.249.70.192/27",
+ "66.249.70.224/27",
+ "66.249.70.32/27",
+ "66.249.70.64/27",
+ "66.249.70.96/27",
+ "66.249.71.0/27",
+ "66.249.71.128/27",
+ "66.249.71.160/27",
+ "66.249.71.192/27",
+ "66.249.71.224/27",
+ "66.249.71.32/27",
+ "66.249.71.64/27",
+ "66.249.71.96/27",
+ "66.249.72.0/27",
+ "66.249.72.128/27",
+ "66.249.72.160/27",
+ "66.249.72.192/27",
+ "66.249.72.224/27",
+ "66.249.72.32/27",
+ "66.249.72.64/27",
+ "66.249.72.96/27",
+ "66.249.73.0/27",
+ "66.249.73.128/27",
+ "66.249.73.160/27",
+ "66.249.73.192/27",
+ "66.249.73.224/27",
+ "66.249.73.32/27",
+ "66.249.73.64/27",
+ "66.249.73.96/27",
+ "66.249.74.0/27",
+ "66.249.74.128/27",
+ "66.249.74.160/27",
+ "66.249.74.192/27",
+ "66.249.74.32/27",
+ "66.249.74.64/27",
+ "66.249.74.96/27",
+ "66.249.75.0/27",
+ "66.249.75.128/27",
+ "66.249.75.160/27",
+ "66.249.75.192/27",
+ "66.249.75.224/27",
+ "66.249.75.32/27",
+ "66.249.75.64/27",
+ "66.249.75.96/27",
+ "66.249.76.0/27",
+ "66.249.76.128/27",
+ "66.249.76.160/27",
+ "66.249.76.192/27",
+ "66.249.76.224/27",
+ "66.249.76.32/27",
+ "66.249.76.64/27",
+ "66.249.76.96/27",
+ "66.249.77.0/27",
+ "66.249.77.128/27",
+ "66.249.77.160/27",
+ "66.249.77.192/27",
+ "66.249.77.224/27",
+ "66.249.77.32/27",
+ "66.249.77.64/27",
+ "66.249.77.96/27",
+ "66.249.78.0/27",
+ "66.249.78.32/27",
+ "66.249.79.0/27",
+ "66.249.79.128/27",
+ "66.249.79.160/27",
+ "66.249.79.192/27",
+ "66.249.79.224/27",
+ "66.249.79.32/27",
+ "66.249.79.64/27",
+ "66.249.79.96/27"
+ ]
+ },
+ {
+ "name": "bingbot",
+ "user_agent_regex": "\\+http\\://www\\.bing\\.com/bingbot\\.htm",
+ "action": "ALLOW",
+ "remote_addresses": [
+ "157.55.39.0/24",
+ "207.46.13.0/24",
+ "40.77.167.0/24",
+ "13.66.139.0/24",
+ "13.66.144.0/24",
+ "52.167.144.0/24",
+ "13.67.10.16/28",
+ "13.69.66.240/28",
+ "13.71.172.224/28",
+ "139.217.52.0/28",
+ "191.233.204.224/28",
+ "20.36.108.32/28",
+ "20.43.120.16/28",
+ "40.79.131.208/28",
+ "40.79.186.176/28",
+ "52.231.148.0/28",
+ "20.79.107.240/28",
+ "51.105.67.0/28",
+ "20.125.163.80/28",
+ "40.77.188.0/22",
+ "65.55.210.0/24",
+ "199.30.24.0/23",
+ "40.77.202.0/24",
+ "40.77.139.0/25",
+ "20.74.197.0/28",
+ "20.15.133.160/27",
+ "40.77.177.0/24",
+ "40.77.178.0/23"
+ ]
+ },
+ {
+ "name": "qwantbot",
+ "user_agent_regex": "\\+https\\://help\\.qwant\\.com/bot/",
+ "action": "ALLOW",
+ "remote_addresses": [
+ "91.242.162.0/24"
+ ]
+ },
+ {
+ "name": "kagibot",
+ "user_agent_regex": "\\+https\\://kagi\\.com/bot",
+ "action": "ALLOW",
+ "remote_addresses": [
+ "216.18.205.234/32",
+ "35.212.27.76/32",
+ "104.254.65.50/32",
+ "209.151.156.194/32"
+ ]
+ },
+ {
+ "name": "marginalia",
+ "user_agent_regex": "search\\.marginalia\\.nu",
+ "action": "ALLOW",
+ "remote_addresses": [
+ "193.183.0.162/31",
+ "193.183.0.164/30",
+ "193.183.0.168/30",
+ "193.183.0.172/31",
+ "193.183.0.174/32"
+ ]
+ },
+ {
+ "name": "mojeekbot",
+ "user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
+ "action": "ALLOW",
+ "remote_addresses": [
+ "5.102.173.71/32"
+ ]
+ },
+ {
+ "name": "us-artificial-intelligence-scraper",
+ "user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
+ "action": "DENY"
+ },
+ {
+ "name": "well-known",
+ "path_regex": "^/.well-known/.*$",
+ "action": "ALLOW"
+ },
+ {
+ "name": "favicon",
+ "path_regex": "^/favicon.ico$",
+ "action": "ALLOW"
+ },
+ {
+ "name": "robots-txt",
+ "path_regex": "^/robots.txt$",
+ "action": "ALLOW"
+ },
+ {
+ "name": "lightpanda",
+ "user_agent_regex": "^Lightpanda/.*$",
+ "action": "DENY"
+ },
+ {
+ "name": "headless-chrome",
+ "user_agent_regex": "HeadlessChrome",
+ "action": "DENY"
+ },
+ {
+ "name": "headless-chromium",
+ "user_agent_regex": "HeadlessChromium",
+ "action": "DENY"
+ },
+ {
+ "name": "generic-bot-catchall",
+ "user_agent_regex": "(?i:bot|crawler)",
+ "action": "CHALLENGE",
+ "challenge": {
+ "difficulty": 16,
+ "report_as": 4,
+ "algorithm": "slow"
+ }
+ },
+ {
+ "name": "generic-browser",
+ "user_agent_regex": "Mozilla",
+ "action": "CHALLENGE"
+ }
+ ],
+ "dnsbl": true
+}
diff --git a/data/embed.go b/data/embed.go
new file mode 100644
index 0000000..5a5f4d2
--- /dev/null
+++ b/data/embed.go
@@ -0,0 +1,8 @@
+package data
+
+import "embed"
+
+var ( // Assets compiled into the binary through go:embed directives.
+ //go:embed botPolicies.json
+ BotPolicies embed.FS // BotPolicies is a read-only embedded file system containing botPolicies.json.
+)