about summary refs log tree commit diff
path: root/data/botPolicies.yaml
diff options
context:
space:
mode:
author Xe Iaso <me@xeiaso.net> 2025-04-23 07:01:28 -0400
committer GitHub <noreply@github.com> 2025-04-23 07:01:28 -0400
commit 74e11505c6133ee1107811e81a0fd53e1d7876dd (patch)
tree 9169e3fccc32657a9a84358bf7e6d7779fa704df /data/botPolicies.yaml
parent 4e2c9de7085fbc8e5abe8d0659d807881d69769c (diff)
download anubis-74e11505c6133ee1107811e81a0fd53e1d7876dd.tar.xz
anubis-74e11505c6133ee1107811e81a0fd53e1d7876dd.zip
feat: enable loading config fragments (#321)
* feat(config): support importing bot policy snippets

  This changes the grammar of the Anubis bot policy config to allow importing from internal shared rules or external rules on the filesystem. This lets you create a file at `/data/policies/block-evilbot.yaml` and then import it with:

  ```yaml
  bots:
    - import: /data/policies/block-evilbot.yaml
  ```

  This also explodes the default policy file into a bunch of composable snippets. Thank you @Aibrew for your example gitea Atom / RSS feed rules!

  Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(data): update botPolicies.json to use imports

  Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(cmd/anubis): extract bot policies with --extract-resources

  This allows a user that doesn't have anything but the Anubis binary to figure out what the default configuration does.

* docs(data/botPolices.yaml): document import syntax in-line

  Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(lib/policy): better test importing from JSON snippets

  Signed-off-by: Xe Iaso <me@xeiaso.net>

* docs(admin): Add import syntax documentation

  This documents the import syntax and is based on the block comment at the top of the default bot policy file.

* docs(changelog): add note about importing snippets

  Signed-off-by: Xe Iaso <me@xeiaso.net>

* style(lib/policy/config): use an error value instead of an inline error

  Signed-off-by: Xe Iaso <me@xeiaso.net>

---------

Signed-off-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'data/botPolicies.yaml')
-rw-r--r-- data/botPolicies.yaml 672
1 files changed, 30 insertions, 642 deletions
diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml
index cb4715a..585be15 100644
--- a/data/botPolicies.yaml
+++ b/data/botPolicies.yaml
@@ -1,651 +1,38 @@
+## Anubis has the ability to let you import snippets of configuration into the main
+## configuration file. This allows you to break up your config into smaller parts
+## that get logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths can either be prefixed with (data) to import from the common/shared
+## rules in the data folder in the Anubis source tree, or point to absolute/relative
+## paths in your filesystem. If you don't have access to the Anubis source tree, check
+## /usr/share/docs/anubis/data or the tarball you extracted Anubis from.
+
bots:
# Pathological bots to deny
-- name: us-artificial-intelligence-scraper
- user_agent_regex: \+https\://github\.com/US-Artificial-Intelligence/scraper
- action: DENY
-- name: lightpanda
- user_agent_regex: ^LightPanda/.*$
- action: DENY
-- name: headless-chrome
- user_agent_regex: HeadlessChrome
- action: DENY
-- name: headless-chromium
- user_agent_regex: HeadlessChromium
- action: DENY
-- name: "ai-robots-txt"
- user_agent_regex: >
- AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot
- action: DENY
-- name: cloudflare-workers
- headers_regex:
- CF-Worker: .*
- action: DENY
+- # This correlates to data/bots/ai-robots-txt.yaml in the source tree
+ import: (data)/bots/ai-robots-txt.yaml
+- import: (data)/bots/cloudflare-workers.yaml
+- import: (data)/bots/headless-browsers.yaml
+- import: (data)/bots/us-ai-scraper.yaml
-# search engines to allow
-- name: googlebot
- user_agent_regex: \+http\://www\.google\.com/bot\.html
- action: ALLOW
- # https://developers.google.com/static/search/apis/ipranges/googlebot.json
- remote_addresses: [
- "2001:4860:4801:10::/64",
- "2001:4860:4801:11::/64",
- "2001:4860:4801:12::/64",
- "2001:4860:4801:13::/64",
- "2001:4860:4801:14::/64",
- "2001:4860:4801:15::/64",
- "2001:4860:4801:16::/64",
- "2001:4860:4801:17::/64",
- "2001:4860:4801:18::/64",
- "2001:4860:4801:19::/64",
- "2001:4860:4801:1a::/64",
- "2001:4860:4801:1b::/64",
- "2001:4860:4801:1c::/64",
- "2001:4860:4801:1d::/64",
- "2001:4860:4801:1e::/64",
- "2001:4860:4801:1f::/64",
- "2001:4860:4801:20::/64",
- "2001:4860:4801:21::/64",
- "2001:4860:4801:22::/64",
- "2001:4860:4801:23::/64",
- "2001:4860:4801:24::/64",
- "2001:4860:4801:25::/64",
- "2001:4860:4801:26::/64",
- "2001:4860:4801:27::/64",
- "2001:4860:4801:28::/64",
- "2001:4860:4801:29::/64",
- "2001:4860:4801:2::/64",
- "2001:4860:4801:2a::/64",
- "2001:4860:4801:2b::/64",
- "2001:4860:4801:2c::/64",
- "2001:4860:4801:2d::/64",
- "2001:4860:4801:2e::/64",
- "2001:4860:4801:2f::/64",
- "2001:4860:4801:31::/64",
- "2001:4860:4801:32::/64",
- "2001:4860:4801:33::/64",
- "2001:4860:4801:34::/64",
- "2001:4860:4801:35::/64",
- "2001:4860:4801:36::/64",
- "2001:4860:4801:37::/64",
- "2001:4860:4801:38::/64",
- "2001:4860:4801:39::/64",
- "2001:4860:4801:3a::/64",
- "2001:4860:4801:3b::/64",
- "2001:4860:4801:3c::/64",
- "2001:4860:4801:3d::/64",
- "2001:4860:4801:3e::/64",
- "2001:4860:4801:40::/64",
- "2001:4860:4801:41::/64",
- "2001:4860:4801:42::/64",
- "2001:4860:4801:43::/64",
- "2001:4860:4801:44::/64",
- "2001:4860:4801:45::/64",
- "2001:4860:4801:46::/64",
- "2001:4860:4801:47::/64",
- "2001:4860:4801:48::/64",
- "2001:4860:4801:49::/64",
- "2001:4860:4801:4a::/64",
- "2001:4860:4801:4b::/64",
- "2001:4860:4801:4c::/64",
- "2001:4860:4801:50::/64",
- "2001:4860:4801:51::/64",
- "2001:4860:4801:52::/64",
- "2001:4860:4801:53::/64",
- "2001:4860:4801:54::/64",
- "2001:4860:4801:55::/64",
- "2001:4860:4801:56::/64",
- "2001:4860:4801:60::/64",
- "2001:4860:4801:61::/64",
- "2001:4860:4801:62::/64",
- "2001:4860:4801:63::/64",
- "2001:4860:4801:64::/64",
- "2001:4860:4801:65::/64",
- "2001:4860:4801:66::/64",
- "2001:4860:4801:67::/64",
- "2001:4860:4801:68::/64",
- "2001:4860:4801:69::/64",
- "2001:4860:4801:6a::/64",
- "2001:4860:4801:6b::/64",
- "2001:4860:4801:6c::/64",
- "2001:4860:4801:6d::/64",
- "2001:4860:4801:6e::/64",
- "2001:4860:4801:6f::/64",
- "2001:4860:4801:70::/64",
- "2001:4860:4801:71::/64",
- "2001:4860:4801:72::/64",
- "2001:4860:4801:73::/64",
- "2001:4860:4801:74::/64",
- "2001:4860:4801:75::/64",
- "2001:4860:4801:76::/64",
- "2001:4860:4801:77::/64",
- "2001:4860:4801:78::/64",
- "2001:4860:4801:79::/64",
- "2001:4860:4801:80::/64",
- "2001:4860:4801:81::/64",
- "2001:4860:4801:82::/64",
- "2001:4860:4801:83::/64",
- "2001:4860:4801:84::/64",
- "2001:4860:4801:85::/64",
- "2001:4860:4801:86::/64",
- "2001:4860:4801:87::/64",
- "2001:4860:4801:88::/64",
- "2001:4860:4801:90::/64",
- "2001:4860:4801:91::/64",
- "2001:4860:4801:92::/64",
- "2001:4860:4801:93::/64",
- "2001:4860:4801:94::/64",
- "2001:4860:4801:95::/64",
- "2001:4860:4801:96::/64",
- "2001:4860:4801:a0::/64",
- "2001:4860:4801:a1::/64",
- "2001:4860:4801:a2::/64",
- "2001:4860:4801:a3::/64",
- "2001:4860:4801:a4::/64",
- "2001:4860:4801:a5::/64",
- "2001:4860:4801:c::/64",
- "2001:4860:4801:f::/64",
- "192.178.5.0/27",
- "192.178.6.0/27",
- "192.178.6.128/27",
- "192.178.6.160/27",
- "192.178.6.192/27",
- "192.178.6.32/27",
- "192.178.6.64/27",
- "192.178.6.96/27",
- "34.100.182.96/28",
- "34.101.50.144/28",
- "34.118.254.0/28",
- "34.118.66.0/28",
- "34.126.178.96/28",
- "34.146.150.144/28",
- "34.147.110.144/28",
- "34.151.74.144/28",
- "34.152.50.64/28",
- "34.154.114.144/28",
- "34.155.98.32/28",
- "34.165.18.176/28",
- "34.175.160.64/28",
- "34.176.130.16/28",
- "34.22.85.0/27",
- "34.64.82.64/28",
- "34.65.242.112/28",
- "34.80.50.80/28",
- "34.88.194.0/28",
- "34.89.10.80/28",
- "34.89.198.80/28",
- "34.96.162.48/28",
- "35.247.243.240/28",
- "66.249.64.0/27",
- "66.249.64.128/27",
- "66.249.64.160/27",
- "66.249.64.224/27",
- "66.249.64.32/27",
- "66.249.64.64/27",
- "66.249.64.96/27",
- "66.249.65.0/27",
- "66.249.65.128/27",
- "66.249.65.160/27",
- "66.249.65.192/27",
- "66.249.65.224/27",
- "66.249.65.32/27",
- "66.249.65.64/27",
- "66.249.65.96/27",
- "66.249.66.0/27",
- "66.249.66.128/27",
- "66.249.66.160/27",
- "66.249.66.192/27",
- "66.249.66.224/27",
- "66.249.66.32/27",
- "66.249.66.64/27",
- "66.249.66.96/27",
- "66.249.68.0/27",
- "66.249.68.128/27",
- "66.249.68.32/27",
- "66.249.68.64/27",
- "66.249.68.96/27",
- "66.249.69.0/27",
- "66.249.69.128/27",
- "66.249.69.160/27",
- "66.249.69.192/27",
- "66.249.69.224/27",
- "66.249.69.32/27",
- "66.249.69.64/27",
- "66.249.69.96/27",
- "66.249.70.0/27",
- "66.249.70.128/27",
- "66.249.70.160/27",
- "66.249.70.192/27",
- "66.249.70.224/27",
- "66.249.70.32/27",
- "66.249.70.64/27",
- "66.249.70.96/27",
- "66.249.71.0/27",
- "66.249.71.128/27",
- "66.249.71.160/27",
- "66.249.71.192/27",
- "66.249.71.224/27",
- "66.249.71.32/27",
- "66.249.71.64/27",
- "66.249.71.96/27",
- "66.249.72.0/27",
- "66.249.72.128/27",
- "66.249.72.160/27",
- "66.249.72.192/27",
- "66.249.72.224/27",
- "66.249.72.32/27",
- "66.249.72.64/27",
- "66.249.72.96/27",
- "66.249.73.0/27",
- "66.249.73.128/27",
- "66.249.73.160/27",
- "66.249.73.192/27",
- "66.249.73.224/27",
- "66.249.73.32/27",
- "66.249.73.64/27",
- "66.249.73.96/27",
- "66.249.74.0/27",
- "66.249.74.128/27",
- "66.249.74.160/27",
- "66.249.74.192/27",
- "66.249.74.32/27",
- "66.249.74.64/27",
- "66.249.74.96/27",
- "66.249.75.0/27",
- "66.249.75.128/27",
- "66.249.75.160/27",
- "66.249.75.192/27",
- "66.249.75.224/27",
- "66.249.75.32/27",
- "66.249.75.64/27",
- "66.249.75.96/27",
- "66.249.76.0/27",
- "66.249.76.128/27",
- "66.249.76.160/27",
- "66.249.76.192/27",
- "66.249.76.224/27",
- "66.249.76.32/27",
- "66.249.76.64/27",
- "66.249.76.96/27",
- "66.249.77.0/27",
- "66.249.77.128/27",
- "66.249.77.160/27",
- "66.249.77.192/27",
- "66.249.77.224/27",
- "66.249.77.32/27",
- "66.249.77.64/27",
- "66.249.77.96/27",
- "66.249.78.0/27",
- "66.249.78.32/27",
- "66.249.79.0/27",
- "66.249.79.128/27",
- "66.249.79.160/27",
- "66.249.79.192/27",
- "66.249.79.224/27",
- "66.249.79.32/27",
- "66.249.79.64/27",
- "66.249.79.96/27"
- ]
-- name: bingbot
- user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm
- action: ALLOW
- # https://www.bing.com/toolbox/bingbot.json
- remote_addresses: [
- "157.55.39.0/24",
- "207.46.13.0/24",
- "40.77.167.0/24",
- "13.66.139.0/24",
- "13.66.144.0/24",
- "52.167.144.0/24",
- "13.67.10.16/28",
- "13.69.66.240/28",
- "13.71.172.224/28",
- "139.217.52.0/28",
- "191.233.204.224/28",
- "20.36.108.32/28",
- "20.43.120.16/28",
- "40.79.131.208/28",
- "40.79.186.176/28",
- "52.231.148.0/28",
- "20.79.107.240/28",
- "51.105.67.0/28",
- "20.125.163.80/28",
- "40.77.188.0/22",
- "65.55.210.0/24",
- "199.30.24.0/23",
- "40.77.202.0/24",
- "40.77.139.0/25",
- "20.74.197.0/28",
- "20.15.133.160/27",
- "40.77.177.0/24",
- "40.77.178.0/23"
- ]
-- name: duckduckbot
- user_agent_regex: DuckDuckBot/1\.1; \(\+http\://duckduckgo\.com/duckduckbot\.html\)
- action: ALLOW
- # https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
- remote_addresses: [
- "57.152.72.128/32",
- "51.8.253.152/32",
- "40.80.242.63/32",
- "20.12.141.99/32",
- "20.49.136.28/32",
- "51.116.131.221/32",
- "51.107.40.209/32",
- "20.40.133.240/32",
- "20.50.168.91/32",
- "51.120.48.122/32",
- "20.193.45.113/32",
- "40.76.173.151/32",
- "40.76.163.7/32",
- "20.185.79.47/32",
- "52.142.26.175/32",
- "20.185.79.15/32",
- "52.142.24.149/32",
- "40.76.162.208/32",
- "40.76.163.23/32",
- "40.76.162.191/32",
- "40.76.162.247/32",
- "40.88.21.235/32",
- "20.191.45.212/32",
- "52.146.59.12/32",
- "52.146.59.156/32",
- "52.146.59.154/32",
- "52.146.58.236/32",
- "20.62.224.44/32",
- "51.104.180.53/32",
- "51.104.180.47/32",
- "51.104.180.26/32",
- "51.104.146.225/32",
- "51.104.146.235/32",
- "20.73.202.147/32",
- "20.73.132.240/32",
- "20.71.12.143/32",
- "20.56.197.58/32",
- "20.56.197.63/32",
- "20.43.150.93/32",
- "20.43.150.85/32",
- "20.44.222.1/32",
- "40.89.243.175/32",
- "13.89.106.77/32",
- "52.143.242.6/32",
- "52.143.241.111/32",
- "52.154.60.82/32",
- "20.197.209.11/32",
- "20.197.209.27/32",
- "20.226.133.105/32",
- "191.234.216.4/32",
- "191.234.216.178/32",
- "20.53.92.211/32",
- "20.53.91.2/32",
- "20.207.99.197/32",
- "20.207.97.190/32",
- "40.81.250.205/32",
- "40.64.106.11/32",
- "40.64.105.247/32",
- "20.72.242.93/32",
- "20.99.255.235/32",
- "20.113.3.121/32",
- "52.224.16.221/32",
- "52.224.21.53/32",
- "52.224.20.204/32",
- "52.224.21.19/32",
- "52.224.20.249/32",
- "52.224.20.203/32",
- "52.224.20.190/32",
- "52.224.16.229/32",
- "52.224.21.20/32",
- "52.146.63.80/32",
- "52.224.20.227/32",
- "52.224.20.193/32",
- "52.190.37.160/32",
- "52.224.21.23/32",
- "52.224.20.223/32",
- "52.224.20.181/32",
- "52.224.21.49/32",
- "52.224.21.55/32",
- "52.224.21.61/32",
- "52.224.19.152/32",
- "52.224.20.186/32",
- "52.224.21.27/32",
- "52.224.21.51/32",
- "52.224.20.174/32",
- "52.224.21.4/32",
- "51.104.164.109/32",
- "51.104.167.71/32",
- "51.104.160.177/32",
- "51.104.162.149/32",
- "51.104.167.95/32",
- "51.104.167.54/32",
- "51.104.166.111/32",
- "51.104.167.88/32",
- "51.104.161.32/32",
- "51.104.163.250/32",
- "51.104.164.189/32",
- "51.104.167.19/32",
- "51.104.160.167/32",
- "51.104.167.110/32",
- "20.191.44.119/32",
- "51.104.167.104/32",
- "20.191.44.234/32",
- "51.104.164.215/32",
- "51.104.167.52/32",
- "20.191.44.22/32",
- "51.104.167.87/32",
- "51.104.167.96/32",
- "20.191.44.16/32",
- "51.104.167.61/32",
- "51.104.164.147/32",
- "20.50.48.159/32",
- "40.114.182.172/32",
- "20.50.50.130/32",
- "20.50.50.163/32",
- "20.50.50.46/32",
- "40.114.182.153/32",
- "20.50.50.118/32",
- "20.50.49.55/32",
- "20.50.49.25/32",
- "40.114.183.251/32",
- "20.50.50.123/32",
- "20.50.49.237/32",
- "20.50.48.192/32",
- "20.50.50.134/32",
- "51.138.90.233/32",
- "40.114.183.196/32",
- "20.50.50.146/32",
- "40.114.183.88/32",
- "20.50.50.145/32",
- "20.50.50.121/32",
- "20.50.49.40/32",
- "51.138.90.206/32",
- "40.114.182.45/32",
- "51.138.90.161/32",
- "20.50.49.0/32",
- "40.119.232.215/32",
- "104.43.55.167/32",
- "40.119.232.251/32",
- "40.119.232.50/32",
- "40.119.232.146/32",
- "40.119.232.218/32",
- "104.43.54.127/32",
- "104.43.55.117/32",
- "104.43.55.116/32",
- "104.43.55.166/32",
- "52.154.169.50/32",
- "52.154.171.70/32",
- "52.154.170.229/32",
- "52.154.170.113/32",
- "52.154.171.44/32",
- "52.154.172.2/32",
- "52.143.244.81/32",
- "52.154.171.87/32",
- "52.154.171.250/32",
- "52.154.170.28/32",
- "52.154.170.122/32",
- "52.143.243.117/32",
- "52.143.247.235/32",
- "52.154.171.235/32",
- "52.154.171.196/32",
- "52.154.171.0/32",
- "52.154.170.243/32",
- "52.154.170.26/32",
- "52.154.169.200/32",
- "52.154.170.96/32",
- "52.154.170.88/32",
- "52.154.171.150/32",
- "52.154.171.205/32",
- "52.154.170.117/32",
- "52.154.170.209/32",
- "191.235.202.48/32",
- "191.233.3.202/32",
- "191.235.201.214/32",
- "191.233.3.197/32",
- "191.235.202.38/32",
- "20.53.78.144/32",
- "20.193.24.10/32",
- "20.53.78.236/32",
- "20.53.78.138/32",
- "20.53.78.123/32",
- "20.53.78.106/32",
- "20.193.27.215/32",
- "20.193.25.197/32",
- "20.193.12.126/32",
- "20.193.24.251/32",
- "20.204.242.101/32",
- "20.207.72.113/32",
- "20.204.242.19/32",
- "20.219.45.67/32",
- "20.207.72.11/32",
- "20.219.45.190/32",
- "20.204.243.55/32",
- "20.204.241.148/32",
- "20.207.72.110/32",
- "20.204.240.172/32",
- "20.207.72.21/32",
- "20.204.246.81/32",
- "20.207.107.181/32",
- "20.204.246.254/32",
- "20.219.43.246/32",
- "52.149.25.43/32",
- "52.149.61.51/32",
- "52.149.58.139/32",
- "52.149.60.38/32",
- "52.148.165.38/32",
- "52.143.95.162/32",
- "52.149.56.151/32",
- "52.149.30.45/32",
- "52.149.58.173/32",
- "52.143.95.204/32",
- "52.149.28.83/32",
- "52.149.58.69/32",
- "52.148.161.87/32",
- "52.149.58.27/32",
- "52.149.28.18/32",
- "20.79.226.26/32",
- "20.79.239.66/32",
- "20.79.238.198/32",
- "20.113.14.159/32",
- "20.75.144.152/32",
- "20.43.172.120/32",
- "20.53.134.160/32",
- "20.201.15.208/32",
- "20.93.28.24/32",
- "20.61.34.40/32",
- "52.242.224.168/32",
- "20.80.129.80/32",
- "20.195.108.47/32",
- "4.195.133.120/32",
- "4.228.76.163/32",
- "4.182.131.108/32",
- "4.209.224.56/32",
- "108.141.83.74/32",
- "4.213.46.14/32",
- "172.169.17.165/32",
- "51.8.71.117/32",
- "20.3.1.178/32",
- "52.149.56.151/32",
- "52.149.30.45/32",
- "52.149.58.173/32",
- "52.143.95.204/32",
- "52.149.28.83/32",
- "52.149.58.69/32",
- "52.148.161.87/32",
- "52.149.58.27/32",
- "52.149.28.18/32",
- "20.79.226.26/32",
- "20.79.239.66/32",
- "20.79.238.198/32",
- "20.113.14.159/32",
- "20.75.144.152/32",
- "20.43.172.120/32",
- "20.53.134.160/32",
- "20.201.15.208/32",
- "20.93.28.24/32",
- "20.61.34.40/32",
- "52.242.224.168/32",
- "20.80.129.80/32",
- "20.195.108.47/32",
- "4.195.133.120/32",
- "4.228.76.163/32",
- "4.182.131.108/32",
- "4.209.224.56/32",
- "108.141.83.74/32",
- "4.213.46.14/32",
- "172.169.17.165/32",
- "51.8.71.117/32",
- "20.3.1.178/32"
- ]
-- name: qwantbot
- user_agent_regex: \+https\://help\.qwant\.com/bot/
- action: ALLOW
- # https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
- remote_addresses: [ "91.242.162.0/24" ]
-- name: internet-archive
- action: ALLOW
- # https://ipinfo.io/AS7941
- remote_addresses: [
- "207.241.224.0/20",
- "208.70.24.0/21",
- "2620:0:9c0::/48"
- ]
-- name: kagibot
- user_agent_regex: \+https\://kagi\.com/bot
- action: ALLOW
- # https://kagi.com/bot
- remote_addresses: [
- "216.18.205.234/32",
- "35.212.27.76/32",
- "104.254.65.50/32",
- "209.151.156.194/32"
- ]
-- name: marginalia
- user_agent_regex: search\.marginalia\.nu
- action: ALLOW
- # Received directly over email
- remote_addresses: [
- "193.183.0.162/31",
- "193.183.0.164/30",
- "193.183.0.168/30",
- "193.183.0.172/31",
- "193.183.0.174/32"
- ]
-- name: mojeekbot
- user_agent_regex: http\://www\.mojeek\.com/bot\.html
- action: ALLOW
- # https://www.mojeek.com/bot.html
- remote_addresses: [ "5.102.173.71/32" ]
+# Search engines to allow
+- import: (data)/crawlers/googlebot.yaml
+- import: (data)/crawlers/bingbot.yaml
+- import: (data)/crawlers/duckduckbot.yaml
+- import: (data)/crawlers/qwantbot.yaml
+- import: (data)/crawlers/internet-archive.yaml
+- import: (data)/crawlers/kagibot.yaml
+- import: (data)/crawlers/marginalia.yaml
+- import: (data)/crawlers/mojeekbot.yaml
-# Common "keeping the internet working" routes
-- name: well-known
- path_regex: ^/.well-known/.*$
- action: ALLOW
-- name: favicon
- path_regex: ^/favicon.ico$
- action: ALLOW
-- name: robots-txt
- path_regex: ^/robots.txt$
- action: ALLOW
+# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+- import: (data)/common/keep-internet-working.yaml
# # Punish any bot with "bot" in the user-agent string
+# # This is known to have a high false-positive rate, use at your own risk
# - name: generic-bot-catchall
# user_agent_regex: (?i:bot|crawler)
# action: CHALLENGE
@@ -654,6 +41,7 @@ bots:
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
+# Generic catchall rule
- name: generic-browser
user_agent_regex: >
Mozilla|Opera