diff options
| author | Xe Iaso <me@xeiaso.net> | 2025-04-23 07:01:28 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-23 07:01:28 -0400 |
| commit | 74e11505c6133ee1107811e81a0fd53e1d7876dd (patch) | |
| tree | 9169e3fccc32657a9a84358bf7e6d7779fa704df /data/botPolicies.yaml | |
| parent | 4e2c9de7085fbc8e5abe8d0659d807881d69769c (diff) | |
| download | anubis-74e11505c6133ee1107811e81a0fd53e1d7876dd.tar.xz anubis-74e11505c6133ee1107811e81a0fd53e1d7876dd.zip | |
feat: enable loading config fragments (#321)
* feat(config): support importing bot policy snippets
This changes the grammar of the Anubis bot policy config to allow
importing from internal shared rules or external rules on the
filesystem.
This lets you create a file at `/data/policies/block-evilbot.yaml` and
then import it with:
```yaml
bots:
- import: /data/policies/block-evilbot.yaml
```
This also explodes the default policy file into a bunch of composable
snippets.
Thank you @Aibrew for your example gitea Atom / RSS feed rules!
Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(data): update botPolicies.json to use imports
Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(cmd/anubis): extract bot policies with --extract-resources
This allows a user that doesn't have anything but the Anubis binary to
figure out what the default configuration does.
* docs(data/botPolicies.yaml): document import syntax in-line
Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(lib/policy): better test importing from JSON snippets
Signed-off-by: Xe Iaso <me@xeiaso.net>
* docs(admin): Add import syntax documentation
This documents the import syntax and is based on the block comment at
the top of the default bot policy file.
* docs(changelog): add note about importing snippets
Signed-off-by: Xe Iaso <me@xeiaso.net>
* style(lib/policy/config): use an error value instead of an inline error
Signed-off-by: Xe Iaso <me@xeiaso.net>
---------
Signed-off-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'data/botPolicies.yaml')
| -rw-r--r-- | data/botPolicies.yaml | 672 |
1 files changed, 30 insertions, 642 deletions
diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml index cb4715a..585be15 100644 --- a/data/botPolicies.yaml +++ b/data/botPolicies.yaml @@ -1,651 +1,38 @@ +## Anubis has the ability to let you import snippets of configuration into the main +## configuration file. This allows you to break up your config into smaller parts +## that get logically assembled into one big file. +## +## Of note, a bot rule can either have inline bot configuration or import a +## bot config snippet. You cannot do both in a single bot rule. +## +## Import paths can either be prefixed with (data) to import from the common/shared +## rules in the data folder in the Anubis source tree or will point to absolute/relative +## paths in your filesystem. If you don't have access to the Anubis source tree, check +## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from. + bots: # Pathological bots to deny -- name: us-artificial-intelligence-scraper - user_agent_regex: \+https\://github\.com/US-Artificial-Intelligence/scraper - action: DENY -- name: lightpanda - user_agent_regex: ^LightPanda/.*$ - action: DENY -- name: headless-chrome - user_agent_regex: HeadlessChrome - action: DENY -- name: headless-chromium - user_agent_regex: HeadlessChromium - action: DENY -- name: "ai-robots-txt" - user_agent_regex: > - AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot - action: DENY -- name: cloudflare-workers - 
headers_regex: - CF-Worker: .* - action: DENY +- # This correlates to data/bots/ai-robots-txt.yaml in the source tree + import: (data)/bots/ai-robots-txt.yaml +- import: (data)/bots/cloudflare-workers.yaml +- import: (data)/bots/headless-browsers.yaml +- import: (data)/bots/us-ai-scraper.yaml -# search engines to allow -- name: googlebot - user_agent_regex: \+http\://www\.google\.com/bot\.html - action: ALLOW - # https://developers.google.com/static/search/apis/ipranges/googlebot.json - remote_addresses: [ - "2001:4860:4801:10::/64", - "2001:4860:4801:11::/64", - "2001:4860:4801:12::/64", - "2001:4860:4801:13::/64", - "2001:4860:4801:14::/64", - "2001:4860:4801:15::/64", - "2001:4860:4801:16::/64", - "2001:4860:4801:17::/64", - "2001:4860:4801:18::/64", - "2001:4860:4801:19::/64", - "2001:4860:4801:1a::/64", - "2001:4860:4801:1b::/64", - "2001:4860:4801:1c::/64", - "2001:4860:4801:1d::/64", - "2001:4860:4801:1e::/64", - "2001:4860:4801:1f::/64", - "2001:4860:4801:20::/64", - "2001:4860:4801:21::/64", - "2001:4860:4801:22::/64", - "2001:4860:4801:23::/64", - "2001:4860:4801:24::/64", - "2001:4860:4801:25::/64", - "2001:4860:4801:26::/64", - "2001:4860:4801:27::/64", - "2001:4860:4801:28::/64", - "2001:4860:4801:29::/64", - "2001:4860:4801:2::/64", - "2001:4860:4801:2a::/64", - "2001:4860:4801:2b::/64", - "2001:4860:4801:2c::/64", - "2001:4860:4801:2d::/64", - "2001:4860:4801:2e::/64", - "2001:4860:4801:2f::/64", - "2001:4860:4801:31::/64", - "2001:4860:4801:32::/64", - "2001:4860:4801:33::/64", - "2001:4860:4801:34::/64", - "2001:4860:4801:35::/64", - "2001:4860:4801:36::/64", - "2001:4860:4801:37::/64", - "2001:4860:4801:38::/64", - "2001:4860:4801:39::/64", - "2001:4860:4801:3a::/64", - "2001:4860:4801:3b::/64", - "2001:4860:4801:3c::/64", - "2001:4860:4801:3d::/64", - "2001:4860:4801:3e::/64", - "2001:4860:4801:40::/64", - "2001:4860:4801:41::/64", - "2001:4860:4801:42::/64", - "2001:4860:4801:43::/64", - "2001:4860:4801:44::/64", - "2001:4860:4801:45::/64", - 
"2001:4860:4801:46::/64", - "2001:4860:4801:47::/64", - "2001:4860:4801:48::/64", - "2001:4860:4801:49::/64", - "2001:4860:4801:4a::/64", - "2001:4860:4801:4b::/64", - "2001:4860:4801:4c::/64", - "2001:4860:4801:50::/64", - "2001:4860:4801:51::/64", - "2001:4860:4801:52::/64", - "2001:4860:4801:53::/64", - "2001:4860:4801:54::/64", - "2001:4860:4801:55::/64", - "2001:4860:4801:56::/64", - "2001:4860:4801:60::/64", - "2001:4860:4801:61::/64", - "2001:4860:4801:62::/64", - "2001:4860:4801:63::/64", - "2001:4860:4801:64::/64", - "2001:4860:4801:65::/64", - "2001:4860:4801:66::/64", - "2001:4860:4801:67::/64", - "2001:4860:4801:68::/64", - "2001:4860:4801:69::/64", - "2001:4860:4801:6a::/64", - "2001:4860:4801:6b::/64", - "2001:4860:4801:6c::/64", - "2001:4860:4801:6d::/64", - "2001:4860:4801:6e::/64", - "2001:4860:4801:6f::/64", - "2001:4860:4801:70::/64", - "2001:4860:4801:71::/64", - "2001:4860:4801:72::/64", - "2001:4860:4801:73::/64", - "2001:4860:4801:74::/64", - "2001:4860:4801:75::/64", - "2001:4860:4801:76::/64", - "2001:4860:4801:77::/64", - "2001:4860:4801:78::/64", - "2001:4860:4801:79::/64", - "2001:4860:4801:80::/64", - "2001:4860:4801:81::/64", - "2001:4860:4801:82::/64", - "2001:4860:4801:83::/64", - "2001:4860:4801:84::/64", - "2001:4860:4801:85::/64", - "2001:4860:4801:86::/64", - "2001:4860:4801:87::/64", - "2001:4860:4801:88::/64", - "2001:4860:4801:90::/64", - "2001:4860:4801:91::/64", - "2001:4860:4801:92::/64", - "2001:4860:4801:93::/64", - "2001:4860:4801:94::/64", - "2001:4860:4801:95::/64", - "2001:4860:4801:96::/64", - "2001:4860:4801:a0::/64", - "2001:4860:4801:a1::/64", - "2001:4860:4801:a2::/64", - "2001:4860:4801:a3::/64", - "2001:4860:4801:a4::/64", - "2001:4860:4801:a5::/64", - "2001:4860:4801:c::/64", - "2001:4860:4801:f::/64", - "192.178.5.0/27", - "192.178.6.0/27", - "192.178.6.128/27", - "192.178.6.160/27", - "192.178.6.192/27", - "192.178.6.32/27", - "192.178.6.64/27", - "192.178.6.96/27", - "34.100.182.96/28", - 
"34.101.50.144/28", - "34.118.254.0/28", - "34.118.66.0/28", - "34.126.178.96/28", - "34.146.150.144/28", - "34.147.110.144/28", - "34.151.74.144/28", - "34.152.50.64/28", - "34.154.114.144/28", - "34.155.98.32/28", - "34.165.18.176/28", - "34.175.160.64/28", - "34.176.130.16/28", - "34.22.85.0/27", - "34.64.82.64/28", - "34.65.242.112/28", - "34.80.50.80/28", - "34.88.194.0/28", - "34.89.10.80/28", - "34.89.198.80/28", - "34.96.162.48/28", - "35.247.243.240/28", - "66.249.64.0/27", - "66.249.64.128/27", - "66.249.64.160/27", - "66.249.64.224/27", - "66.249.64.32/27", - "66.249.64.64/27", - "66.249.64.96/27", - "66.249.65.0/27", - "66.249.65.128/27", - "66.249.65.160/27", - "66.249.65.192/27", - "66.249.65.224/27", - "66.249.65.32/27", - "66.249.65.64/27", - "66.249.65.96/27", - "66.249.66.0/27", - "66.249.66.128/27", - "66.249.66.160/27", - "66.249.66.192/27", - "66.249.66.224/27", - "66.249.66.32/27", - "66.249.66.64/27", - "66.249.66.96/27", - "66.249.68.0/27", - "66.249.68.128/27", - "66.249.68.32/27", - "66.249.68.64/27", - "66.249.68.96/27", - "66.249.69.0/27", - "66.249.69.128/27", - "66.249.69.160/27", - "66.249.69.192/27", - "66.249.69.224/27", - "66.249.69.32/27", - "66.249.69.64/27", - "66.249.69.96/27", - "66.249.70.0/27", - "66.249.70.128/27", - "66.249.70.160/27", - "66.249.70.192/27", - "66.249.70.224/27", - "66.249.70.32/27", - "66.249.70.64/27", - "66.249.70.96/27", - "66.249.71.0/27", - "66.249.71.128/27", - "66.249.71.160/27", - "66.249.71.192/27", - "66.249.71.224/27", - "66.249.71.32/27", - "66.249.71.64/27", - "66.249.71.96/27", - "66.249.72.0/27", - "66.249.72.128/27", - "66.249.72.160/27", - "66.249.72.192/27", - "66.249.72.224/27", - "66.249.72.32/27", - "66.249.72.64/27", - "66.249.72.96/27", - "66.249.73.0/27", - "66.249.73.128/27", - "66.249.73.160/27", - "66.249.73.192/27", - "66.249.73.224/27", - "66.249.73.32/27", - "66.249.73.64/27", - "66.249.73.96/27", - "66.249.74.0/27", - "66.249.74.128/27", - "66.249.74.160/27", - 
"66.249.74.192/27", - "66.249.74.32/27", - "66.249.74.64/27", - "66.249.74.96/27", - "66.249.75.0/27", - "66.249.75.128/27", - "66.249.75.160/27", - "66.249.75.192/27", - "66.249.75.224/27", - "66.249.75.32/27", - "66.249.75.64/27", - "66.249.75.96/27", - "66.249.76.0/27", - "66.249.76.128/27", - "66.249.76.160/27", - "66.249.76.192/27", - "66.249.76.224/27", - "66.249.76.32/27", - "66.249.76.64/27", - "66.249.76.96/27", - "66.249.77.0/27", - "66.249.77.128/27", - "66.249.77.160/27", - "66.249.77.192/27", - "66.249.77.224/27", - "66.249.77.32/27", - "66.249.77.64/27", - "66.249.77.96/27", - "66.249.78.0/27", - "66.249.78.32/27", - "66.249.79.0/27", - "66.249.79.128/27", - "66.249.79.160/27", - "66.249.79.192/27", - "66.249.79.224/27", - "66.249.79.32/27", - "66.249.79.64/27", - "66.249.79.96/27" - ] -- name: bingbot - user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm - action: ALLOW - # https://www.bing.com/toolbox/bingbot.json - remote_addresses: [ - "157.55.39.0/24", - "207.46.13.0/24", - "40.77.167.0/24", - "13.66.139.0/24", - "13.66.144.0/24", - "52.167.144.0/24", - "13.67.10.16/28", - "13.69.66.240/28", - "13.71.172.224/28", - "139.217.52.0/28", - "191.233.204.224/28", - "20.36.108.32/28", - "20.43.120.16/28", - "40.79.131.208/28", - "40.79.186.176/28", - "52.231.148.0/28", - "20.79.107.240/28", - "51.105.67.0/28", - "20.125.163.80/28", - "40.77.188.0/22", - "65.55.210.0/24", - "199.30.24.0/23", - "40.77.202.0/24", - "40.77.139.0/25", - "20.74.197.0/28", - "20.15.133.160/27", - "40.77.177.0/24", - "40.77.178.0/23" - ] -- name: duckduckbot - user_agent_regex: DuckDuckBot/1\.1; \(\+http\://duckduckgo\.com/duckduckbot\.html\) - action: ALLOW - # https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot - remote_addresses: [ - "57.152.72.128/32", - "51.8.253.152/32", - "40.80.242.63/32", - "20.12.141.99/32", - "20.49.136.28/32", - "51.116.131.221/32", - "51.107.40.209/32", - "20.40.133.240/32", - "20.50.168.91/32", - "51.120.48.122/32", - 
"20.193.45.113/32", - "40.76.173.151/32", - "40.76.163.7/32", - "20.185.79.47/32", - "52.142.26.175/32", - "20.185.79.15/32", - "52.142.24.149/32", - "40.76.162.208/32", - "40.76.163.23/32", - "40.76.162.191/32", - "40.76.162.247/32", - "40.88.21.235/32", - "20.191.45.212/32", - "52.146.59.12/32", - "52.146.59.156/32", - "52.146.59.154/32", - "52.146.58.236/32", - "20.62.224.44/32", - "51.104.180.53/32", - "51.104.180.47/32", - "51.104.180.26/32", - "51.104.146.225/32", - "51.104.146.235/32", - "20.73.202.147/32", - "20.73.132.240/32", - "20.71.12.143/32", - "20.56.197.58/32", - "20.56.197.63/32", - "20.43.150.93/32", - "20.43.150.85/32", - "20.44.222.1/32", - "40.89.243.175/32", - "13.89.106.77/32", - "52.143.242.6/32", - "52.143.241.111/32", - "52.154.60.82/32", - "20.197.209.11/32", - "20.197.209.27/32", - "20.226.133.105/32", - "191.234.216.4/32", - "191.234.216.178/32", - "20.53.92.211/32", - "20.53.91.2/32", - "20.207.99.197/32", - "20.207.97.190/32", - "40.81.250.205/32", - "40.64.106.11/32", - "40.64.105.247/32", - "20.72.242.93/32", - "20.99.255.235/32", - "20.113.3.121/32", - "52.224.16.221/32", - "52.224.21.53/32", - "52.224.20.204/32", - "52.224.21.19/32", - "52.224.20.249/32", - "52.224.20.203/32", - "52.224.20.190/32", - "52.224.16.229/32", - "52.224.21.20/32", - "52.146.63.80/32", - "52.224.20.227/32", - "52.224.20.193/32", - "52.190.37.160/32", - "52.224.21.23/32", - "52.224.20.223/32", - "52.224.20.181/32", - "52.224.21.49/32", - "52.224.21.55/32", - "52.224.21.61/32", - "52.224.19.152/32", - "52.224.20.186/32", - "52.224.21.27/32", - "52.224.21.51/32", - "52.224.20.174/32", - "52.224.21.4/32", - "51.104.164.109/32", - "51.104.167.71/32", - "51.104.160.177/32", - "51.104.162.149/32", - "51.104.167.95/32", - "51.104.167.54/32", - "51.104.166.111/32", - "51.104.167.88/32", - "51.104.161.32/32", - "51.104.163.250/32", - "51.104.164.189/32", - "51.104.167.19/32", - "51.104.160.167/32", - "51.104.167.110/32", - "20.191.44.119/32", - "51.104.167.104/32", 
- "20.191.44.234/32", - "51.104.164.215/32", - "51.104.167.52/32", - "20.191.44.22/32", - "51.104.167.87/32", - "51.104.167.96/32", - "20.191.44.16/32", - "51.104.167.61/32", - "51.104.164.147/32", - "20.50.48.159/32", - "40.114.182.172/32", - "20.50.50.130/32", - "20.50.50.163/32", - "20.50.50.46/32", - "40.114.182.153/32", - "20.50.50.118/32", - "20.50.49.55/32", - "20.50.49.25/32", - "40.114.183.251/32", - "20.50.50.123/32", - "20.50.49.237/32", - "20.50.48.192/32", - "20.50.50.134/32", - "51.138.90.233/32", - "40.114.183.196/32", - "20.50.50.146/32", - "40.114.183.88/32", - "20.50.50.145/32", - "20.50.50.121/32", - "20.50.49.40/32", - "51.138.90.206/32", - "40.114.182.45/32", - "51.138.90.161/32", - "20.50.49.0/32", - "40.119.232.215/32", - "104.43.55.167/32", - "40.119.232.251/32", - "40.119.232.50/32", - "40.119.232.146/32", - "40.119.232.218/32", - "104.43.54.127/32", - "104.43.55.117/32", - "104.43.55.116/32", - "104.43.55.166/32", - "52.154.169.50/32", - "52.154.171.70/32", - "52.154.170.229/32", - "52.154.170.113/32", - "52.154.171.44/32", - "52.154.172.2/32", - "52.143.244.81/32", - "52.154.171.87/32", - "52.154.171.250/32", - "52.154.170.28/32", - "52.154.170.122/32", - "52.143.243.117/32", - "52.143.247.235/32", - "52.154.171.235/32", - "52.154.171.196/32", - "52.154.171.0/32", - "52.154.170.243/32", - "52.154.170.26/32", - "52.154.169.200/32", - "52.154.170.96/32", - "52.154.170.88/32", - "52.154.171.150/32", - "52.154.171.205/32", - "52.154.170.117/32", - "52.154.170.209/32", - "191.235.202.48/32", - "191.233.3.202/32", - "191.235.201.214/32", - "191.233.3.197/32", - "191.235.202.38/32", - "20.53.78.144/32", - "20.193.24.10/32", - "20.53.78.236/32", - "20.53.78.138/32", - "20.53.78.123/32", - "20.53.78.106/32", - "20.193.27.215/32", - "20.193.25.197/32", - "20.193.12.126/32", - "20.193.24.251/32", - "20.204.242.101/32", - "20.207.72.113/32", - "20.204.242.19/32", - "20.219.45.67/32", - "20.207.72.11/32", - "20.219.45.190/32", - "20.204.243.55/32", - 
"20.204.241.148/32", - "20.207.72.110/32", - "20.204.240.172/32", - "20.207.72.21/32", - "20.204.246.81/32", - "20.207.107.181/32", - "20.204.246.254/32", - "20.219.43.246/32", - "52.149.25.43/32", - "52.149.61.51/32", - "52.149.58.139/32", - "52.149.60.38/32", - "52.148.165.38/32", - "52.143.95.162/32", - "52.149.56.151/32", - "52.149.30.45/32", - "52.149.58.173/32", - "52.143.95.204/32", - "52.149.28.83/32", - "52.149.58.69/32", - "52.148.161.87/32", - "52.149.58.27/32", - "52.149.28.18/32", - "20.79.226.26/32", - "20.79.239.66/32", - "20.79.238.198/32", - "20.113.14.159/32", - "20.75.144.152/32", - "20.43.172.120/32", - "20.53.134.160/32", - "20.201.15.208/32", - "20.93.28.24/32", - "20.61.34.40/32", - "52.242.224.168/32", - "20.80.129.80/32", - "20.195.108.47/32", - "4.195.133.120/32", - "4.228.76.163/32", - "4.182.131.108/32", - "4.209.224.56/32", - "108.141.83.74/32", - "4.213.46.14/32", - "172.169.17.165/32", - "51.8.71.117/32", - "20.3.1.178/32", - "52.149.56.151/32", - "52.149.30.45/32", - "52.149.58.173/32", - "52.143.95.204/32", - "52.149.28.83/32", - "52.149.58.69/32", - "52.148.161.87/32", - "52.149.58.27/32", - "52.149.28.18/32", - "20.79.226.26/32", - "20.79.239.66/32", - "20.79.238.198/32", - "20.113.14.159/32", - "20.75.144.152/32", - "20.43.172.120/32", - "20.53.134.160/32", - "20.201.15.208/32", - "20.93.28.24/32", - "20.61.34.40/32", - "52.242.224.168/32", - "20.80.129.80/32", - "20.195.108.47/32", - "4.195.133.120/32", - "4.228.76.163/32", - "4.182.131.108/32", - "4.209.224.56/32", - "108.141.83.74/32", - "4.213.46.14/32", - "172.169.17.165/32", - "51.8.71.117/32", - "20.3.1.178/32" - ] -- name: qwantbot - user_agent_regex: \+https\://help\.qwant\.com/bot/ - action: ALLOW - # https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json - remote_addresses: [ "91.242.162.0/24" ] -- name: internet-archive - action: ALLOW - # https://ipinfo.io/AS7941 - remote_addresses: [ - "207.241.224.0/20", - "208.70.24.0/21", - "2620:0:9c0::/48" - ] 
-- name: kagibot - user_agent_regex: \+https\://kagi\.com/bot - action: ALLOW - # https://kagi.com/bot - remote_addresses: [ - "216.18.205.234/32", - "35.212.27.76/32", - "104.254.65.50/32", - "209.151.156.194/32" - ] -- name: marginalia - user_agent_regex: search\.marginalia\.nu - action: ALLOW - # Received directly over email - remote_addresses: [ - "193.183.0.162/31", - "193.183.0.164/30", - "193.183.0.168/30", - "193.183.0.172/31", - "193.183.0.174/32" - ] -- name: mojeekbot - user_agent_regex: http\://www\.mojeek\.com/bot\.html - action: ALLOW - # https://www.mojeek.com/bot.html - remote_addresses: [ "5.102.173.71/32" ] +# Search engines to allow +- import: (data)/crawlers/googlebot.yaml +- import: (data)/crawlers/bingbot.yaml +- import: (data)/crawlers/duckduckbot.yaml +- import: (data)/crawlers/qwantbot.yaml +- import: (data)/crawlers/internet-archive.yaml +- import: (data)/crawlers/kagibot.yaml +- import: (data)/crawlers/marginalia.yaml +- import: (data)/crawlers/mojeekbot.yaml -# Common "keeping the internet working" routes -- name: well-known - path_regex: ^/.well-known/.*$ - action: ALLOW -- name: favicon - path_regex: ^/favicon.ico$ - action: ALLOW -- name: robots-txt - path_regex: ^/robots.txt$ - action: ALLOW +# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt) +- import: (data)/common/keep-internet-working.yaml # # Punish any bot with "bot" in the user-agent string +# # This is known to have a high false-positive rate, use at your own risk # - name: generic-bot-catchall # user_agent_regex: (?i:bot|crawler) # action: CHALLENGE @@ -654,6 +41,7 @@ bots: # report_as: 4 # lie to the operator # algorithm: slow # intentionally waste CPU cycles and time +# Generic catchall rule - name: generic-browser user_agent_regex: > Mozilla|Opera |
