| field | value | date |
|---|---|---|
| author | Xe Iaso <me@xeiaso.net> | 2025-04-22 14:21:17 -0400 |
| committer | Xe Iaso <me@xeiaso.net> | 2025-04-22 14:21:17 -0400 |
| commit | e6af0697daea67eab0dcec1ad5929ffe7e9dbd50 (patch) | |
| tree | 5c4328e3a16b9d0845e4b468b0b702ad0c1a82ca | |
| parent | 7cc8953b71b783c02e04692fe1a71efdacb2c981 (diff) | |
| download | anubis-e6af0697daea67eab0dcec1ad5929ffe7e9dbd50.tar.xz, anubis-e6af0697daea67eab0dcec1ad5929ffe7e9dbd50.zip | |
docs(data/botPolicies.yaml): document import syntax in-line
Signed-off-by: Xe Iaso <me@xeiaso.net>
| file | mode | lines changed |
|---|---|---|
| data/botPolicies.yaml | -rw-r--r-- | 41 |

1 file changed, 27 insertions(+), 14 deletions(-)
```diff
diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml
index 0378b5e..d4b0739 100644
--- a/data/botPolicies.yaml
+++ b/data/botPolicies.yaml
@@ -1,22 +1,36 @@
+## Anubis has the ability to let you import snippets of configuration into the main
+## configuration file. This allows you to break up your config into smaller parts
+## that get logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths can either be prefixed with (data) to import from the common/shard
+## rules in the data folder in the Anubis source tree or will point to absolute/relative
+## paths in your filesystem. If you don't have access to the Anubis source tree, check
+## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
+
 bots:
 # Pathological bots to deny
-- import: "(data)/bots/ai-robots-txt.yaml"
-- import: "(data)/bots/cloudflare-workers.yaml"
-- import: "(data)/bots/headless-browsers.yaml"
-- import: "(data)/bots/us-ai-scraper.yaml"
+
+- # This correlates to data/bots/ai-robots-txt.yaml in the source tree
+  import: (data)/bots/ai-robots-txt.yaml
+- import: (data)/bots/cloudflare-workers.yaml
+- import: (data)/bots/headless-browsers.yaml
+- import: (data)/bots/us-ai-scraper.yaml
 
 # Search engines to allow
-- import: "(data)/crawlers/googlebot.yaml"
-- import: "(data)/crawlers/bingbot.yaml"
-- import: "(data)/crawlers/duckduckbot.yaml"
-- import: "(data)/crawlers/qwantbot.yaml"
-- import: "(data)/crawlers/internet-archive.yaml"
-- import: "(data)/crawlers/kagibot.yaml"
-- import: "(data)/crawlers/marginalia.yaml"
-- import: "(data)/crawlers/mojeekbot.yaml"
+- import: (data)/crawlers/googlebot.yaml
+- import: (data)/crawlers/bingbot.yaml
+- import: (data)/crawlers/duckduckbot.yaml
+- import: (data)/crawlers/qwantbot.yaml
+- import: (data)/crawlers/internet-archive.yaml
+- import: (data)/crawlers/kagibot.yaml
+- import: (data)/crawlers/marginalia.yaml
+- import: (data)/crawlers/mojeekbot.yaml
 
 # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
-- import: "(data)/common/keep-internet-working.yaml"
+- import: (data)/common/keep-internet-working.yaml
 
 # # Punish any bot with "bot" in the user-agent string
 # # This is known to have a high false-positive rate, use at your own risk
@@ -28,7 +42,6 @@ bots:
 # report_as: 4 # lie to the operator
 # algorithm: slow # intentionally waste CPU cycles and time
-
 
 # Generic catchall rule
 - name: generic-browser
   user_agent_regex: >
```
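For readers skimming the diff, the new in-line docs boil down to two mutually exclusive rule shapes. The sketch below illustrates both; it is a hedged example rather than part of this commit. The rule name `deny-foobot`, the `FooBot` regex, the path `./my-rules/deny-foo.yaml`, and the `action: DENY` field are invented for illustration; only the `(data)` import path and the `name`/`user_agent_regex` fields appear in the file itself.

```yaml
bots:
# Shape 1: inline bot configuration, all fields live in the rule itself.
# "action: DENY" assumes Anubis's usual ALLOW/DENY/CHALLENGE actions.
- name: deny-foobot            # hypothetical rule name
  user_agent_regex: FooBot     # hypothetical user agent pattern
  action: DENY

# Shape 2: an import, where the rule contains only the import key and nothing else.
- import: (data)/bots/ai-robots-txt.yaml  # resolved from the Anubis data folder
- import: ./my-rules/deny-foo.yaml        # hypothetical relative filesystem path
```

Assuming a snippet file is itself a YAML list of bot rules, as the shipped `(data)` snippets suggest, the hypothetical `./my-rules/deny-foo.yaml` imported above could look like:

```yaml
# ./my-rules/deny-foo.yaml (hypothetical): a list of bot rules that gets
# assembled into the importing rule's position in botPolicies.yaml.
- name: deny-foo
  user_agent_regex: FooBot
  action: DENY
```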
