author    Xe Iaso <me@xeiaso.net>  2025-04-22 14:21:17 -0400
committer Xe Iaso <me@xeiaso.net>  2025-04-22 14:21:17 -0400
commit e6af0697daea67eab0dcec1ad5929ffe7e9dbd50 (patch)
tree   5c4328e3a16b9d0845e4b468b0b702ad0c1a82ca
parent 7cc8953b71b783c02e04692fe1a71efdacb2c981 (diff)
docs(data/botPolicies.yaml): document import syntax in-line
Signed-off-by: Xe Iaso <me@xeiaso.net>
-rw-r--r--  data/botPolicies.yaml  41

1 file changed, 27 insertions(+), 14 deletions(-)
diff --git a/data/botPolicies.yaml b/data/botPolicies.yaml
index 0378b5e..d4b0739 100644
--- a/data/botPolicies.yaml
+++ b/data/botPolicies.yaml
@@ -1,22 +1,36 @@
+## Anubis has the ability to let you import snippets of configuration into the main
+## configuration file. This allows you to break up your config into smaller parts
+## that get logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths can either be prefixed with (data) to import from the common/shared
+## rules in the data folder of the Anubis source tree, or point to absolute/relative
+## paths on your filesystem. If you don't have access to the Anubis source tree, check
+## /usr/share/docs/anubis/data or the tarball you extracted Anubis from.
+
bots:
# Pathological bots to deny
-- import: "(data)/bots/ai-robots-txt.yaml"
-- import: "(data)/bots/cloudflare-workers.yaml"
-- import: "(data)/bots/headless-browsers.yaml"
-- import: "(data)/bots/us-ai-scraper.yaml"
+
+- # This correlates to data/bots/ai-robots-txt.yaml in the source tree
+ import: (data)/bots/ai-robots-txt.yaml
+- import: (data)/bots/cloudflare-workers.yaml
+- import: (data)/bots/headless-browsers.yaml
+- import: (data)/bots/us-ai-scraper.yaml
# Search engines to allow
-- import: "(data)/crawlers/googlebot.yaml"
-- import: "(data)/crawlers/bingbot.yaml"
-- import: "(data)/crawlers/duckduckbot.yaml"
-- import: "(data)/crawlers/qwantbot.yaml"
-- import: "(data)/crawlers/internet-archive.yaml"
-- import: "(data)/crawlers/kagibot.yaml"
-- import: "(data)/crawlers/marginalia.yaml"
-- import: "(data)/crawlers/mojeekbot.yaml"
+- import: (data)/crawlers/googlebot.yaml
+- import: (data)/crawlers/bingbot.yaml
+- import: (data)/crawlers/duckduckbot.yaml
+- import: (data)/crawlers/qwantbot.yaml
+- import: (data)/crawlers/internet-archive.yaml
+- import: (data)/crawlers/kagibot.yaml
+- import: (data)/crawlers/marginalia.yaml
+- import: (data)/crawlers/mojeekbot.yaml
# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
-- import: "(data)/common/keep-internet-working.yaml"
+- import: (data)/common/keep-internet-working.yaml
# # Punish any bot with "bot" in the user-agent string
# # This is known to have a high false-positive rate, use at your own risk
@@ -28,7 +42,6 @@ bots:
# report_as: 4 # lie to the operator
# algorithm: slow # intentionally waste CPU cycles and time
-
# Generic catchall rule
- name: generic-browser
user_agent_regex: >
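For reference, the two mutually exclusive bot rule forms described in the new comments look roughly like the sketch below. This is illustrative only, not part of the commit: the import path is one of the snippets above, while the inline rule's name, user_agent_regex, and action values are made up, assuming the name/user_agent_regex/action fields that appear elsewhere in this file.

bots:
# Form 1: import a config snippet (no other bot fields allowed in this rule)
- import: (data)/bots/ai-robots-txt.yaml
# Form 2: inline bot configuration (cannot also use import in this rule)
- name: example-inline-rule    # hypothetical rule name
  user_agent_regex: ExampleBot # hypothetical user agent matcher
  action: DENY                 # deny matching requests outright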