From 2bcca657bbb814a0e02b2e175ee6900eddeba625 Mon Sep 17 00:00:00 2001
From: Xe Iaso
Date: Sun, 27 Apr 2025 09:34:58 -0400
Subject: chore(k8s/xesite): update anubis to use yaml config

Signed-off-by: Xe Iaso
---
Review notes (placed after the "---" marker, so not part of the commit
message):
- generic-browser: user_agent_regex now uses the ">-" folded scalar. Plain ">"
  keeps one trailing newline, which made the pattern "Mozilla|Opera\n".
- xesite-rss-feeds.yaml: the dots in the feed path patterns are escaped so
  they match a literal "." only.
- The new-side content of xesite-rss-feeds.yaml and kustomization.yaml now
  ends with a newline.
- NOTE(review): this patch was rebuilt from a whitespace-collapsed copy. The
  indentation (notably of the context lines in deployment.yaml and
  kustomization.yaml) is reconstructed, and the blob ids on the "index" lines
  predate the content changes above -- confirm against the tree before
  applying.

 manifest/xesite/anubis/botPolicies.yaml      | 51 ++++++++++++++++++++++++++++
 manifest/xesite/anubis/xesite-rss-feeds.yaml |  9 +++++
 manifest/xesite/deployment.yaml              |  2 +-
 manifest/xesite/kustomization.yaml           |  3 +-
 4 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 manifest/xesite/anubis/botPolicies.yaml
 create mode 100644 manifest/xesite/anubis/xesite-rss-feeds.yaml

diff --git a/manifest/xesite/anubis/botPolicies.yaml b/manifest/xesite/anubis/botPolicies.yaml
new file mode 100644
index 0000000..a9edcbe
--- /dev/null
+++ b/manifest/xesite/anubis/botPolicies.yaml
@@ -0,0 +1,51 @@
+## Anubis has the ability to let you import snippets of configuration into the main
+## configuration file. This allows you to break up your config into smaller parts
+## that get logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths can either be prefixed with (data) to import from the common/shared
+## rules in the data folder in the Anubis source tree or will point to absolute/relative
+## paths in your filesystem. If you don't have access to the Anubis source tree, check
+## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
+
+bots:
+  # Pathological bots to deny
+  - # This correlates to data/bots/ai-robots-txt.yaml in the source tree
+    import: (data)/bots/ai-robots-txt.yaml
+  - import: (data)/bots/cloudflare-workers.yaml
+  - import: (data)/bots/headless-browsers.yaml
+  - import: (data)/bots/us-ai-scraper.yaml
+
+  # Search engines to allow
+  - import: (data)/crawlers/googlebot.yaml
+  - import: (data)/crawlers/bingbot.yaml
+  - import: (data)/crawlers/duckduckbot.yaml
+  - import: (data)/crawlers/qwantbot.yaml
+  - import: (data)/crawlers/internet-archive.yaml
+  - import: (data)/crawlers/kagibot.yaml
+  - import: (data)/crawlers/marginalia.yaml
+  - import: (data)/crawlers/mojeekbot.yaml
+
+  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+  - import: (data)/common/keep-internet-working.yaml
+  - import: /xe/cfg/anubis/xesite-rss-feeds.yaml
+
+  # # Punish any bot with "bot" in the user-agent string
+  # # This is known to have a high false-positive rate, use at your own risk
+  # - name: generic-bot-catchall
+  #   user_agent_regex: (?i:bot|crawler)
+  #   action: CHALLENGE
+  #   challenge:
+  #     difficulty: 16 # impossible
+  #     report_as: 4 # lie to the operator
+  #     algorithm: slow # intentionally waste CPU cycles and time
+
+  # Generic catchall rule
+  - name: generic-browser
+    user_agent_regex: >-  # ">-" strips the trailing newline so it is not part of the regex
+      Mozilla|Opera
+    action: CHALLENGE
+
+dnsbl: false
diff --git a/manifest/xesite/anubis/xesite-rss-feeds.yaml b/manifest/xesite/anubis/xesite-rss-feeds.yaml
new file mode 100644
index 0000000..28a93c5
--- /dev/null
+++ b/manifest/xesite/anubis/xesite-rss-feeds.yaml
@@ -0,0 +1,9 @@
+- name: blog-rss-feed
+  action: ALLOW
+  path_regex: ^/blog\.rss$
+- name: blog-json-feed
+  action: ALLOW
+  path_regex: ^/blog\.json$
+- name: xecast-rss-feed
+  action: ALLOW
+  path_regex: ^/xecast\.rss$
diff --git a/manifest/xesite/deployment.yaml b/manifest/xesite/deployment.yaml
index 3791fc3..9b50b90 100644
--- a/manifest/xesite/deployment.yaml
+++ b/manifest/xesite/deployment.yaml
@@ -80,7 +80,7 @@ spec:
             - name: "METRICS_BIND"
               value: ":9090"
             - name: "POLICY_FNAME"
-              value: "/xe/cfg/anubis/botPolicies.json"
+              value: "/xe/cfg/anubis/botPolicies.yaml"
             - name: "SERVE_ROBOTS_TXT"
               value: "false"
             - name: "TARGET"
diff --git a/manifest/xesite/kustomization.yaml b/manifest/xesite/kustomization.yaml
index fc82930..3be727b 100644
--- a/manifest/xesite/kustomization.yaml
+++ b/manifest/xesite/kustomization.yaml
@@ -12,4 +12,5 @@ configMapGenerator:
   - name: anubis-cfg
     behavior: create
     files:
-      - ./anubis/botPolicies.json
\ No newline at end of file
+      - ./anubis/botPolicies.yaml
+      - ./anubis/xesite-rss-feeds.yaml
-- 
cgit v1.2.3