aboutsummaryrefslogtreecommitdiff
path: root/manifest
diff options
context:
space:
mode:
Diffstat (limited to 'manifest')
-rw-r--r--manifest/xesite/anubis/botPolicies.yaml51
-rw-r--r--manifest/xesite/anubis/xesite-rss-feeds.yaml9
-rw-r--r--manifest/xesite/deployment.yaml2
-rw-r--r--manifest/xesite/kustomization.yaml3
4 files changed, 63 insertions, 2 deletions
diff --git a/manifest/xesite/anubis/botPolicies.yaml b/manifest/xesite/anubis/botPolicies.yaml
new file mode 100644
index 0000000..a9edcbe
--- /dev/null
+++ b/manifest/xesite/anubis/botPolicies.yaml
@@ -0,0 +1,51 @@
+## Anubis has the ability to let you import snippets of configuration into the main
+## configuration file. This allows you to break up your config into smaller parts
+## that get logically assembled into one big file.
+##
+## Of note, a bot rule can either have inline bot configuration or import a
+## bot config snippet. You cannot do both in a single bot rule.
+##
+## Import paths can either be prefixed with (data) to import from the common/shared
+## rules in the data folder in the Anubis source tree or will point to absolute/relative
+## paths in your filesystem. If you don't have access to the Anubis source tree, check
+## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
+
+bots:
+# Pathological bots to deny
+- # (data)/ resolves to the data/ folder of the Anubis source tree: data/bots/ai-robots-txt.yaml
+  import: (data)/bots/ai-robots-txt.yaml
+- import: (data)/bots/cloudflare-workers.yaml
+- import: (data)/bots/headless-browsers.yaml
+- import: (data)/bots/us-ai-scraper.yaml
+
+# Search engines to allow
+- import: (data)/crawlers/googlebot.yaml
+- import: (data)/crawlers/bingbot.yaml
+- import: (data)/crawlers/duckduckbot.yaml
+- import: (data)/crawlers/qwantbot.yaml
+- import: (data)/crawlers/internet-archive.yaml
+- import: (data)/crawlers/kagibot.yaml
+- import: (data)/crawlers/marginalia.yaml
+- import: (data)/crawlers/mojeekbot.yaml
+
+# Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
+- import: (data)/common/keep-internet-working.yaml
+- import: /xe/cfg/anubis/xesite-rss-feeds.yaml  # site-local feed rules, shipped in the same ConfigMap as this file
+
+# # Punish any bot with "bot" in the user-agent string
+# # This is known to have a high false-positive rate, use at your own risk
+# - name: generic-bot-catchall
+# user_agent_regex: (?i:bot|crawler)
+# action: CHALLENGE
+# challenge:
+# difficulty: 16 # impossible
+# report_as: 4 # lie to the operator
+# algorithm: slow # intentionally waste CPU cycles and time
+
+# Generic catchall rule. ">-" (not ">") keeps a trailing newline out of the regex.
+- name: generic-browser
+  user_agent_regex: >-
+    Mozilla|Opera
+  action: CHALLENGE
+
+dnsbl: false  # NOTE(review): presumably turns off DNS blocklist (DNSBL) lookups; confirm in the Anubis policy docs
diff --git a/manifest/xesite/anubis/xesite-rss-feeds.yaml b/manifest/xesite/anubis/xesite-rss-feeds.yaml
new file mode 100644
index 0000000..28a93c5
--- /dev/null
+++ b/manifest/xesite/anubis/xesite-rss-feeds.yaml
@@ -0,0 +1,9 @@
+- name: blog-rss-feed  # feed endpoints are always allowed; dots are escaped so only the literal paths match
+  action: ALLOW
+  path_regex: ^/blog\.rss$
+- name: blog-json-feed
+  action: ALLOW
+  path_regex: ^/blog\.json$
+- name: xecast-rss-feed
+  action: ALLOW
+  path_regex: ^/xecast\.rss$
diff --git a/manifest/xesite/deployment.yaml b/manifest/xesite/deployment.yaml
index 3791fc3..9b50b90 100644
--- a/manifest/xesite/deployment.yaml
+++ b/manifest/xesite/deployment.yaml
@@ -80,7 +80,7 @@ spec:
- name: "METRICS_BIND"
value: ":9090"
- name: "POLICY_FNAME"
- value: "/xe/cfg/anubis/botPolicies.json"
+ value: "/xe/cfg/anubis/botPolicies.yaml"
- name: "SERVE_ROBOTS_TXT"
value: "false"
- name: "TARGET"
diff --git a/manifest/xesite/kustomization.yaml b/manifest/xesite/kustomization.yaml
index fc82930..3be727b 100644
--- a/manifest/xesite/kustomization.yaml
+++ b/manifest/xesite/kustomization.yaml
@@ -12,4 +12,5 @@ configMapGenerator:
- name: anubis-cfg
behavior: create
files:
- - ./anubis/botPolicies.json
\ No newline at end of file
+ - ./anubis/botPolicies.yaml
+ - ./anubis/xesite-rss-feeds.yaml  # imported by botPolicies.yaml