aboutsummaryrefslogtreecommitdiff
path: root/data/crawlers
diff options
context:
space:
mode:
authorXe Iaso <me@xeiaso.net>2025-04-23 07:01:28 -0400
committerGitHub <noreply@github.com>2025-04-23 07:01:28 -0400
commit74e11505c6133ee1107811e81a0fd53e1d7876dd (patch)
tree9169e3fccc32657a9a84358bf7e6d7779fa704df /data/crawlers
parent4e2c9de7085fbc8e5abe8d0659d807881d69769c (diff)
downloadanubis-74e11505c6133ee1107811e81a0fd53e1d7876dd.tar.xz
anubis-74e11505c6133ee1107811e81a0fd53e1d7876dd.zip
feat: enable loading config fragments (#321)
* feat(config): support importing bot policy snippets This changes the grammar of the Anubis bot policy config to allow importing from internal shared rules or external rules on the filesystem. This lets you create a file at `/data/policies/block-evilbot.yaml` and then import it with: ```yaml bots: - import: /data/policies/block-evilbot.yaml ``` This also explodes the default policy file into a bunch of composable snippets. Thank you @Aibrew for your example gitea Atom / RSS feed rules! Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(data): update botPolicies.json to use imports Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(cmd/anubis): extract bot policies with --extract-resources This allows a user that doesn't have anything but the Anubis binary to figure out what the default configuration does. * docs(data/botPolicies.yaml): document import syntax in-line Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib/policy): better test importing from JSON snippets Signed-off-by: Xe Iaso <me@xeiaso.net> * docs(admin): Add import syntax documentation This documents the import syntax and is based on the block comment at the top of the default bot policy file. * docs(changelog): add note about importing snippets Signed-off-by: Xe Iaso <me@xeiaso.net> * style(lib/policy/config): use an error value instead of an inline error Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'data/crawlers')
-rw-r--r--data/crawlers/bingbot.yaml34
-rw-r--r--data/crawlers/duckduckbot.yaml275
-rw-r--r--data/crawlers/googlebot.yaml263
-rw-r--r--data/crawlers/internet-archive.yaml8
-rw-r--r--data/crawlers/kagibot.yaml10
-rw-r--r--data/crawlers/marginalia.yaml11
-rw-r--r--data/crawlers/mojeekbot.yaml5
-rw-r--r--data/crawlers/qwantbot.yaml5
8 files changed, 611 insertions, 0 deletions
diff --git a/data/crawlers/bingbot.yaml b/data/crawlers/bingbot.yaml
new file mode 100644
index 0000000..2f7885d
--- /dev/null
+++ b/data/crawlers/bingbot.yaml
@@ -0,0 +1,34 @@
+- name: bingbot
+  action: ALLOW
+  user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm
+  # Ranges below presumably mirror https://www.bing.com/toolbox/bingbot.json (confirm when refreshing)
+  remote_addresses: [
+    '157.55.39.0/24',
+    '207.46.13.0/24',
+    '40.77.167.0/24',
+    '13.66.139.0/24',
+    '13.66.144.0/24',
+    '52.167.144.0/24',
+    '13.67.10.16/28',
+    '13.69.66.240/28',
+    '13.71.172.224/28',
+    '139.217.52.0/28',
+    '191.233.204.224/28',
+    '20.36.108.32/28',
+    '20.43.120.16/28',
+    '40.79.131.208/28',
+    '40.79.186.176/28',
+    '52.231.148.0/28',
+    '20.79.107.240/28',
+    '51.105.67.0/28',
+    '20.125.163.80/28',
+    '40.77.188.0/22',
+    '65.55.210.0/24',
+    '199.30.24.0/23',
+    '40.77.202.0/24',
+    '40.77.139.0/25',
+    '20.74.197.0/28',
+    '20.15.133.160/27',
+    '40.77.177.0/24',
+    '40.77.178.0/23'
+  ]
diff --git a/data/crawlers/duckduckbot.yaml b/data/crawlers/duckduckbot.yaml
new file mode 100644
index 0000000..302a1e3
--- /dev/null
+++ b/data/crawlers/duckduckbot.yaml
@@ -0,0 +1,244 @@
+- name: duckduckbot
+  user_agent_regex: DuckDuckBot/1\.1; \(\+http\://duckduckgo\.com/duckduckbot\.html\)
+  action: ALLOW
+  # https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot (each address is listed once; keep it that way when refreshing)
+  remote_addresses: [
+    "57.152.72.128/32",
+    "51.8.253.152/32",
+    "40.80.242.63/32",
+    "20.12.141.99/32",
+    "20.49.136.28/32",
+    "51.116.131.221/32",
+    "51.107.40.209/32",
+    "20.40.133.240/32",
+    "20.50.168.91/32",
+    "51.120.48.122/32",
+    "20.193.45.113/32",
+    "40.76.173.151/32",
+    "40.76.163.7/32",
+    "20.185.79.47/32",
+    "52.142.26.175/32",
+    "20.185.79.15/32",
+    "52.142.24.149/32",
+    "40.76.162.208/32",
+    "40.76.163.23/32",
+    "40.76.162.191/32",
+    "40.76.162.247/32",
+    "40.88.21.235/32",
+    "20.191.45.212/32",
+    "52.146.59.12/32",
+    "52.146.59.156/32",
+    "52.146.59.154/32",
+    "52.146.58.236/32",
+    "20.62.224.44/32",
+    "51.104.180.53/32",
+    "51.104.180.47/32",
+    "51.104.180.26/32",
+    "51.104.146.225/32",
+    "51.104.146.235/32",
+    "20.73.202.147/32",
+    "20.73.132.240/32",
+    "20.71.12.143/32",
+    "20.56.197.58/32",
+    "20.56.197.63/32",
+    "20.43.150.93/32",
+    "20.43.150.85/32",
+    "20.44.222.1/32",
+    "40.89.243.175/32",
+    "13.89.106.77/32",
+    "52.143.242.6/32",
+    "52.143.241.111/32",
+    "52.154.60.82/32",
+    "20.197.209.11/32",
+    "20.197.209.27/32",
+    "20.226.133.105/32",
+    "191.234.216.4/32",
+    "191.234.216.178/32",
+    "20.53.92.211/32",
+    "20.53.91.2/32",
+    "20.207.99.197/32",
+    "20.207.97.190/32",
+    "40.81.250.205/32",
+    "40.64.106.11/32",
+    "40.64.105.247/32",
+    "20.72.242.93/32",
+    "20.99.255.235/32",
+    "20.113.3.121/32",
+    "52.224.16.221/32",
+    "52.224.21.53/32",
+    "52.224.20.204/32",
+    "52.224.21.19/32",
+    "52.224.20.249/32",
+    "52.224.20.203/32",
+    "52.224.20.190/32",
+    "52.224.16.229/32",
+    "52.224.21.20/32",
+    "52.146.63.80/32",
+    "52.224.20.227/32",
+    "52.224.20.193/32",
+    "52.190.37.160/32",
+    "52.224.21.23/32",
+    "52.224.20.223/32",
+    "52.224.20.181/32",
+    "52.224.21.49/32",
+    "52.224.21.55/32",
+    "52.224.21.61/32",
+    "52.224.19.152/32",
+    "52.224.20.186/32",
+    "52.224.21.27/32",
+    "52.224.21.51/32",
+    "52.224.20.174/32",
+    "52.224.21.4/32",
+    "51.104.164.109/32",
+    "51.104.167.71/32",
+    "51.104.160.177/32",
+    "51.104.162.149/32",
+    "51.104.167.95/32",
+    "51.104.167.54/32",
+    "51.104.166.111/32",
+    "51.104.167.88/32",
+    "51.104.161.32/32",
+    "51.104.163.250/32",
+    "51.104.164.189/32",
+    "51.104.167.19/32",
+    "51.104.160.167/32",
+    "51.104.167.110/32",
+    "20.191.44.119/32",
+    "51.104.167.104/32",
+    "20.191.44.234/32",
+    "51.104.164.215/32",
+    "51.104.167.52/32",
+    "20.191.44.22/32",
+    "51.104.167.87/32",
+    "51.104.167.96/32",
+    "20.191.44.16/32",
+    "51.104.167.61/32",
+    "51.104.164.147/32",
+    "20.50.48.159/32",
+    "40.114.182.172/32",
+    "20.50.50.130/32",
+    "20.50.50.163/32",
+    "20.50.50.46/32",
+    "40.114.182.153/32",
+    "20.50.50.118/32",
+    "20.50.49.55/32",
+    "20.50.49.25/32",
+    "40.114.183.251/32",
+    "20.50.50.123/32",
+    "20.50.49.237/32",
+    "20.50.48.192/32",
+    "20.50.50.134/32",
+    "51.138.90.233/32",
+    "40.114.183.196/32",
+    "20.50.50.146/32",
+    "40.114.183.88/32",
+    "20.50.50.145/32",
+    "20.50.50.121/32",
+    "20.50.49.40/32",
+    "51.138.90.206/32",
+    "40.114.182.45/32",
+    "51.138.90.161/32",
+    "20.50.49.0/32",
+    "40.119.232.215/32",
+    "104.43.55.167/32",
+    "40.119.232.251/32",
+    "40.119.232.50/32",
+    "40.119.232.146/32",
+    "40.119.232.218/32",
+    "104.43.54.127/32",
+    "104.43.55.117/32",
+    "104.43.55.116/32",
+    "104.43.55.166/32",
+    "52.154.169.50/32",
+    "52.154.171.70/32",
+    "52.154.170.229/32",
+    "52.154.170.113/32",
+    "52.154.171.44/32",
+    "52.154.172.2/32",
+    "52.143.244.81/32",
+    "52.154.171.87/32",
+    "52.154.171.250/32",
+    "52.154.170.28/32",
+    "52.154.170.122/32",
+    "52.143.243.117/32",
+    "52.143.247.235/32",
+    "52.154.171.235/32",
+    "52.154.171.196/32",
+    "52.154.171.0/32",
+    "52.154.170.243/32",
+    "52.154.170.26/32",
+    "52.154.169.200/32",
+    "52.154.170.96/32",
+    "52.154.170.88/32",
+    "52.154.171.150/32",
+    "52.154.171.205/32",
+    "52.154.170.117/32",
+    "52.154.170.209/32",
+    "191.235.202.48/32",
+    "191.233.3.202/32",
+    "191.235.201.214/32",
+    "191.233.3.197/32",
+    "191.235.202.38/32",
+    "20.53.78.144/32",
+    "20.193.24.10/32",
+    "20.53.78.236/32",
+    "20.53.78.138/32",
+    "20.53.78.123/32",
+    "20.53.78.106/32",
+    "20.193.27.215/32",
+    "20.193.25.197/32",
+    "20.193.12.126/32",
+    "20.193.24.251/32",
+    "20.204.242.101/32",
+    "20.207.72.113/32",
+    "20.204.242.19/32",
+    "20.219.45.67/32",
+    "20.207.72.11/32",
+    "20.219.45.190/32",
+    "20.204.243.55/32",
+    "20.204.241.148/32",
+    "20.207.72.110/32",
+    "20.204.240.172/32",
+    "20.207.72.21/32",
+    "20.204.246.81/32",
+    "20.207.107.181/32",
+    "20.204.246.254/32",
+    "20.219.43.246/32",
+    "52.149.25.43/32",
+    "52.149.61.51/32",
+    "52.149.58.139/32",
+    "52.149.60.38/32",
+    "52.148.165.38/32",
+    "52.143.95.162/32",
+    "52.149.56.151/32",
+    "52.149.30.45/32",
+    "52.149.58.173/32",
+    "52.143.95.204/32",
+    "52.149.28.83/32",
+    "52.149.58.69/32",
+    "52.148.161.87/32",
+    "52.149.58.27/32",
+    "52.149.28.18/32",
+    "20.79.226.26/32",
+    "20.79.239.66/32",
+    "20.79.238.198/32",
+    "20.113.14.159/32",
+    "20.75.144.152/32",
+    "20.43.172.120/32",
+    "20.53.134.160/32",
+    "20.201.15.208/32",
+    "20.93.28.24/32",
+    "20.61.34.40/32",
+    "52.242.224.168/32",
+    "20.80.129.80/32",
+    "20.195.108.47/32",
+    "4.195.133.120/32",
+    "4.228.76.163/32",
+    "4.182.131.108/32",
+    "4.209.224.56/32",
+    "108.141.83.74/32",
+    "4.213.46.14/32",
+    "172.169.17.165/32",
+    "51.8.71.117/32",
+    "20.3.1.178/32"
+  ]
diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml
new file mode 100644
index 0000000..f173512
--- /dev/null
+++ b/data/crawlers/googlebot.yaml
@@ -0,0 +1,263 @@
+- name: googlebot
+  action: ALLOW
+  user_agent_regex: \+http\://www\.google\.com/bot\.html
+  # Ranges below presumably mirror https://developers.google.com/static/search/apis/ipranges/googlebot.json (confirm when refreshing)
+  remote_addresses: [
+    '2001:4860:4801:10::/64',
+    '2001:4860:4801:11::/64',
+    '2001:4860:4801:12::/64',
+    '2001:4860:4801:13::/64',
+    '2001:4860:4801:14::/64',
+    '2001:4860:4801:15::/64',
+    '2001:4860:4801:16::/64',
+    '2001:4860:4801:17::/64',
+    '2001:4860:4801:18::/64',
+    '2001:4860:4801:19::/64',
+    '2001:4860:4801:1a::/64',
+    '2001:4860:4801:1b::/64',
+    '2001:4860:4801:1c::/64',
+    '2001:4860:4801:1d::/64',
+    '2001:4860:4801:1e::/64',
+    '2001:4860:4801:1f::/64',
+    '2001:4860:4801:20::/64',
+    '2001:4860:4801:21::/64',
+    '2001:4860:4801:22::/64',
+    '2001:4860:4801:23::/64',
+    '2001:4860:4801:24::/64',
+    '2001:4860:4801:25::/64',
+    '2001:4860:4801:26::/64',
+    '2001:4860:4801:27::/64',
+    '2001:4860:4801:28::/64',
+    '2001:4860:4801:29::/64',
+    '2001:4860:4801:2::/64',
+    '2001:4860:4801:2a::/64',
+    '2001:4860:4801:2b::/64',
+    '2001:4860:4801:2c::/64',
+    '2001:4860:4801:2d::/64',
+    '2001:4860:4801:2e::/64',
+    '2001:4860:4801:2f::/64',
+    '2001:4860:4801:31::/64',
+    '2001:4860:4801:32::/64',
+    '2001:4860:4801:33::/64',
+    '2001:4860:4801:34::/64',
+    '2001:4860:4801:35::/64',
+    '2001:4860:4801:36::/64',
+    '2001:4860:4801:37::/64',
+    '2001:4860:4801:38::/64',
+    '2001:4860:4801:39::/64',
+    '2001:4860:4801:3a::/64',
+    '2001:4860:4801:3b::/64',
+    '2001:4860:4801:3c::/64',
+    '2001:4860:4801:3d::/64',
+    '2001:4860:4801:3e::/64',
+    '2001:4860:4801:40::/64',
+    '2001:4860:4801:41::/64',
+    '2001:4860:4801:42::/64',
+    '2001:4860:4801:43::/64',
+    '2001:4860:4801:44::/64',
+    '2001:4860:4801:45::/64',
+    '2001:4860:4801:46::/64',
+    '2001:4860:4801:47::/64',
+    '2001:4860:4801:48::/64',
+    '2001:4860:4801:49::/64',
+    '2001:4860:4801:4a::/64',
+    '2001:4860:4801:4b::/64',
+    '2001:4860:4801:4c::/64',
+    '2001:4860:4801:50::/64',
+    '2001:4860:4801:51::/64',
+    '2001:4860:4801:52::/64',
+    '2001:4860:4801:53::/64',
+    '2001:4860:4801:54::/64',
+    '2001:4860:4801:55::/64',
+    '2001:4860:4801:56::/64',
+    '2001:4860:4801:60::/64',
+    '2001:4860:4801:61::/64',
+    '2001:4860:4801:62::/64',
+    '2001:4860:4801:63::/64',
+    '2001:4860:4801:64::/64',
+    '2001:4860:4801:65::/64',
+    '2001:4860:4801:66::/64',
+    '2001:4860:4801:67::/64',
+    '2001:4860:4801:68::/64',
+    '2001:4860:4801:69::/64',
+    '2001:4860:4801:6a::/64',
+    '2001:4860:4801:6b::/64',
+    '2001:4860:4801:6c::/64',
+    '2001:4860:4801:6d::/64',
+    '2001:4860:4801:6e::/64',
+    '2001:4860:4801:6f::/64',
+    '2001:4860:4801:70::/64',
+    '2001:4860:4801:71::/64',
+    '2001:4860:4801:72::/64',
+    '2001:4860:4801:73::/64',
+    '2001:4860:4801:74::/64',
+    '2001:4860:4801:75::/64',
+    '2001:4860:4801:76::/64',
+    '2001:4860:4801:77::/64',
+    '2001:4860:4801:78::/64',
+    '2001:4860:4801:79::/64',
+    '2001:4860:4801:80::/64',
+    '2001:4860:4801:81::/64',
+    '2001:4860:4801:82::/64',
+    '2001:4860:4801:83::/64',
+    '2001:4860:4801:84::/64',
+    '2001:4860:4801:85::/64',
+    '2001:4860:4801:86::/64',
+    '2001:4860:4801:87::/64',
+    '2001:4860:4801:88::/64',
+    '2001:4860:4801:90::/64',
+    '2001:4860:4801:91::/64',
+    '2001:4860:4801:92::/64',
+    '2001:4860:4801:93::/64',
+    '2001:4860:4801:94::/64',
+    '2001:4860:4801:95::/64',
+    '2001:4860:4801:96::/64',
+    '2001:4860:4801:a0::/64',
+    '2001:4860:4801:a1::/64',
+    '2001:4860:4801:a2::/64',
+    '2001:4860:4801:a3::/64',
+    '2001:4860:4801:a4::/64',
+    '2001:4860:4801:a5::/64',
+    '2001:4860:4801:c::/64',
+    '2001:4860:4801:f::/64',
+    '192.178.5.0/27',
+    '192.178.6.0/27',
+    '192.178.6.128/27',
+    '192.178.6.160/27',
+    '192.178.6.192/27',
+    '192.178.6.32/27',
+    '192.178.6.64/27',
+    '192.178.6.96/27',
+    '34.100.182.96/28',
+    '34.101.50.144/28',
+    '34.118.254.0/28',
+    '34.118.66.0/28',
+    '34.126.178.96/28',
+    '34.146.150.144/28',
+    '34.147.110.144/28',
+    '34.151.74.144/28',
+    '34.152.50.64/28',
+    '34.154.114.144/28',
+    '34.155.98.32/28',
+    '34.165.18.176/28',
+    '34.175.160.64/28',
+    '34.176.130.16/28',
+    '34.22.85.0/27',
+    '34.64.82.64/28',
+    '34.65.242.112/28',
+    '34.80.50.80/28',
+    '34.88.194.0/28',
+    '34.89.10.80/28',
+    '34.89.198.80/28',
+    '34.96.162.48/28',
+    '35.247.243.240/28',
+    '66.249.64.0/27',
+    '66.249.64.128/27',
+    '66.249.64.160/27',
+    '66.249.64.224/27',
+    '66.249.64.32/27',
+    '66.249.64.64/27',
+    '66.249.64.96/27',
+    '66.249.65.0/27',
+    '66.249.65.128/27',
+    '66.249.65.160/27',
+    '66.249.65.192/27',
+    '66.249.65.224/27',
+    '66.249.65.32/27',
+    '66.249.65.64/27',
+    '66.249.65.96/27',
+    '66.249.66.0/27',
+    '66.249.66.128/27',
+    '66.249.66.160/27',
+    '66.249.66.192/27',
+    '66.249.66.224/27',
+    '66.249.66.32/27',
+    '66.249.66.64/27',
+    '66.249.66.96/27',
+    '66.249.68.0/27',
+    '66.249.68.128/27',
+    '66.249.68.32/27',
+    '66.249.68.64/27',
+    '66.249.68.96/27',
+    '66.249.69.0/27',
+    '66.249.69.128/27',
+    '66.249.69.160/27',
+    '66.249.69.192/27',
+    '66.249.69.224/27',
+    '66.249.69.32/27',
+    '66.249.69.64/27',
+    '66.249.69.96/27',
+    '66.249.70.0/27',
+    '66.249.70.128/27',
+    '66.249.70.160/27',
+    '66.249.70.192/27',
+    '66.249.70.224/27',
+    '66.249.70.32/27',
+    '66.249.70.64/27',
+    '66.249.70.96/27',
+    '66.249.71.0/27',
+    '66.249.71.128/27',
+    '66.249.71.160/27',
+    '66.249.71.192/27',
+    '66.249.71.224/27',
+    '66.249.71.32/27',
+    '66.249.71.64/27',
+    '66.249.71.96/27',
+    '66.249.72.0/27',
+    '66.249.72.128/27',
+    '66.249.72.160/27',
+    '66.249.72.192/27',
+    '66.249.72.224/27',
+    '66.249.72.32/27',
+    '66.249.72.64/27',
+    '66.249.72.96/27',
+    '66.249.73.0/27',
+    '66.249.73.128/27',
+    '66.249.73.160/27',
+    '66.249.73.192/27',
+    '66.249.73.224/27',
+    '66.249.73.32/27',
+    '66.249.73.64/27',
+    '66.249.73.96/27',
+    '66.249.74.0/27',
+    '66.249.74.128/27',
+    '66.249.74.160/27',
+    '66.249.74.192/27',
+    '66.249.74.32/27',
+    '66.249.74.64/27',
+    '66.249.74.96/27',
+    '66.249.75.0/27',
+    '66.249.75.128/27',
+    '66.249.75.160/27',
+    '66.249.75.192/27',
+    '66.249.75.224/27',
+    '66.249.75.32/27',
+    '66.249.75.64/27',
+    '66.249.75.96/27',
+    '66.249.76.0/27',
+    '66.249.76.128/27',
+    '66.249.76.160/27',
+    '66.249.76.192/27',
+    '66.249.76.224/27',
+    '66.249.76.32/27',
+    '66.249.76.64/27',
+    '66.249.76.96/27',
+    '66.249.77.0/27',
+    '66.249.77.128/27',
+    '66.249.77.160/27',
+    '66.249.77.192/27',
+    '66.249.77.224/27',
+    '66.249.77.32/27',
+    '66.249.77.64/27',
+    '66.249.77.96/27',
+    '66.249.78.0/27',
+    '66.249.78.32/27',
+    '66.249.79.0/27',
+    '66.249.79.128/27',
+    '66.249.79.160/27',
+    '66.249.79.192/27',
+    '66.249.79.224/27',
+    '66.249.79.32/27',
+    '66.249.79.64/27',
+    '66.249.79.96/27'
+  ]
diff --git a/data/crawlers/internet-archive.yaml b/data/crawlers/internet-archive.yaml
new file mode 100644
index 0000000..5e209e8
--- /dev/null
+++ b/data/crawlers/internet-archive.yaml
@@ -0,0 +1,8 @@
+- name: internet-archive
+  action: ALLOW
+  # https://ipinfo.io/AS7941 (no user_agent_regex is set on this entry; only address ranges are listed)
+  remote_addresses: [
+    "207.241.224.0/20",
+    "208.70.24.0/21",
+    "2620:0:9c0::/48"
+  ]
diff --git a/data/crawlers/kagibot.yaml b/data/crawlers/kagibot.yaml
new file mode 100644
index 0000000..db62b57
--- /dev/null
+++ b/data/crawlers/kagibot.yaml
@@ -0,0 +1,10 @@
+- name: kagibot
+  action: ALLOW
+  user_agent_regex: \+https\://kagi\.com/bot
+  # Addresses below presumably come from https://kagi.com/bot (confirm when refreshing)
+  remote_addresses: [
+    '216.18.205.234/32',
+    '35.212.27.76/32',
+    '104.254.65.50/32',
+    '209.151.156.194/32'
+  ]
diff --git a/data/crawlers/marginalia.yaml b/data/crawlers/marginalia.yaml
new file mode 100644
index 0000000..e12ebc4
--- /dev/null
+++ b/data/crawlers/marginalia.yaml
@@ -0,0 +1,11 @@
+- name: marginalia
+  user_agent_regex: search\.marginalia\.nu
+  action: ALLOW
+  # Received directly over email; no public reference URL is recorded for these ranges
+  remote_addresses: [
+    "193.183.0.162/31",
+    "193.183.0.164/30",
+    "193.183.0.168/30",
+    "193.183.0.172/31",
+    "193.183.0.174/32"
+  ]
diff --git a/data/crawlers/mojeekbot.yaml b/data/crawlers/mojeekbot.yaml
new file mode 100644
index 0000000..fcd20f5
--- /dev/null
+++ b/data/crawlers/mojeekbot.yaml
@@ -0,0 +1,5 @@
+- name: mojeekbot
+  user_agent_regex: https?\://www\.mojeek\.com/bot\.html
+  action: ALLOW
+  # https://www.mojeek.com/bot.html (the regex accepts both the http:// and https:// forms of this URL)
+  remote_addresses: [ "5.102.173.71/32" ]
diff --git a/data/crawlers/qwantbot.yaml b/data/crawlers/qwantbot.yaml
new file mode 100644
index 0000000..a402154
--- /dev/null
+++ b/data/crawlers/qwantbot.yaml
@@ -0,0 +1,5 @@
+- name: qwantbot
+  action: ALLOW
+  user_agent_regex: \+https\://help\.qwant\.com/bot/
+  # Range below presumably mirrors https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json (confirm when refreshing)
+  remote_addresses: ['91.242.162.0/24']