From a40c5e99fc182b6f2fbff5c3f94d9f46c1abce45 Mon Sep 17 00:00:00 2001 From: Dryusdan Date: Sat, 19 Apr 2025 00:28:56 +0200 Subject: Add more AI user agent in botPolicies.json (#249) * Add more AI user agents in bot policies * Update data/botPolicies.json Signed-off-by: Xe Iaso * Fix trailing pipe that denies all requests --------- Signed-off-by: Xe Iaso Co-authored-by: Xe Iaso --- data/botPolicies.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'data') diff --git a/data/botPolicies.json b/data/botPolicies.json index 7d6e4cb..dbc3d35 100644 --- a/data/botPolicies.json +++ b/data/botPolicies.json @@ -1,8 +1,8 @@ { "bots": [ { - "name": "amazonbot", - "user_agent_regex": "Amazonbot", + "name": "ai-robots-txt", + "user_agent_regex": "AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot", "action": "DENY" }, { -- cgit v1.2.3