From 6f3d81618c3d1420e2b04068c3b3f37f417222e3 Mon Sep 17 00:00:00 2001 From: Alan Orth Date: Fri, 25 Apr 2025 09:34:43 +0300 Subject: [PATCH] examples: add TikTokSpider Requests using this user agent are coming from the same Amazon networks as Bytespider. --- examples/forgejo.yml | 2 +- examples/generic.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/forgejo.yml b/examples/forgejo.yml index 7736fcb..7fb7bed 100644 --- a/examples/forgejo.yml +++ b/examples/forgejo.yml @@ -106,7 +106,7 @@ rules: - 'userAgent.matches("^Opera/[0-9.]+\\.\\(")' # AI bullshit stuff, they do not respect robots.txt even while they read it # TikTok Bytedance AI training - - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider")' + - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider") || userAgent.contains("TikTokSpider")' # Meta AI training; The Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly. - 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")' # Anthropic AI training and usage diff --git a/examples/generic.yml b/examples/generic.yml index 973b3db..b4ee00b 100644 --- a/examples/generic.yml +++ b/examples/generic.yml @@ -59,7 +59,7 @@ rules: - 'userAgent.matches("^Opera/[0-9.]+\\.\\(")' # AI bullshit stuff, they do not respect robots.txt even while they read it # TikTok Bytedance AI training - - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider")' + - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider") || userAgent.contains("TikTokSpider")' # Meta AI training; The Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly. 
- 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")' # Anthropic AI training and usage