feat: add go-away for redlib
privfrontends/configs/redlib/policy.yml (new file, 181 lines)

@@ -0,0 +1,181 @@
# Define networks to be used later below
networks:
  # Networks will get included from snippets

challenges:
  # Challenges will get included from snippets

conditions:
  # Conditions will get replaced in the rule AST when found as ($condition-name)
  # Conditions will get included from snippets
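  # A minimal sketch of how the substitution works (hypothetical condition
  # name, kept commented out): defining
  #   is-old-msie:
  #     - 'userAgent.matches("MSIE [2-9]\\.")'
  # would let a rule below reference it as '($is-old-msie)', exactly as
  # ($is-static-asset) expands to the expressions defined next.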
  is-static-asset:
    - 'path == "/apple-touch-icon.png"'
    - 'path == "/apple-touch-icon-precomposed.png"'
    - 'path.matches("\\.(manifest|ttf|woff|woff2|jpg|jpeg|gif|png|webp|avif|svg|mp4|webm|css|js|mjs|wasm)$")'

  is-suspicious-crawler:
    - 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
    # Old IE browsers
    - 'userAgent.matches("MSIE ([2-9]|10|11)\\.")'
    # Old Linux browsers
    - 'userAgent.matches("Linux i[63]86") || userAgent.matches("FreeBSD i[63]86")'
    # Old Windows browsers
    - 'userAgent.matches("Windows (3|95|98|CE)") || userAgent.matches("Windows NT [1-5]\\.")'
    # Old mobile browsers
    - 'userAgent.matches("Android [1-5]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
    # Old generic browsers
    - 'userAgent.startsWith("Opera/")'
    #- 'userAgent.matches("Gecko/(201[0-9]|200[0-9])")'
    - 'userAgent.matches("^Mozilla/[1-4]")'
# Rules are checked sequentially in order, from top to bottom
rules:
  - name: allow-well-known-resources
    conditions:
      - '($is-well-known-asset)'
    action: pass

  - name: allow-static-resources
    conditions:
      - '($is-static-asset)'
    action: pass

  - name: allow-hls-js
    conditions:
      - 'path == "/hls.min.js"'
      - 'path.startsWith("/hls/")'
    action: pass

  - name: desired-crawlers
    conditions:
      - *is-bot-googlebot
      - *is-bot-bingbot
      - *is-bot-duckduckbot
      - *is-bot-kagibot
      - *is-bot-qwantbot
      - *is-bot-yandexbot
    action: pass
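    # The *is-bot-... entries above are YAML aliases; their anchors are
    # presumably defined in the snippets that get included at the top of
    # this file.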
  # Matches private networks and localhost.
  # Uncomment this if you want to let your own tools through this way
  #- name: allow-private-networks
  #  conditions:
  #    # Allows localhost and private network CIDRs
  #    - *is-network-localhost
  #    - *is-network-private
  #  action: pass
  - name: undesired-crawlers
    conditions:
      - '($is-headless-chromium)'
      - 'userAgent.startsWith("Lightpanda/")'
      - 'userAgent.startsWith("masscan/")'
      # Typo'd Opera botnet
      - 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
      # AI bullshit stuff; they do not respect robots.txt even though they read it
      # TikTok/Bytedance AI training
      - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider") || userAgent.contains("TikTokSpider")'
      # Meta AI training; the Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly.
      - 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")'
      # Who the fuck is this?
      - 'userAgent.contains("SemrushBot") || userAgent.contains("Barklower")'
      # Anthropic AI training and usage
      - 'userAgent.contains("ClaudeBot") || userAgent.contains("Claude-User") || userAgent.contains("Claude-SearchBot")'
      # Common Crawl AI crawlers
      - 'userAgent.contains("CCBot")'
      # ChatGPT AI crawlers https://platform.openai.com/docs/bots
      - 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")'
      # Other AI crawlers
      - 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")'
      # SEO / ads and marketing
      - 'userAgent.contains("BLEXBot")'
    action: drop
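    # drop closes the connection without answering; deny (used below) still
    # serves a denial response. (Assumed semantics; verify against the
    # go-away documentation.)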
  - name: unknown-crawlers
    conditions:
      # No user agent set
      - 'userAgent == ""'
    action: deny

  # Check a sequence of challenges
  - name: suspicious-crawlers
    conditions: ['($is-suspicious-crawler)']
    action: none
    children:
      - name: 0
        action: check
        settings:
          challenges: [js-refresh]
      - name: 1
        action: check
        settings:
          challenges: [preload-link, resource-load]
      - name: 2
        action: check
        settings:
          challenges: [header-refresh]
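      # The numbered stages presumably run in order, moving from a JS-based
      # refresh to passive preload/resource-load probes and finally a plain
      # header-based refresh.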
  # Check DNSBL and serve harder challenges
  # TODO: make this specific to score
  - name: undesired-dnsbl
    action: check
    settings:
      challenges: [dnsbl]
    # If DNSBL fails, check additional challenges
    fail: check
    fail-settings:
      challenges: [js-refresh]
  - name: suspicious-fetchers
    action: check
    settings:
      challenges: [js-refresh]
    conditions:
      - 'userAgent.contains("facebookexternalhit/") || userAgent.contains("facebookcatalog/")'

  # Allow PUT/DELETE/PATCH/POST requests in general
  - name: non-get-request
    action: pass
    conditions:
      - '!(method == "HEAD" || method == "GET")'
  # Enable fetching OpenGraph and other tags from the backend on these paths
  - name: enable-meta-tags
    action: context
    settings:
      context-set:
        # Map OpenGraph or similar <meta> tags back into the reply, even if denied/challenged
        proxy-meta-tags: "true"

      # Set additional response headers
      #response-headers:
      #  X-Clacks-Overhead:
      #    - GNU Terry Pratchett
  - name: plaintext-browser
    action: challenge
    settings:
      challenges: [meta-refresh, cookie]
    conditions:
      - 'userAgent.startsWith("Lynx/")'

  # Comment this rule out if you don't want to challenge tool-like user agents
  - name: standard-tools
    action: challenge
    settings:
      challenges: [cookie]
    conditions:
      - '($is-generic-robot-ua)'
      - '($is-tool-ua)'
      - '!($is-generic-browser)'
  - name: standard-browser
    action: challenge
    settings:
      challenges: [preload-link, meta-refresh, resource-load, js-refresh]
    conditions:
@@ -114,7 +114,7 @@ apps:
   - name: redlib
     image: quay.io/redlib/redlib:latest
     ports:
-      - "6464:8080"
+      - "8080"
     environment:
       FRONT_PAGE: popular
       COMMENT_SORT: new
@@ -122,6 +122,21 @@ apps:
       BLUR_NSFW: on
       USE_HLS: on
       AUTOPLAY_VIDEOS: off
+  - name: go-away
+    image: git.projectsegfau.lt/midou/go-away:latest
+    ports:
+      - "6464:9980"
+    mounts:
+      - "./cache:/cache"
+      - "./policy.yml:/policy.yml:ro"
+    environment:
+      GOAWAY_BIND: ":9980"
+      GOAWAY_BIND_NETWORK: "proxy"
+      GOAWAY_POLICY: "/policy.yml"
+      GOAWAY_SLOG_LEVEL: "WARN"
+      GOAWAY_CHALLENGE_TEMPLATE: redlib
+      GOAWAY_BACKEND: "*=http://redlib:8080"
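+      # GOAWAY_BACKEND maps request hosts to upstream URLs; "*" matches any
+      # host. Presumably several host=upstream pairs can be listed, e.g.
+      # "redlib.example.org=http://redlib:8080" (hypothetical hostname).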

   nitter:
     needs_data_dir: true
     needs_configs_dir: true