feat: add go-away for redlib

2025-07-25 11:33:05 +05:30
parent fbedf747be
commit adb402232f
2 changed files with 197 additions and 1 deletions
--- a/privfrontends/configs/redlib/policy.yml
+++ b/privfrontends/configs/redlib/policy.yml
@@ -0,0 +1,181 @@
+# Define networks to be used later below
+networks:
+  # Networks will get included from snippets
+
+
+challenges:
+  # Challenges will get included from snippets
+
+conditions:
+  # Conditions will get replaced on rules AST when found as ($condition-name)
+
+  # Conditions will get included from snippets
+
+
+  is-static-asset:
+    - 'path == "/apple-touch-icon.png"'
+    - 'path == "/apple-touch-icon-precomposed.png"'
+    - 'path.matches("\\.(manifest|ttf|woff|woff2|jpg|jpeg|gif|png|webp|avif|svg|mp4|webm|css|js|mjs|wasm)$")'
+
+  is-suspicious-crawler:
+    - 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
+    # Old IE browsers
+    - 'userAgent.matches("MSIE ([2-9]|10|11)\\.")'
+    # Old Linux browsers
+    - 'userAgent.matches("Linux i[63]86") || userAgent.matches("FreeBSD i[63]86")'
+    # Old Windows browsers
+    - 'userAgent.matches("Windows (3|95|98|CE)") || userAgent.matches("Windows NT [1-5]\\.")'
+    # Old mobile browsers
+    - 'userAgent.matches("Android [1-5]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
+    # Old generic browsers
+    - 'userAgent.startsWith("Opera/")'
+    #- 'userAgent.matches("Gecko/(201[0-9]|200[0-9])")'
+    - 'userAgent.matches("^Mozilla/[1-4]")'
+
+
+# Rules are checked sequentially in order, from top to bottom
+rules:
+  - name: allow-well-known-resources
+    conditions:
+      - '($is-well-known-asset)'
+    action: pass
+
+  - name: allow-static-resources
+    conditions:
+      - '($is-static-asset)'
+    action: pass
+
+  - name: allow-hls-js  # hls.min.js and everything under /hls/ is passed
+    conditions:
+      - 'path == "/hls.min.js"'
+      - 'path.startsWith("/hls/")'
+    action: pass
+
+  - name: desired-crawlers
+    conditions:
+      - *is-bot-googlebot
+      - *is-bot-bingbot
+      - *is-bot-duckduckbot
+      - *is-bot-kagibot
+      - *is-bot-qwantbot
+      - *is-bot-yandexbot
+    action: pass
+
+  # Matches private networks and localhost.
+  # Uncomment this rule if you want to let your own tools through this way
+  #- name: allow-private-networks
+  #  conditions:
+  #    # Allows localhost and private networks CIDR
+  #    - *is-network-localhost
+  #    - *is-network-private
+  #  action: pass
+
+  - name: undesired-crawlers
+    conditions:
+      - '($is-headless-chromium)'
+      - 'userAgent.startsWith("Lightpanda/")'
+      - 'userAgent.startsWith("masscan/")'
+      # Typo'd opera botnet
+      - 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
+      # AI bullshit stuff, they do not respect robots.txt even while they read it
+      # TikTok Bytedance AI training
+      - 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider") || userAgent.contains("TikTokSpider")'
+      # Meta AI training; The Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly.
+      - 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")'
+      # Who the fuck is this ?
+      - 'userAgent.contains("SemrushBot") || userAgent.contains("Barklower")'
+      # Anthropic AI training and usage
+      - 'userAgent.contains("ClaudeBot") || userAgent.contains("Claude-User")|| userAgent.contains("Claude-SearchBot")'
+      # Common Crawl AI crawlers
+      - 'userAgent.contains("CCBot")'
+      # ChatGPT AI crawlers https://platform.openai.com/docs/bots
+      - 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")'
+      # Other AI crawlers
+      - 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")'
+      # SEO / Ads and marketing
+      - 'userAgent.contains("BLEXBot")'
+    action: drop
+
+  - name: unknown-crawlers
+    conditions:
+      # No user agent set
+      - 'userAgent == ""'
+    action: deny
+
+  # Suspicious user agents: run the child checks below, one challenge set each
+  - name: suspicious-crawlers
+    conditions: ['($is-suspicious-crawler)']
+    action: none
+    children:
+      - name: "0"  # quoted so the name stays a string, not an integer
+        action: check
+        settings:
+          challenges: [js-refresh]
+      - name: "1"
+        action: check
+        settings:
+          challenges: [preload-link, resource-load]
+      - name: "2"
+        action: check
+        settings:
+          challenges: [header-refresh]
+
+  # check DNSBL and serve harder challenges
+  # todo: make this specific to score
+  - name: undesired-dnsbl
+    action: check
+    settings:
+      challenges: [dnsbl]
+      # if DNSBL fails, check additional challenges
+      fail: check
+      fail-settings:
+        challenges: [js-refresh]
+
+  - name: suspicious-fetchers
+    action: check
+    settings:
+      challenges: [js-refresh]
+    conditions:
+      - 'userAgent.contains("facebookexternalhit/") || userAgent.contains("facebookcatalog/")'
+
+  # Allow PUT/DELETE/PATCH/POST requests in general
+  - name: non-get-request
+    action: pass
+    conditions:
+      - '!(method == "HEAD" || method == "GET")'
+
+  # Enable fetching OpenGraph and other tags from backend on these paths
+  - name: enable-meta-tags
+    action: context
+    settings:
+      context-set:
+        # Map OpenGraph or similar <meta> tags back to the reply, even if denied/challenged
+        proxy-meta-tags: "true"
+
+      # Set additional response headers
+      #response-headers:
+      # X-Clacks-Overhead:
+      #  - GNU Terry Pratchett
+
+  - name: plaintext-browser
+    action: challenge
+    settings:
+      challenges: [meta-refresh, cookie]
+    conditions:
+      - 'userAgent.startsWith("Lynx/")'
+
+  # Challenges tool-like user agents; comment this rule out to disable that
+  - name: standard-tools
+    action: challenge
+    settings:
+      challenges: [cookie]
+    conditions:
+      - '($is-generic-robot-ua)'
+      - '($is-tool-ua)'
+      - '!($is-generic-browser)'
+
+  - name: standard-browser
+    action: challenge
+    settings:
+      challenges: [preload-link, meta-refresh, resource-load, js-refresh]
+    conditions: ['($is-generic-browser)']  # NOTE(review): key was empty; confirm intended match
--- a/privfrontends/vars.yaml
+++ b/privfrontends/vars.yaml
@@ -114,7 +114,7 @@ apps:
          - name: redlib
            image: quay.io/redlib/redlib:latest
            ports:
-              - "6464:8080"
+              - "8080"
            environment:
              FRONT_PAGE: popular
              COMMENT_SORT: new
@@ -122,6 +122,21 @@ apps:
              BLUR_NSFW: on
              USE_HLS: on
              AUTOPLAY_VIDEOS: off
+          - name: go-away
+            image: git.projectsegfau.lt/midou/go-away:latest
+            ports:
+              - "6464:9980"  # host port 6464 now reaches go-away, not redlib directly
+            mounts:
+              - "./cache:/cache"
+              - "./policy.yml:/policy.yml:ro"
+            environment:
+              GOAWAY_BIND: ":9980"
+              GOAWAY_BIND_NETWORK: "proxy"  # NOTE(review): confirm "proxy" is an accepted bind network
+              GOAWAY_POLICY: "/policy.yml"
+              GOAWAY_SLOG_LEVEL: "WARN"
+              GOAWAY_CHALLENGE_TEMPLATE: "redlib"
+              GOAWAY_BACKEND: "*=http://redlib:8080"
+
    nitter:
      needs_data_dir: true
      needs_configs_dir: true