From e9269338e7cb2f3aca36b0f5c1636912f4cf6c22 Mon Sep 17 00:00:00 2001 From: WeebDataHoarder <57538841+WeebDataHoarder@users.noreply.github.com> Date: Thu, 3 Apr 2025 14:18:55 +0200 Subject: [PATCH] Add response poisoning --- build-poison.sh | 9 +++ embed.go | 3 + go.mod | 2 + go.sum | 6 ++ lib/http.go | 43 +++++++++++ lib/poison.go | 26 +++++++ lib/policy/rule.go | 1 + lib/state.go | 2 + poison/generator.go | 180 ++++++++++++++++++++++++++++++++++++++++++++ policy.yml | 22 +++++- 10 files changed, 291 insertions(+), 3 deletions(-) create mode 100755 build-poison.sh create mode 100644 lib/poison.go create mode 100644 poison/generator.go diff --git a/build-poison.sh b/build-poison.sh new file mode 100755 index 0000000..c1173ad --- /dev/null +++ b/build-poison.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -e +set -o pipefail + +cd "$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" + + +go run ./poison -path ./poison/ \ No newline at end of file diff --git a/embed.go b/embed.go index 86cf97d..959e2d7 100644 --- a/embed.go +++ b/embed.go @@ -10,3 +10,6 @@ var ChallengeFs embed.FS //go:embed templates var TemplatesFs embed.FS + +//go:embed poison/*.poison +var PoisonFs embed.FS diff --git a/go.mod b/go.mod index 562943f..330de11 100644 --- a/go.mod +++ b/go.mod @@ -4,9 +4,11 @@ go 1.24 require ( codeberg.org/meta/gzipped/v2 v2.0.0-20231111234332-aa70c3194756 + github.com/andybalholm/brotli v1.1.1 github.com/go-jose/go-jose/v4 v4.0.5 github.com/google/cel-go v0.24.1 github.com/itchyny/gojq v0.12.17 + github.com/klauspost/compress v1.18.0 github.com/tetratelabs/wazero v1.9.0 github.com/yl2chen/cidranger v1.0.2 gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum index 9cb45e3..c1c03e0 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ cel.dev/expr v0.22.1 h1:xoFEsNh972Yzey8N9TCPx2nDvMN7TMhQEzxLuj/iRrI= cel.dev/expr v0.22.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= codeberg.org/meta/gzipped/v2 v2.0.0-20231111234332-aa70c3194756 
h1:bDqEUEYt4UJy8mfLCZeJuXx+xNJvdqTbkE4Ci11NQYU= codeberg.org/meta/gzipped/v2 v2.0.0-20231111234332-aa70c3194756/go.mod h1:aJ/ghJW7viYfwZ6OizDst+uJgbb6r/Hvoqhmi1OPTTw= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -19,6 +21,8 @@ github.com/itchyny/timefmt-go v0.1.6 h1:ia3s54iciXDdzWzwaVKXZPbiXzxxnv1SPGFfM/my github.com/itchyny/timefmt-go v0.1.6/go.mod h1:RRDZYC5s9ErkjQvTvvU7keJjxUYzIISJGxm9/mAERQg= github.com/kevinpollet/nego v0.0.0-20211010160919-a65cd48cee43 h1:Pdirg1gwhEcGjMLyuSxGn9664p+P8J9SrfMgpFwrDyg= github.com/kevinpollet/nego v0.0.0-20211010160919-a65cd48cee43/go.mod h1:ahLMuLCUyDdXqtqGyuwGev7/PGtO7r7ocvdwDuEN/3E= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= @@ -34,6 +38,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I= github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod 
h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU= github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g= golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= diff --git a/lib/http.go b/lib/http.go index 85e6b03..d3a147e 100644 --- a/lib/http.go +++ b/lib/http.go @@ -14,12 +14,14 @@ import ( "git.gammaspectra.live/git/go-away/lib/policy" "github.com/google/cel-go/common/types" "html/template" + "io" "log/slog" "maps" "net" "net/http" "net/http/httputil" "net/url" + "path" "path/filepath" "strconv" "strings" @@ -314,6 +316,47 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) { //TODO: configure block fail(http.StatusForbidden, fmt.Errorf("access denied: blocked by administrative rule %s/%s", r.Header.Get("X-Away-Id"), rule.Hash)) return + case policy.RuleActionPOISON: + lg.Info("request poisoned", "rule", rule.Name, "rule_hash", rule.Hash) + + mime := "text/html" + switch path.Ext(r.URL.Path) { + case ".css": + case ".json", ".js", ".mjs": + + } + + encodings := strings.Split(r.Header.Get("Accept-Encoding"), ",") + for i, encoding := range encodings { + encodings[i] = strings.TrimSpace(strings.ToLower(strings.SplitN(encoding, ";", 2)[0])) // drop parameters such as ";q=0.8" so weighted codings still match + } + + reader, encoding := state.getPoison(mime, encodings) + if reader == nil { + mime = "application/octet-stream" + reader, encoding = state.getPoison(mime, encodings) + } + + if reader != nil { + defer reader.Close() + } + + w.Header().Set("Cache-Control", "max-age=0, private, must-revalidate, no-transform") + w.Header().Set("Vary", "Accept-Encoding") + w.Header().Set("Content-Type", mime) + w.Header().Set("X-Content-Type-Options", "nosniff") + if encoding != "" { + w.Header().Set("Content-Encoding", encoding) + } + w.WriteHeader(http.StatusOK) + if flusher, ok := w.(http.Flusher); ok { + // trigger chunked encoding + flusher.Flush() + } + if reader != nil { // reader is nil when no embedded poison file matched; never hand a nil io.Reader to io.Copy + _, _ = io.Copy(w, reader) + } + 
return } } } diff --git a/lib/poison.go b/lib/poison.go new file mode 100644 index 0000000..aeedb00 --- /dev/null +++ b/lib/poison.go @@ -0,0 +1,26 @@ +package lib + +import ( + go_away "git.gammaspectra.live/git/go-away" + "io" + "path" + "slices" + "strings" +) + +var poisonEncodings = []string{"br", "zstd", "gzip"} + +func (state *State) getPoison(mime string, encodings []string) (r io.ReadCloser, encoding string) { + for _, encoding = range poisonEncodings { + if !slices.Contains(encodings, encoding) { + continue + } + + p := path.Join("poison", strings.ReplaceAll(mime, "/", "_")+"."+encoding+".poison") + f, err := go_away.PoisonFs.Open(p) + if err == nil { + return f, encoding + } + } + return nil, "" +} diff --git a/lib/policy/rule.go b/lib/policy/rule.go index 874d727..7ec3bd7 100644 --- a/lib/policy/rule.go +++ b/lib/policy/rule.go @@ -8,6 +8,7 @@ const ( RuleActionBLOCK RuleAction = "BLOCK" RuleActionCHALLENGE RuleAction = "CHALLENGE" RuleActionCHECK RuleAction = "CHECK" + RuleActionPOISON RuleAction = "POISON" ) type Rule struct { diff --git a/lib/state.go b/lib/state.go index f7389c0..1d8fb71 100644 --- a/lib/state.go +++ b/lib/state.go @@ -57,6 +57,8 @@ type State struct { PublicKey ed25519.PublicKey PrivateKey ed25519.PrivateKey + + Poison map[string][]byte } type RuleState struct { diff --git a/poison/generator.go b/poison/generator.go new file mode 100644 index 0000000..f7c4bb8 --- /dev/null +++ b/poison/generator.go @@ -0,0 +1,180 @@ +package main + +import ( + "bytes" + "compress/gzip" + "flag" + "fmt" + "github.com/andybalholm/brotli" + "github.com/klauspost/compress/zstd" + "io" + "math/rand/v2" + "os" + "path" + "slices" + "strings" + "sync" +) + +type poisonCharacterGenerator struct { + Header []byte + AllowedBytes []byte + Repeat int + counter int +} + +func (r *poisonCharacterGenerator) Read(p []byte) (n int, err error) { + if len(r.Header) > 0 { + copy(p, r.Header) + nn := min(len(r.Header), len(p)) + r.Header = r.Header[nn:] + p = p[nn:] + 
} + + stride := min(len(p), r.Repeat) + for i := 0; i < len(p); i += stride { + copy(p[i:], bytes.Repeat([]byte{r.AllowedBytes[r.counter]}, stride)) + r.counter = (r.counter + 1) % len(r.AllowedBytes) + } + return len(p), nil +} + +type poisonValuesGenerator struct { + Header []byte + AllowedValues [][]byte + counter int +} + +func (r *poisonValuesGenerator) Read(p []byte) (n int, err error) { + var i int + + if len(r.Header) > 0 { + copy(p, r.Header) + nn := min(len(r.Header), len(p)) + r.Header = r.Header[nn:] + i += nn + + for i < len(p) { + copy(p[i:], r.AllowedValues[r.counter]) + i += len(r.AllowedValues[r.counter]) + r.counter = (r.counter + 1) % len(r.AllowedValues) + if r.counter == 0 { + break + } + } + } + + for i < len(p) { + buf := slices.Repeat(r.AllowedValues[r.counter], len(r.AllowedValues)-r.counter) + copy(p[i:], buf) + i += len(buf) + r.counter = (r.counter + 1) % len(r.AllowedValues) + } + return len(p), nil +} + +func main() { + + outputPath := flag.String("path", "./", "path to poison files") + + flag.Parse() + + const Gigabyte = 1024 * 1024 * 1024 + + compressPoison(*outputPath, "text/html", &poisonValuesGenerator{ + Header: []byte(fmt.Sprintf("%d", rand.Uint64())), + AllowedValues: [][]byte{ + []byte("


\n"), + []byte("

\n"), + []byte("

\n"), + []byte("

"), + []byte("
\n"), + []byte("

"), + []byte("

Are you a bot?

\n"), + []byte(""), + }, + }, Gigabyte) +} + +var poisonEncodings = []string{"br", "zstd", "gzip"} + +func compressPoison(outputPath, mime string, r io.Reader, maxSize int64) { + r = io.LimitReader(r, maxSize) + + var closers []func() + var encoders []io.Writer + var writers []io.Writer + var readers []io.Reader + + for _, encoding := range poisonEncodings { + f, err := os.Create(path.Join(outputPath, strings.ReplaceAll(mime, "/", "_")+"."+encoding+".poison")) + if err != nil { + panic(err) + } + switch encoding { + case "zstd": + w, err := zstd.NewWriter(f, zstd.WithEncoderLevel(zstd.SpeedBestCompression), zstd.WithEncoderCRC(false), zstd.WithWindowSize(zstd.MaxWindowSize)) + if err != nil { + panic(err) + } + encoders = append(encoders, w) + closers = append(closers, func() { + w.Close() + f.Close() + }) + case "br": + w := brotli.NewWriterLevel(f, brotli.BestCompression) + encoders = append(encoders, w) + closers = append(closers, func() { + w.Close() + f.Close() + }) + case "gzip": + w, err := gzip.NewWriterLevel(f, gzip.BestCompression) + if err != nil { + panic(err) + } + encoders = append(encoders, w) + closers = append(closers, func() { + w.Close() + f.Close() + }) + } + r, w := io.Pipe() + readers = append(readers, r) + writers = append(writers, w) + } + + var wg sync.WaitGroup + + for i := range poisonEncodings { + wg.Add(1) + go func() { + defer wg.Done() + + _, err := io.Copy(encoders[i], readers[i]) + if err != nil { + panic(err) + } + closers[i]() + + // discard remaining data + _, _ = io.Copy(io.Discard, readers[i]) + }() + } + + _, err := io.Copy(io.MultiWriter(writers...), r) + if err != nil { + panic(err) + } + + for _, w := range writers { + if pw, ok := w.(io.Closer); ok { + pw.Close() + } else { + panic("writer is not a Closer") + } + } + + wg.Wait() +} diff --git a/policy.yml b/policy.yml index e7a988e..01e567f 100644 --- a/policy.yml +++ b/policy.yml @@ -15,6 +15,10 @@ networks: # AS45102 - url: 
https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/45102/aggregated.json jq-path: '.subnets.ipv4[], .subnets.ipv6[]' + zenlayer-inc: + # AS21859 + - url: https://raw.githubusercontent.com/ipverse/asn-ip/refs/heads/master/as/21859/aggregated.json + jq-path: '.subnets.ipv4[], .subnets.ipv6[]' aws-cloud: - url: https://ip-ranges.amazonaws.com/ip-ranges.json jq-path: '(.prefixes[] | select(has("ip_prefix")) | .ip_prefix), (.prefixes[] | select(has("ipv6_prefix")) | .ipv6_prefix)' @@ -180,6 +184,11 @@ conditions: is-git-path: - 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")' + is-generic-robot-ua: + - 'userAgent.contains("compatible;") && !userAgent.contains("Trident/")' + - 'userAgent.matches("\\+https?://")' + - 'userAgent.matches("[bB]ot/[0-9]")' + is-tool-ua: - 'userAgent.startsWith("python-requests/")' - 'userAgent.startsWith("Python-urllib/")' @@ -237,13 +246,12 @@ conditions: rules: - name: undesired-networks conditions: - - 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress)' - action: deny + - 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress) || inNetwork("zenlayer-inc", remoteAddress)' + action: poison - name: undesired-crawlers conditions: - '($is-headless-chromium)' - - 'userAgent == ""' - 'userAgent.startsWith("Lightpanda/")' - 'userAgent.startsWith("masscan/")' # Typo'd opera botnet @@ -261,8 +269,16 @@ rules: - 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")' # Other AI crawlers - 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")' + action: poison + + - name: unknown-crawlers + conditions: + # No user agent set + - 'userAgent == ""' action: 
deny + + # check a sequence of challenges for non logged in - name: suspicious-crawlers/0 conditions: ['($is-suspicious-crawler)']