10 Commits

Author SHA1 Message Date
WeebDataHoarder
088115a86f Point generic challenge to generic.yml 2025-04-13 13:12:39 +02:00
WeebDataHoarder
6d5aaddd03 New README 2025-04-13 13:10:56 +02:00
WeebDataHoarder
06e8556d68 Add generic template, update README 2025-04-13 11:50:03 +02:00
WeebDataHoarder
cb0a3732bd Update forgejo template 2025-04-13 11:24:58 +02:00
WeebDataHoarder
f2389650eb Remove TLS debugging code on main 2025-04-13 11:16:13 +02:00
WeebDataHoarder
cc89b8657f Only serve poison if encoding for it exists 2025-04-13 11:15:24 +02:00
WeebDataHoarder
f2005d5051 Ensure JA3N is stringified on logger 2025-04-12 15:36:54 +02:00
WeebDataHoarder
617e40099f Check fingerprint ptr before usage 2025-04-12 13:56:25 +02:00
WeebDataHoarder
ca49c99cad Add support for JA3N / JA4 TLS fingerprinting 2025-04-12 02:13:05 +02:00
WeebDataHoarder
f6f00a54da Go 1.22 -> Go 1.24 bump 2025-04-11 07:50:02 +02:00
15 changed files with 1140 additions and 132 deletions

View File

@@ -93,14 +93,16 @@ local Publish(go, alpine, os, arch, trigger, platforms, extra) = {
#
[
Build("1.22", "3.20", "linux", "amd64"),
Build("1.22", "3.20", "linux", "arm64"),
Build("1.24", "3.20", "linux", "amd64"),
Build("1.24", "3.20", "linux", "arm64"),
Build("1.24", "3.21", "linux", "amd64"),
Build("1.24", "3.21", "linux", "arm64"),
# latest
Publish("1.24", "3.21", "linux", "amd64", {event: ["push"], branch: ["master"], }, ["linux/amd64", "linux/arm64"], {tags: ["latest"],}) + {name: "publish-latest"},
# modern
Publish("1.24", "3.21", "linux", "amd64", {event: ["promote", "tag"], target: ["production"], }, ["linux/amd64", "linux/arm64"], {auto_tag: true,}),
# legacy
Publish("1.22", "3.20", "linux", "amd64", {event: ["promote", "tag"], target: ["production"], }, ["linux/amd64", "linux/arm64"], {auto_tag: true,}),
Publish("1.24", "3.20", "linux", "amd64", {event: ["promote", "tag"], target: ["production"], }, ["linux/amd64", "linux/arm64"], {auto_tag: true,}),
]

View File

@@ -4,7 +4,7 @@ environment:
GOARCH: amd64
GOOS: linux
kind: pipeline
name: build-1.22-alpine3.20-amd64
name: build-1.24-alpine3.20-amd64
platform:
arch: amd64
os: linux
@@ -15,7 +15,7 @@ steps:
- mkdir .bin
- go build -v -o ./.bin/go-away ./cmd/go-away
- go build -v -o ./.bin/test-wasm-runtime ./cmd/test-wasm-runtime
image: golang:1.22-alpine3.20
image: golang:1.24-alpine3.20
name: build
- commands:
- ./.bin/test-wasm-runtime -wasm ./embed/challenge/js-pow-sha256/runtime/runtime.wasm
@@ -44,7 +44,7 @@ environment:
GOARCH: arm64
GOOS: linux
kind: pipeline
name: build-1.22-alpine3.20-arm64
name: build-1.24-alpine3.20-arm64
platform:
arch: arm64
os: linux
@@ -55,7 +55,7 @@ steps:
- mkdir .bin
- go build -v -o ./.bin/go-away ./cmd/go-away
- go build -v -o ./.bin/test-wasm-runtime ./cmd/test-wasm-runtime
image: golang:1.22-alpine3.20
image: golang:1.24-alpine3.20
name: build
- commands:
- ./.bin/test-wasm-runtime -wasm ./embed/challenge/js-pow-sha256/runtime/runtime.wasm
@@ -232,7 +232,7 @@ trigger:
type: docker
---
kind: pipeline
name: publish-1.22-alpine3.20
name: publish-1.24-alpine3.20
platform:
arch: amd64
os: linux
@@ -247,7 +247,7 @@ steps:
auto_tag_suffix: alpine3.20
build_args:
from: alpine:3.20
from_builder: golang:1.22-alpine3.20
from_builder: golang:1.24-alpine3.20
builder_driver: docker-container
compress: true
password:
@@ -268,6 +268,6 @@ trigger:
type: docker
---
kind: signature
hmac: 3cbd114d368c7bd348105921d85c703db1c1bc46de79f00daabbca23ffac6050
hmac: 8583621811fa483a6594352a8f9eeca7d66f6509f8e36bd2299b9e0723ed1451
...

398
README.md
View File

@@ -7,6 +7,252 @@ Self-hosted abuse detection and rule enforcement against low-effort mass AI scra
This documentation is a work in progress. For now, see policy examples under [examples/](examples/).
This Go package can be used as a command on `git.gammaspectra.live/git/go-away/cmd/go-away` or a library under `git.gammaspectra.live/git/go-away/lib`
## Support
If you have some suggestion or issue, feel free to open a [New Issue](https://git.gammaspectra.live/git/go-away/issues/new) on the repository.
[Pull Requests](https://git.gammaspectra.live/git/go-away/pulls) are encouraged and desired.
For real-time chat and other support join IRC on [##go-away](ircs://irc.libera.chat/##go-away) on Libera.Chat. The channel may not be monitored at all times, feel free to ping the operators there.
## Features
### Rich rule matching
[Common Expression Language (CEL)](https://cel.dev/overview/cel-overview) is used to allow arbitrary selection of client properties, not only limited to regex. Boolean operators are supported.
Templates can be defined in the Policy to allow reuse of such conditions on rule matching. Challenges can also be gated behind conditions.
See the [CEL Language Definition](https://github.com/google/cel-spec/blob/master/doc/langdef.md) for the syntax.
Rules and conditions are served with this environment:
```
remoteAddress (net.IP) - Connecting client remote address from headers or properties
host (string) - HTTP Host
method (string) - HTTP Method/Verb
userAgent (string) - HTTP User-Agent header
path (string) - HTTP request Path
query (map[string]string) - HTTP request Query arguments
headers (map[string]string) - HTTP request headers
Only available when TLS is enabled
fpJA3N (string) JA3N TLS Fingerprint
fpJA4 (string) JA4 TLS Fingerprint
```
Additionally, these functions are available:
```
Check whether a given IP is listed on the underlying defined network or CIDR
inNetwork(networkName string, address net.IP) bool
inNetwork(networkCIDR string, address net.IP) bool
Check whether a given IP is listed on the provided DNSBL
inDNSBL(address net.IP) bool
```
### Template support
Internal or external templates can be loaded to customize the look of the challenge or error page. Additionally, themes can be configured to change the look of these quickly.
These templates are included by default:
* `anubis`: An anubis-like themed challenge.
* `forgejo`: Uses the Forgejo template and assets from your own instance. Supports specifying themes like `forgejo-light` and `forgejo-dark`.
External templates for your site can be loaded specifying a full path to the `.gohtml` file. See [embed/templates/](embed/templates/) for examples to follow.
### Extended rule actions
In addition to the common PASS / CHALLENGE / DENY rules, we offer CHECK and POISON.
CHECK allows the client to be challenged but continue matching rules after these.
POISON sends defined responses to bad clients that will annoy them.
### Multiple challenge matching
Several challenges can be offered as options for rules. This allows users that have passed other challenges before to not be affected.
For example:
```yaml
- name: standard-browser
action: challenge
challenges: [http-cookie-check, self-preload-link, self-meta-refresh, self-resource-load, js-pow-sha256]
conditions:
- '($is-generic-browser)'
```
This rule has the user be checked against a backend, then attempts pass a few browser challenges.
In this case the processing would stop at `self-meta-refresh` due to the behavior of earlier challenges.
Any of these listed challenges being passed in the past will allow the client through, including non-offered `self-resource-load` and `js-pow-sha256`.
### Non-Javascript challenges
Several challenges that do not require JavaScript are offered, some targeting the HTTP stack and others a general browser behavior, or consulting with a backend service.
These can be used for light checking of requests that eliminate most of the low effort scraping.
See [Challenges](#challenges) below for a list of them.
### Custom proof-of-work JS / WASM challenges
A WASM interface for server-side proof generation and checking is offered. We provide `js-pow-sha256` as an example of one.
An internal test has shown you can implement Captchas or other browser fingerprinting tests within this interface.
If you are interested in creating your own, see the [Development](#development) section below.
### Upstream PROXY support
Support for [HAProxy PROXY protocol](https://github.com/haproxy/haproxy/blob/master/doc/proxy-protocol.txt) can be enabled.
This allows sending the client IP without altering the connection or HTTP headers.
Supported by HAProxy, [Caddy](https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#proxy_protocol), [nginx](https://nginx.org/en/docs/stream/ngx_stream_proxy_module.html#proxy_protocol) and others.
### Automatic TLS support and HTTP/2 support
You can enable automatic certificate generation and TLS for the site via any ACME directory, which enables HTTP/2.
Without TLS, HTTP/2 cleartext is supported, but you will need to configure the upstream proxy to send this protocol (`h2c://` on Caddy for example).
### TLS Fingerprinting
When running with TLS via autocert, TLS Fingerprinting of the incoming client is done.
This can be targeted on conditions or other application logic.
Read more about [JA3](https://medium.com/salesforce-engineering/tls-fingerprinting-with-ja3-and-ja3s-247362855967) and [JA4](https://github.com/FoxIO-LLC/ja4/blob/main/technical_details/README.md).
### DNSBL
You can configure a [DNSBL (Domain Name System blocklist)](https://en.wikipedia.org/wiki/Domain_Name_System_blocklist) to be queried on rules and conditions.
This allows you to serve harder or different challenges to higher risk clients, or block them from specific sections.
Only rules that match DNSBL will cause a query to be sent, meaning the bulk of requests will not be sent to this service upstream.
Results will be temporarily cached
By default, [DroneBL](https://dronebl.org/) is used.
### Network range loading
Network ranges can be loaded via fetched JSON / TXT / HTML pages, or via lists. You can filter these using _jq_ or a regex.
Example for _jq_:
```yaml
aws-cloud:
- url: https://ip-ranges.amazonaws.com/ip-ranges.json
jq-path: '(.prefixes[] | select(has("ip_prefix")) | .ip_prefix), (.prefixes[] | select(has("ipv6_prefix")) | .ipv6_prefix)'
```
Example for _regex_:
```yaml
cloudflare:
- url: https://www.cloudflare.com/ips-v4
regex: "(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+/[0-9]+)"
- url: https://www.cloudflare.com/ips-v6
regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
```
### Sharing of signing seed across instances
You can share the signing secret across multiple of your instances if you'd like to deploy multiple across the world.
That way signed secrets will be verifiable across all the instances.
By default, a random temporary key is generated every run.
### Multiple backend support
Multiple backends are supported, and rules specific on backend can be defined, and conditions and rules can match this as well.
This allows one instance to run multiple domains or subdomains.
### Package path
You can modify the path where challenges are served and package name, if you don't want its presence to be easily discoverable.
No source code editing or forking necessary!
## Why?
In the past few years this small git instance has been hit by waves and waves of scraping.
This was usually fought back by random useragent blocks for bots that did not follow [robots.txt](/robots.txt), until the past half year, where low-effort mass scraping was used more prominently.
Recently these networks go from using residential IP blocks to sending requests at several hundred rps.
If the server gets sluggish, more requests pile up. Even when denied they scrape for weeks later. Effectively spray and pray scraping, process later.
At some point about 300Mbit/s of incoming requests (not including the responses) was hitting the server. And all at nonsense URLs
If AI is so smart, why not just git clone the repositories?
Xe (anubis creator) has written about similar frustrations in several blogposts:
* [Amazon's AI crawler is making my git server unstable](https://xeiaso.net/notes/2025/amazon-crawler/) [01/17/2025]
* [Anubis works](https://xeiaso.net/notes/2025/anubis-works/) [04/12/2025]
Drew DeVault (sourcehut) has posted several articles regarding the same issues:
* [Please stop externalizing your costs directly into my face](https://drewdevault.com/2025/03/17/2025-03-17-Stop-externalizing-your-costs-on-me.html) [17/03/2025]
* (fun tidbit: I'm the one quoted as having the feedback discussion interrupted to deal with bots!)
Others were also suffering at the same time [[1]](https://donotsta.re/notice/AreSNZlRlJv73AW7tI) [[2]](https://community.ipfire.org/t/suricata-ruleset-to-prevent-ai-scraping/11974) [[3]](https://gabrielsimmer.com/blog/stop-scraping-git-forge) [[4]](https://gabrielsimmer.com/blog/stop-scraping-git-forge) [[5]](https://blog.nytsoi.net/2025/03/01/obliterated-by-ai).
---
Initially I deployed Anubis, and yeah, it does work!
This tool started as a way to replace [Anubis](https://anubis.techaro.lol/) as it was not found as featureful as desired.
go-away may not be as straight to configure as Anubis but this was chosen to reduce impact on legitimate users, and offers many more options to dynamically target new waves.
### Can't scrapers adapt?
Yes, they can. At the moment their spray-and-pray approach is cheap for them.
If they have to start adding an active browser in their scraping, that makes their collection expensive and slow.
This would more or less eliminate the high rate low effort passive scraping and replace it with an active model.
go-anubis offers a highly configurable set of challenges and rules that you can adapt to new ways.
## Example policies
### Forgejo
The policy file at [examples/forgejo.yml](examples/forgejo.yml) provides a ready template to be used on your own Forgejo instance.
Important notes:
* Edit the `homesite` rule, as it's targeted to common users or orgs on the instance. A better regex might be possible in the future.
* Edit the `http-cookie-check` challenge, as this will fetch the listed backend with the given session cookie to check for user login.
* Adjust the desired blocked networks or others. A template list of network ranges is provided, feel free to remove these if not needed.
* Check the conditions and base rules to change your challenges offered and other ordering.
* By default Googlebot / Bingbot / DuckDuckBot / Kagibot / Qwantbot / Yandexbot are allowed by useragent and network ranges.
### Generic
The policy file at [examples/generic.yml](examples/generic.yml) provides a baseline to place on any site, that can be modified to fit your needs.
Important notes:
* Edit the `homesite` rule, as it's targeted to pages you always want to have available, like landing pages.
* Edit the `is-static-asset` condition or the `allow-static-resources` rule to allow static file access as necessary.
* If you have an API, add a PASS rule targeting it.
* Check the conditions and base rules to change your challenges offered and other ordering.
* Add or modify rules to target specific pages on your site as desired.
* By default Googlebot / Bingbot / DuckDuckBot / Kagibot / Qwantbot / Yandexbot are allowed by useragent and network ranges.
## Setup
It is recommended to have another reverse proxy above (for example [Caddy](https://caddyserver.com/), nginx, HAProxy) to handle HTTPs or similar.
@@ -15,7 +261,7 @@ go-away for now only accepts plaintext connections, although it can take _HTTP/2
### Binary / Go
Requires Go 1.22+. Builds statically without CGo
Requires Go 1.24+. Builds statically without CGo usage.
```shell
git clone https://git.gammaspectra.live/git/go-away.git && cd go-away
@@ -36,7 +282,16 @@ Available under [Dockerfile](Dockerfile). See the _docker compose_ below for the
### docker compose
Example follows a hypothetical Forgejo server running on `http://forgejo:3000` serving `git.example.com`
```yaml
networks:
forgejo:
external: false
volumes:
goaway_cache:
services:
go-away:
image: git.gammaspectra.live/git/go-away:latest
@@ -48,12 +303,24 @@ services:
depends_on:
- forgejo
volumes:
- "goaway_cache:/cache"
- "./examples/forgejo.yml:/policy.yml:ro"
environment:
#GOAWAY_BIND: ":8080"
# Supported tcp, unix, and proxy (for enabling PROXY module for request unwrapping)
#GOAWAY_BIND_NETWORK: "tcp"
#GOAWAY_SOCKET_MODE: "0770"
# set to letsencrypt or other directory URL to enable HTTPS. Above ports will be TLS only.
# enables request JA3N / JA4 client TLS fingerprinting
# TLS fingerprints are served on X-TLS-Fingerprint-JA3N and X-TLS-Fingerprint-JA4 headers
# TLS fingerprints can be matched against on CEL conditions
#GOAWAY_ACME_AUTOCERT: ""
# Cache path for several services like certificates and caching network ranges
# Can be semi-ephemeral, recommended to be mapped to a permanent volume
#GOAWAY_CACHE="/cache"
# default is WARN, set to INFO to also see challenge successes and others
#GOAWAY_SLOG_LEVEL: "INFO"
@@ -64,8 +331,13 @@ services:
# HTTP header that the client ip will be fetched from
# Defaults to the connection ip itself, if set here make sure your upstream proxy sets this properly
# Usually X-Forwarded-For is a good pick
# Not necessary with GOAWAY_BIND_NETWORK: proxy
GOAWAY_CLIENT_IP_HEADER: "X-Real-Ip"
# HTTP header that go-away will set the obtained ip will be set to
# If left empty, the header on GOAWAY_CLIENT_IP_HEADER will be left as-is
#GOAWAY_BACKEND_IP_HEADER: ""
GOAWAY_POLICY: "/policy.yml"
# Template, and theme for the template to pick. defaults to an anubis-like one
@@ -81,20 +353,122 @@ services:
# additional backends can be specified via more command arguments
# command: ["--backend", "ci.example.com=http://ci:3000"]
forgejo:
# etc.
```
## Challenges
#### http
Verify incoming requests against a specified backend to allow the user through. Cookies and some other headers are passed.
For example, this allows verifying the user cookies against the backend to have the user skip all other challenges.
Example on Forgejo, checks that current user is authenticated:
```yaml
http-cookie-check:
mode: http
url: http://forgejo:3000/user/stopwatches
# url: http://forgejo:3000/repo/search
# url: http://forgejo:3000/notifications/new
parameters:
http-method: GET
http-cookie: i_like_gitea
http-code: 200
```
#### preload-link
Requires HTTP/2+ response parsing and logic, silent challenge (does not display a challenge page).
Browsers that support [103 Early Hints](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/103) are indicated to fetch a CSS resource via [Link](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Link) preload that solves the challenge.
The server waits until solved or defined timeout, then continues on other challenges if failed.
Example:
```yaml
self-preload-link:
condition: '"Sec-Fetch-Mode" in headers && headers["Sec-Fetch-Mode"] == "navigate"'
mode: "preload-link"
runtime:
# verifies that result = key
mode: "key"
probability: 0.1
parameters:
preload-early-hint-deadline: 3s
key-code: 200
key-mime: text/css
key-content: ""
```
#### header-refresh
Requires HTTP response parsing and logic, displays challenge site instantly.
Have the browser solve the challenge by following the URL listed on HTTP [Refresh](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Refresh) instantly.
#### meta-refresh
Requires HTTP and HTML response parsing and logic, displays challenge site instantly.
Have the browser solve the challenge by following the URL listed on HTML `<meta http-equiv=refresh>` tag instantly. Equivalent to above.
#### resource-load
Requires HTTP and HTML response parsing and logic, displays challenge site.
Servers a challenge page with a linked resource that is loaded by the browser, which solves the challenge. Page refreshes a few seconds later via [Refresh](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Refresh).
Example:
```yaml
self-resource-load:
mode: "resource-load"
runtime:
# verifies that result = key
mode: "key"
probability: 0.1
parameters:
key-code: 200
key-mime: text/css
key-content: ""
```
#### cookie
Requires HTTP parsing and a Cookie Jar, silent challenge (does not display a challenge page unless failed).
Serves the client with a Set-Cookie that solves the challenge, and redirects it back to the same page. Browser must present the cookie to load.
Several tools implement this, but usually not mass scrapers.
#### js-pow-sha256
Requires JavaScript and workers, displays challenge site.
Has the user solve a Proof of Work using SHA256 hashes, with configurable difficulty.
Example:
```yaml
js-pow-sha256:
# Asset must be under challenges/{name}/static/{asset}
# Other files here will be available under that path
mode: js
asset: load.mjs
parameters:
# difficulty is number of bits that must be set to 0 from start
# Anubis challenge difficulty 5 becomes 5 * 8 = 20
difficulty: 20
runtime:
mode: wasm
# Verify must be under challenges/{name}/runtime/{asset}
asset: runtime.wasm
probability: 0.02
```
## Example policies
### Forgejo
The policy file at [examples/forgejo.yml](examples/forgejo.yml) provides a ready template to be used on your own Forgejo instance.
Important notes:
* Edit the `homesite` rule, as it's targeted to common users or orgs on the instance. A better regex might be possible in the future.
* Edit the `http-cookie-check` challenge, as this will fetch the listed backend with the given session cookie to check for user login.
* Adjust the desired blocked networks or others. A template list of network ranges is provided, feel free to remove these if not needed.
* Check the conditions and base rules to change your challenges offered and other ordering.
## Development

View File

@@ -4,6 +4,7 @@ import (
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/tls"
"encoding/hex"
"errors"
"flag"
@@ -14,8 +15,6 @@ import (
"github.com/pires/go-proxyproto"
"golang.org/x/crypto/acme"
"golang.org/x/crypto/acme/autocert"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"gopkg.in/yaml.v3"
"log"
"log/slog"
@@ -98,26 +97,6 @@ func (v *MultiVar) Set(value string) error {
return nil
}
func newServer(handler http.Handler, manager *autocert.Manager) *http.Server {
if manager == nil {
h2s := &http2.Server{}
// TODO: use Go 1.24 Server.Protocols to add H2C
// https://pkg.go.dev/net/http#Server.Protocols
h1s := &http.Server{
Handler: h2c.NewHandler(handler, h2s),
}
return h1s
} else {
return &http.Server{
TLSConfig: manager.TLSConfig(),
Handler: handler,
}
}
}
func newACMEManager(clientDirectory string, backends map[string]http.Handler) *autocert.Manager {
var domains []string
@@ -262,15 +241,15 @@ func main() {
}
}
var acmeManager *autocert.Manager
var tlsConfig *tls.Config
if *acmeAutocert != "" {
switch *acmeAutocert {
case "letsencrypt":
*acmeAutocert = "https://acme-v02.api.letsencrypt.org/directory"
*acmeAutocert = acme.LetsEncryptURL
}
acmeManager = newACMEManager(*acmeAutocert, createdBackends)
acmeManager := newACMEManager(*acmeAutocert, createdBackends)
if *cachePath != "" {
err = os.MkdirAll(path.Join(*cachePath, "acme"), 0755)
if err != nil {
@@ -282,6 +261,7 @@ func main() {
"acme-autocert enabled",
"directory", *acmeAutocert,
)
tlsConfig = acmeManager.TLSConfig()
}
var wg sync.WaitGroup
@@ -294,7 +274,7 @@ func main() {
go func() {
defer wg.Done()
server := newServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
server := utils.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
backend, ok := createdBackends[r.Host]
if !ok {
http.Error(w, http.StatusText(http.StatusServiceUnavailable), http.StatusServiceUnavailable)
@@ -302,7 +282,7 @@ func main() {
}
backend.ServeHTTP(w, r)
}), acmeManager)
}), tlsConfig)
listener, listenUrl := setupListener(*bindNetwork, *bind, *socketMode, *bindProxy)
slog.Warn(
@@ -315,7 +295,7 @@ func main() {
go func() {
defer wg.Done()
if acmeManager != nil {
if tlsConfig != nil {
if err := server.ServeTLS(listener, "", ""); !errors.Is(err, http.ErrServerClosed) {
log.Fatal(err)
}
@@ -369,9 +349,9 @@ func main() {
"url", listenUrl,
)
server := newServer(state, acmeManager)
server := utils.NewServer(state, tlsConfig)
if acmeManager != nil {
if tlsConfig != nil {
if err := server.ServeTLS(listener, "", ""); !errors.Is(err, http.ErrServerClosed) {
log.Fatal(err)
}

View File

@@ -93,7 +93,6 @@ networks:
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
# todo: define interface
challenges:
js-pow-sha256:
# Asset must be under challenges/{name}/static/{asset}
@@ -167,7 +166,6 @@ challenges:
http-method: GET
http-cookie: i_like_gitea
http-code: 200
# todo: archive value of session within token to bind it
conditions:
# Conditions will get replaced on rules AST when found as ($condition-name)
@@ -209,6 +207,7 @@ conditions:
is-generic-robot-ua:
- 'userAgent.contains("compatible;") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'
is-tool-ua:
@@ -255,24 +254,18 @@ conditions:
# user activity tab
- 'path.matches("^/[^/]+$") && "tab" in query && query.tab == "activity"'
# Rules and conditions are served this environment
# remoteAddress (net.IP) - Connecting client remote address from headers or properties
# host (string) - HTTP Host
# method (string) - HTTP Method/Verb
# userAgent (string) - HTTP User-Agent header
# path (string) - HTTP request Path
# query (map[string]string) - HTTP request Query arguments
# headers (map[string]string) - HTTP request headers
#
# Additionally these functions are available
# inNetwork(networkName string, address net.IP) bool
# inNetwork(networkCIDR string, address net.IP) bool
rules:
- name: allow-well-known-resources
conditions:
- '($is-well-known-asset)'
action: pass
- name: allow-static-resources
conditions:
- '($is-static-asset)'
action: pass
- name: undesired-networks
conditions:
- 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress) || inNetwork("zenlayer-inc", remoteAddress)'
@@ -326,11 +319,6 @@ rules:
action: check
challenges: [self-resource-load]
- name: allow-static-resources
conditions:
- '($is-static-asset)'
action: pass
- name: always-pow-challenge
conditions:
# login and sign up paths

274
examples/generic.yml Normal file
View File

@@ -0,0 +1,274 @@
# Example cmdline (forward requests from upstream to port :8080)
# $ go-away --bind :8080 --backend site.example.com=http://site:3000 --policy examples/generic.yml --challenge-template anubis
# Define networks to be used later below
networks:
googlebot:
- url: https://developers.google.com/static/search/apis/ipranges/googlebot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
bingbot:
- url: https://www.bing.com/toolbox/bingbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
qwantbot:
- url: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json
jq-path: '(.prefixes[] | select(has("ipv4Prefix")) | .ipv4Prefix), (.prefixes[] | select(has("ipv6Prefix")) | .ipv6Prefix)'
duckduckbot:
- url: https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/
regex: "<li>(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)</li>"
yandexbot:
# todo: detected as bot
# - url: https://yandex.com/ips
# regex: "<span>(?P<prefix>(([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)|([0-9a-f:]+::))/[0-9]+)[ \\\\t]*</span><br/>"
- prefixes:
- "5.45.192.0/18"
- "5.255.192.0/18"
- "37.9.64.0/18"
- "37.140.128.0/18"
- "77.88.0.0/18"
- "84.252.160.0/19"
- "87.250.224.0/19"
- "90.156.176.0/22"
- "93.158.128.0/18"
- "95.108.128.0/17"
- "141.8.128.0/18"
- "178.154.128.0/18"
- "185.32.187.0/24"
- "2a02:6b8::/29"
kagibot:
- url: https://kagi.com/bot
regex: "\\n(?P<prefix>[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+) "
challenges:
js-pow-sha256:
# Asset must be under challenges/{name}/static/{asset}
# Other files here will be available under that path
mode: js
asset: load.mjs
parameters:
difficulty: 15
runtime:
mode: wasm
# Verify must be under challenges/{name}/runtime/{asset}
asset: runtime.wasm
probability: 0.02
# Challenges with a cookie, self redirect (non-JS, requires HTTP parsing)
self-cookie:
mode: "cookie"
# Challenges with a redirect via Link header with rel=preload and early hints (non-JS, requires HTTP parsing, fetching and logic)
# Works on HTTP/2 and above!
self-preload-link:
condition: '"Sec-Fetch-Mode" in headers && headers["Sec-Fetch-Mode"] == "navigate"'
mode: "preload-link"
runtime:
# verifies that result = key
mode: "key"
probability: 0.1
parameters:
preload-early-hint-deadline: 3s
key-code: 200
key-mime: text/css
key-content: ""
# Challenges with a redirect via Refresh header (non-JS, requires HTTP parsing and logic)
self-header-refresh:
mode: "header-refresh"
runtime:
# verifies that result = key
mode: "key"
probability: 0.1
# Challenges with a redirect via Refresh meta (non-JS, requires HTML parsing and logic)
self-meta-refresh:
mode: "meta-refresh"
runtime:
# verifies that result = key
mode: "key"
probability: 0.1
# Challenges with loading a random CSS or image document (non-JS, requires HTML parsing and logic)
self-resource-load:
mode: "resource-load"
runtime:
# verifies that result = key
mode: "key"
probability: 0.1
parameters:
key-code: 200
key-mime: text/css
key-content: ""
conditions:
# Conditions will get replaced on rules AST when found as ($condition-name)
# Checks to detect a headless chromium via headers only
is-headless-chromium:
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
#- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
is-generic-browser:
- 'userAgent.startsWith("Mozilla/") || userAgent.startsWith("Opera/")'
is-well-known-asset:
- 'path == "/robots.txt"'
- 'path.startsWith("/.well-known")'
is-static-asset:
- 'path == "/favicon.ico"'
- 'path == "/apple-touch-icon.png"'
- 'path == "/apple-touch-icon-precomposed.png"'
- 'path.matches("\\.(manifest|ttf|woff|woff2|jpg|jpeg|gif|png|webp|avif|svg|mp4|webm|css|js|mjs|wasm)$")'
is-generic-robot-ua:
- 'userAgent.contains("compatible;") && !userAgent.contains("Trident/")'
- 'userAgent.matches("\\+https?://")'
- 'userAgent.contains("@")'
- 'userAgent.matches("[bB]ot/[0-9]")'
is-tool-ua:
- 'userAgent.startsWith("python-requests/")'
- 'userAgent.startsWith("Python-urllib/")'
- 'userAgent.startsWith("python-httpx/")'
- 'userAgent.contains("aoihttp/")'
- 'userAgent.startsWith("http.rb/")'
- 'userAgent.startsWith("curl/")'
- 'userAgent.startsWith("Wget/")'
- 'userAgent.startsWith("libcurl/")'
- 'userAgent.startsWith("okhttp/")'
- 'userAgent.startsWith("Java/")'
- 'userAgent.startsWith("Apache-HttpClient//")'
- 'userAgent.startsWith("Go-http-client/")'
- 'userAgent.startsWith("node-fetch/")'
- 'userAgent.startsWith("reqwest/")'
is-suspicious-crawler:
- 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
# Old IE browsers
- 'userAgent.matches("MSIE ([2-9]|10|11)\\.")'
# Old Linux browsers
- 'userAgent.contains("Linux i[63]86") || userAgent.contains("FreeBSD i[63]86")'
# Old Windows browsers
- 'userAgent.matches("Windows (3|95|98|CE)") || userAgent.matches("Windows NT [1-5]\\.")'
# Old mobile browsers
- 'userAgent.matches("Android [1-5]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
# Old generic browsers
- 'userAgent.startsWith("Opera/")'
#- 'userAgent.matches("Gecko/(201[0-9]|200[0-9])")'
- 'userAgent.matches("^Mozilla/[1-4]")'
rules:
- name: allow-well-known-resources
conditions:
- '($is-well-known-asset)'
action: pass
- name: allow-static-resources
conditions:
- '($is-static-asset)'
action: pass
- name: undesired-crawlers
conditions:
- '($is-headless-chromium)'
- 'userAgent.startsWith("Lightpanda/")'
- 'userAgent.startsWith("masscan/")'
# Typo'd opera botnet
- 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
# AI bullshit stuff, they do not respect robots.txt even while they read it
# TikTok Bytedance AI training
- 'userAgent.contains("Bytedance") || userAgent.contains("Bytespider")'
# Meta AI training; The Meta-ExternalAgent crawler crawls the web for use cases such as training AI models or improving products by indexing content directly.
- 'userAgent.contains("meta-externalagent/") || userAgent.contains("meta-externalfetcher/") || userAgent.contains("FacebookBot")'
# Anthropic AI training and usage
- 'userAgent.contains("ClaudeBot") || userAgent.contains("Claude-User")|| userAgent.contains("Claude-SearchBot")'
# Common Crawl AI crawlers
- 'userAgent.contains("CCBot")'
# ChatGPT AI crawlers https://platform.openai.com/docs/bots
- 'userAgent.contains("GPTBot") || userAgent.contains("OAI-SearchBot") || userAgent.contains("ChatGPT-User")'
# Other AI crawlers
- 'userAgent.contains("Amazonbot") || userAgent.contains("Google-Extended") || userAgent.contains("PanguBot") || userAgent.contains("AI2Bot") || userAgent.contains("Diffbot") || userAgent.contains("cohere-training-data-crawler") || userAgent.contains("Applebot-Extended")'
# SEO / Ads and marketing
- 'userAgent.contains("BLEXBot")'
action: deny
- name: unknown-crawlers
conditions:
# No user agent set
- 'userAgent == ""'
action: deny
# check a sequence of challenges
- name: suspicious-crawlers/0
conditions: ['($is-suspicious-crawler)']
action: check
challenges: [js-pow-sha256]
- name: suspicious-crawlers/1
conditions: ['($is-suspicious-crawler)']
action: check
challenges: [self-preload-link]
- name: suspicious-crawlers/2
conditions: ['($is-suspicious-crawler)']
action: check
challenges: [self-header-refresh]
- name: suspicious-crawlers/3
conditions: ['($is-suspicious-crawler)']
action: check
challenges: [self-resource-load]
- name: desired-crawlers
conditions:
- 'userAgent.contains("+https://kagi.com/bot") && inNetwork("kagibot", remoteAddress)'
- '(userAgent.contains("+http://www.google.com/bot.html") || userAgent.contains("Google-InspectionTool") || userAgent.contains("Googlebot")) && inNetwork("googlebot", remoteAddress)'
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && inNetwork("bingbot", remoteAddress)'
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && inNetwork("duckduckbot", remoteAddress)'
- 'userAgent.contains("+https://help.qwant.com/bot/") && inNetwork("qwantbot", remoteAddress)'
- 'userAgent.contains("+http://yandex.com/bots") && inNetwork("yandexbot", remoteAddress)'
action: pass
- name: homesite
conditions:
- 'path == "/"'
action: pass
# check DNSBL and serve harder challenges
- name: undesired-dnsbl
conditions:
- 'inDNSBL(remoteAddress)'
action: check
challenges: [js-pow-sha256]
- name: suspicious-fetchers
action: check
challenges: [js-pow-sha256]
conditions:
- 'userAgent.contains("facebookexternalhit/") || userAgent.contains("facebookcatalog/")'
# Allow PUT/DELETE/PATCH/POST requests in general
- name: non-get-request
action: pass
conditions:
- '!(method == "HEAD" || method == "GET")'
- name: standard-tools
action: challenge
challenges: [self-cookie]
conditions:
- '($is-generic-robot-ua)'
- '($is-tool-ua)'
- '!($is-generic-browser)'
- name: standard-browser
action: challenge
challenges: [self-preload-link, self-meta-refresh, self-resource-load, js-pow-sha256]
conditions:
- '($is-generic-browser)'

30
go.mod
View File

@@ -1,21 +1,20 @@
module git.gammaspectra.live/git/go-away
go 1.22.0
go 1.24.0
toolchain go1.22.12
toolchain go1.24.2
require (
codeberg.org/meta/gzipped/v2 v2.0.0-20231111234332-aa70c3194756
github.com/andybalholm/brotli v1.1.1
github.com/go-jose/go-jose/v4 v4.0.5
github.com/go-jose/go-jose/v4 v4.1.0
github.com/google/cel-go v0.24.1
github.com/itchyny/gojq v0.12.17
github.com/klauspost/compress v1.18.0
github.com/pires/go-proxyproto v0.8.0
github.com/tetratelabs/wazero v1.9.0
github.com/yl2chen/cidranger v1.0.2
golang.org/x/crypto v0.33.0
golang.org/x/net v0.35.0
golang.org/x/crypto v0.37.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -25,25 +24,14 @@ require (
github.com/itchyny/timefmt-go v0.1.6 // indirect
github.com/kevinpollet/nego v0.0.0-20211010160919-a65cd48cee43 // indirect
github.com/stoewer/go-strcase v1.3.0 // indirect
golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac // indirect
golang.org/x/text v0.22.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
golang.org/x/exp v0.0.0 // indirect
golang.org/x/net v0.39.0 // indirect
golang.org/x/text v0.24.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250409194420-de1ac958c67a // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250409194420-de1ac958c67a // indirect
google.golang.org/protobuf v1.36.6 // indirect
)
// Used by github.com/antlr4-go/antlr v4.13.0 via github.com/google/cel-go
// Ensure we have no other exp package usages by only proxying the slices functions in that package
// Newer versions than v0.0.0-20250210185358-939b2ce775ac are not supported by Go 1.22
replace golang.org/x/exp v0.0.0 => ./utils/exp
// Pin latest versions to support Go 1.22 to prevent a package update from changing them
// TODO: remove this when Go 1.22+ is supported by other higher users
replace (
github.com/go-jose/go-jose/v4 => github.com/go-jose/go-jose/v4 v4.0.5
golang.org/x/crypto => golang.org/x/crypto v0.33.0
golang.org/x/net => golang.org/x/net v0.35.0
golang.org/x/text => golang.org/x/text v0.22.0
google.golang.org/genproto/googleapis/api => google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7
google.golang.org/genproto/googleapis/rpc => google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7
)

30
go.sum
View File

@@ -9,12 +9,12 @@ github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmO
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE=
github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA=
github.com/go-jose/go-jose/v4 v4.1.0 h1:cYSYxd3pw5zd2FSXk2vGdn9igQU2PS8MuxrCOCl0FdY=
github.com/go-jose/go-jose/v4 v4.1.0/go.mod h1:GG/vqmYm3Von2nYiB2vGTXzdoNKE5tix5tuc6iAd+sw=
github.com/google/cel-go v0.24.1 h1:jsBCtxG8mM5wiUJDSGUqU0K7Mtr3w7Eyv00rw4DiZxI=
github.com/google/cel-go v0.24.1/go.mod h1:Hdf9TqOaTNSFQA1ybQaRqATVoK7m/zcf7IMhGXP5zI8=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/itchyny/gojq v0.12.17 h1:8av8eGduDb5+rvEdaOO+zQUjA04MS0m3Ps8HiD+fceg=
github.com/itchyny/gojq v0.12.17/go.mod h1:WBrEMkgAfAGO1LUcGOckBl5O726KPp+OlkKug0I/FEY=
github.com/itchyny/timefmt-go v0.1.6 h1:ia3s54iciXDdzWzwaVKXZPbiXzxxnv1SPGFfM/myJ5Q=
@@ -44,18 +44,16 @@ github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZ
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU=
github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac h1:l5+whBCLH3iH2ZNHYLbAe58bo7yrN4mVcnkHDYz5vvs=
golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac/go.mod h1:hH+7mtFmImwwcMvScyxUhjuVHR3HGaDPMn9rMSUUbxo=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw=
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs=
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
google.golang.org/genproto/googleapis/api v0.0.0-20250409194420-de1ac958c67a h1:OQ7sHVzkx6L57dQpzUS4ckfWJ51KDH74XHTDe23xWAs=
google.golang.org/genproto/googleapis/api v0.0.0-20250409194420-de1ac958c67a/go.mod h1:2R6XrVC8Oc08GlNh8ujEpc7HkLiEZ16QeY7FxIs20ac=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250409194420-de1ac958c67a h1:GIqLhp/cYUkuGuiT+vJk8vhOP86L4+SP5j8yXgeVpvI=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250409194420-de1ac958c67a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=

View File

@@ -1,6 +1,8 @@
go 1.22.0
go 1.24.0
toolchain go1.24.2
use (
.
utils/exp
)
)

View File

@@ -1,5 +1,6 @@
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
@@ -7,20 +8,30 @@ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM=
golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU=
golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s=
golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o=
golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg=
google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=

View File

@@ -20,6 +20,8 @@ func (state *State) initConditions() (err error) {
cel.Variable("userAgent", cel.StringType),
cel.Variable("path", cel.StringType),
cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)),
cel.Variable("fpJA3N", cel.StringType),
cel.Variable("fpJA4", cel.StringType),
// http.Header
cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)),
//TODO: dynamic type?

View File

@@ -133,14 +133,24 @@ func (state *State) addTiming(w http.ResponseWriter, name, desc string, duration
func GetLoggerForRequest(r *http.Request) *slog.Logger {
data := RequestDataFromContext(r.Context())
return slog.With(
args := []any{
"request_id", hex.EncodeToString(data.Id[:]),
"remote_address", data.RemoteAddress.String(),
"user_agent", r.UserAgent(),
"host", r.Host,
"path", r.URL.Path,
"query", r.URL.RawQuery,
)
}
if fp := utils.GetTLSFingerprint(r); fp != nil {
if ja3n := fp.JA3N(); ja3n != nil {
args = append(args, "ja3n", ja3n.String())
}
if ja4 := fp.JA4(); ja4 != nil {
args = append(args, "ja4", ja4.String())
}
}
return slog.With(args...)
}
func (state *State) logger(r *http.Request) *slog.Logger {
@@ -297,24 +307,26 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
if reader != nil {
defer reader.Close()
}
w.Header().Set("Cache-Control", "max-age=0, private, must-revalidate, no-transform")
w.Header().Set("Vary", "Accept-Encoding")
w.Header().Set("Content-Type", mime)
w.Header().Set("X-Content-Type-Options", "nosniff")
if encoding != "" {
w.Header().Set("Content-Encoding", encoding)
w.Header().Set("Cache-Control", "max-age=0, private, must-revalidate, no-transform")
w.Header().Set("Vary", "Accept-Encoding")
w.Header().Set("Content-Type", mime)
w.Header().Set("X-Content-Type-Options", "nosniff")
if encoding != "" {
w.Header().Set("Content-Encoding", encoding)
}
w.WriteHeader(http.StatusOK)
if flusher, ok := w.(http.Flusher); ok {
// trigger chunked encoding
flusher.Flush()
}
if r != nil {
_, _ = io.Copy(w, reader)
}
return
} else {
http.Error(w, http.StatusText(http.StatusForbidden), http.StatusForbidden)
}
w.WriteHeader(http.StatusOK)
if flusher, ok := w.(http.Flusher); ok {
// trigger chunked encoding
flusher.Flush()
}
if r != nil {
_, _ = io.Copy(w, reader)
}
return
}
}
}
@@ -421,12 +433,27 @@ func (state *State) ServeHTTP(w http.ResponseWriter, r *http.Request) {
data.RemoteAddress = getRequestAddress(r, state.Settings.ClientIpHeader)
data.Challenges = make(map[challenge.Id]challenge.VerifyResult, len(state.Challenges))
data.Expires = time.Now().UTC().Add(DefaultValidity).Round(DefaultValidity)
var ja3n, ja4 string
if fp := utils.GetTLSFingerprint(r); fp != nil {
if ja3nPtr := fp.JA3N(); ja3nPtr != nil {
ja3n = ja3nPtr.String()
r.Header.Set("X-TLS-Fingerprint-JA3N", ja3n)
}
if ja4Ptr := fp.JA4(); ja4Ptr != nil {
ja4 = ja4Ptr.String()
r.Header.Set("X-TLS-Fingerprint-JA4", ja4)
}
}
data.ProgramEnv = map[string]any{
"host": r.Host,
"method": r.Method,
"remoteAddress": data.RemoteAddress,
"userAgent": r.UserAgent(),
"path": r.URL.Path,
"fpJA3N": ja3n,
"fpJA4": ja4,
"query": func() map[string]string {
result := make(map[string]string)
for k, v := range r.URL.Query() {

View File

@@ -1,5 +1,5 @@
module git.gammaspectra.live/git/go-away/utils/exp
go 1.22.0
go 1.24.0
toolchain go1.22.12
toolchain go1.24.2

339
utils/fingerprint.go Normal file
View File

@@ -0,0 +1,339 @@
package utils
import (
"context"
"crypto/md5"
"crypto/sha256"
"crypto/tls"
"encoding/hex"
"fmt"
"net"
"net/http"
"slices"
"strconv"
"strings"
"sync/atomic"
)
func applyTLSFingerprinter(server *http.Server) {
server.TLSConfig = server.TLSConfig.Clone()
getCertificate := server.TLSConfig.GetCertificate
if getCertificate == nil {
server.TLSConfig.GetCertificate = func(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) {
ja3n, ja4 := buildTLSFingerprint(clientHello)
ptr := clientHello.Context().Value(tlsFingerprintKey{})
if fpPtr, ok := ptr.(*TLSFingerprint); ok && ptr != nil && fpPtr != nil {
fpPtr.ja3n.Store(&ja3n)
fpPtr.ja4.Store(&ja4)
}
return nil, nil
}
} else {
server.TLSConfig.GetCertificate = func(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) {
ja3n, ja4 := buildTLSFingerprint(clientHello)
ptr := clientHello.Context().Value(tlsFingerprintKey{})
if fpPtr, ok := ptr.(*TLSFingerprint); ok && ptr != nil && fpPtr != nil {
fpPtr.ja3n.Store(&ja3n)
fpPtr.ja4.Store(&ja4)
}
return getCertificate(clientHello)
}
}
server.ConnContext = func(ctx context.Context, c net.Conn) context.Context {
return context.WithValue(ctx, tlsFingerprintKey{}, &TLSFingerprint{})
}
}
type tlsFingerprintKey struct{}
type TLSFingerprint struct {
ja3n atomic.Pointer[TLSFingerprintJA3N]
ja4 atomic.Pointer[TLSFingerprintJA4]
}
type TLSFingerprintJA3N [md5.Size]byte
func (f TLSFingerprintJA3N) String() string {
return hex.EncodeToString(f[:])
}
type TLSFingerprintJA4 struct {
A [10]byte
B [6]byte
C [6]byte
}
func (f TLSFingerprintJA4) String() string {
return strings.Join([]string{
string(f.A[:]),
hex.EncodeToString(f.B[:]),
hex.EncodeToString(f.C[:]),
}, "_")
}
func (f *TLSFingerprint) JA3N() *TLSFingerprintJA3N {
return f.ja3n.Load()
}
func (f *TLSFingerprint) JA4() *TLSFingerprintJA4 {
return f.ja4.Load()
}
const greaseMask = 0x0F0F
const greaseValue = 0x0a0a
// TLS extension numbers
const (
extensionServerName uint16 = 0
extensionStatusRequest uint16 = 5
extensionSupportedCurves uint16 = 10 // supported_groups in TLS 1.3, see RFC 8446, Section 4.2.7
extensionSupportedPoints uint16 = 11
extensionSignatureAlgorithms uint16 = 13
extensionALPN uint16 = 16
extensionSCT uint16 = 18
extensionExtendedMasterSecret uint16 = 23
extensionSessionTicket uint16 = 35
extensionPreSharedKey uint16 = 41
extensionEarlyData uint16 = 42
extensionSupportedVersions uint16 = 43
extensionCookie uint16 = 44
extensionPSKModes uint16 = 45
extensionCertificateAuthorities uint16 = 47
extensionSignatureAlgorithmsCert uint16 = 50
extensionKeyShare uint16 = 51
extensionQUICTransportParameters uint16 = 57
extensionRenegotiationInfo uint16 = 0xff01
extensionECHOuterExtensions uint16 = 0xfd00
extensionEncryptedClientHello uint16 = 0xfe0d
)
func tlsFingerprintJA3(hello *tls.ClientHelloInfo, sortExtensions bool) []byte {
buf := make([]byte, 0, 256)
{
var sslVersion uint16
var hasGrease bool
for _, v := range hello.SupportedVersions {
if v&greaseMask != greaseValue {
if v > sslVersion {
sslVersion = v
}
} else {
hasGrease = true
}
}
// maximum TLS 1.2 as specified on JA3, as TLS 1.3 is put in SupportedVersions
if slices.Contains(hello.Extensions, extensionSupportedVersions) && hasGrease && sslVersion > tls.VersionTLS12 {
sslVersion = tls.VersionTLS12
}
buf = strconv.AppendUint(buf, uint64(sslVersion), 10)
buf = append(buf, ',')
}
n := 0
for _, cipher := range hello.CipherSuites {
//if !slices.Contains(greaseValues[:], cipher) {
if cipher&greaseMask != greaseValue {
buf = strconv.AppendUint(buf, uint64(cipher), 10)
buf = append(buf, '-')
n = 1
}
}
buf = buf[:len(buf)-n]
buf = append(buf, ',')
n = 0
extensions := hello.Extensions
if sortExtensions {
extensions = slices.Clone(extensions)
slices.Sort(extensions)
}
for _, extension := range extensions {
if extension&greaseMask != greaseValue {
buf = strconv.AppendUint(buf, uint64(extension), 10)
buf = append(buf, '-')
n = 1
}
}
buf = buf[:len(buf)-n]
buf = append(buf, ',')
n = 0
for _, curve := range hello.SupportedCurves {
if curve&greaseMask != greaseValue {
buf = strconv.AppendUint(buf, uint64(curve), 10)
buf = append(buf, '-')
n = 1
}
}
buf = buf[:len(buf)-n]
buf = append(buf, ',')
n = 0
for _, point := range hello.SupportedPoints {
buf = strconv.AppendUint(buf, uint64(point), 10)
buf = append(buf, '-')
n = 1
}
buf = buf[:len(buf)-n]
sum := md5.Sum(buf)
return sum[:]
}
func tlsFingerprintJA4(hello *tls.ClientHelloInfo) (ja4 TLSFingerprintJA4) {
buf := make([]byte, 0, 10)
// TODO: t = TLS, q = QUIC
buf = append(buf, 't')
{
var sslVersion uint16
for _, v := range hello.SupportedVersions {
if v&greaseMask != greaseValue {
if v > sslVersion {
sslVersion = v
}
}
}
switch sslVersion {
case tls.VersionSSL30:
buf = append(buf, 's', '3')
case tls.VersionTLS10:
buf = append(buf, '1', '0')
case tls.VersionTLS11:
buf = append(buf, '1', '1')
case tls.VersionTLS12:
buf = append(buf, '1', '2')
case tls.VersionTLS13:
buf = append(buf, '1', '3')
default:
sslVersion -= 0x0201
buf = strconv.AppendUint(buf, uint64(sslVersion>>8), 10)
buf = strconv.AppendUint(buf, uint64(sslVersion&0xff), 10)
}
}
if slices.Contains(hello.Extensions, extensionServerName) && hello.ServerName != "" {
buf = append(buf, 'd')
} else {
buf = append(buf, 'i')
}
ciphers := make([]uint16, 0, len(hello.CipherSuites))
for _, cipher := range hello.CipherSuites {
if cipher&greaseMask != greaseValue {
ciphers = append(ciphers, cipher)
}
}
extensionCount := 0
extensions := make([]uint16, 0, len(hello.Extensions))
for _, extension := range hello.Extensions {
if extension&greaseMask != greaseValue {
extensionCount++
if extension != extensionALPN && extension != extensionServerName {
extensions = append(extensions, extension)
}
}
}
schemes := make([]tls.SignatureScheme, 0, len(hello.SignatureSchemes))
for _, scheme := range hello.SignatureSchemes {
if scheme&greaseMask != greaseValue {
schemes = append(schemes, scheme)
}
}
//TODO: maybe little endian
slices.Sort(ciphers)
slices.Sort(extensions)
//slices.Sort(schemes)
if len(ciphers) < 10 {
buf = append(buf, '0')
buf = strconv.AppendUint(buf, uint64(len(ciphers)), 10)
} else if len(ciphers) > 99 {
buf = append(buf, '9', '9')
} else {
buf = strconv.AppendUint(buf, uint64(len(ciphers)), 10)
}
if extensionCount < 10 {
buf = append(buf, '0')
buf = strconv.AppendUint(buf, uint64(extensionCount), 10)
} else if extensionCount > 99 {
buf = append(buf, '9', '9')
} else {
buf = strconv.AppendUint(buf, uint64(extensionCount), 10)
}
if len(hello.SupportedProtos) > 0 && len(hello.SupportedProtos[0]) > 1 {
buf = append(buf, hello.SupportedProtos[0][0], hello.SupportedProtos[0][len(hello.SupportedProtos[0])-1])
} else {
buf = append(buf, '0', '0')
}
copy(ja4.A[:], buf)
ja4.B = ja4SHA256(uint16SliceToHex(ciphers))
extBuf := uint16SliceToHex(extensions)
if len(schemes) > 0 {
extBuf = append(extBuf, '_')
extBuf = append(extBuf, uint16SliceToHex(schemes)...)
}
ja4.C = ja4SHA256(extBuf)
return ja4
}
func uint16SliceToHex[T ~uint16](in []T) (out []byte) {
if len(in) == 0 {
return out
}
out = slices.Grow(out, hex.EncodedLen(len(in)*2)+len(in))
for _, n := range in {
out = append(out, fmt.Sprintf("%04x", uint16(n))...)
out = append(out, ',')
}
out = out[:len(out)-1]
return out
}
func ja4SHA256(buf []byte) [6]byte {
if len(buf) == 0 {
return [6]byte{0, 0, 0, 0, 0, 0}
}
sum := sha256.Sum256(buf)
return [6]byte(sum[:6])
}
func buildTLSFingerprint(hello *tls.ClientHelloInfo) (ja3n TLSFingerprintJA3N, ja4 TLSFingerprintJA4) {
return TLSFingerprintJA3N(tlsFingerprintJA3(hello, true)), tlsFingerprintJA4(hello)
}
func GetTLSFingerprint(r *http.Request) *TLSFingerprint {
ptr := r.Context().Value(tlsFingerprintKey{})
if fpPtr, ok := ptr.(*TLSFingerprint); ok && ptr != nil && fpPtr != nil {
return fpPtr
}
return nil
}

View File

@@ -2,6 +2,7 @@ package utils
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net"
@@ -11,6 +12,28 @@ import (
"strings"
)
func NewServer(handler http.Handler, tlsConfig *tls.Config) *http.Server {
if tlsConfig == nil {
proto := new(http.Protocols)
proto.SetHTTP1(true)
proto.SetUnencryptedHTTP2(true)
h1s := &http.Server{
Handler: handler,
Protocols: proto,
}
return h1s
} else {
server := &http.Server{
TLSConfig: tlsConfig,
Handler: handler,
}
applyTLSFingerprinter(server)
return server
}
}
func EnsureNoOpenRedirect(redirect string) (string, error) {
uri, err := url.Parse(redirect)
if err != nil {