Implement nested rules and check vs challenge, list policies
This commit is contained in:
13
http.go
13
http.go
@@ -84,6 +84,7 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, rule := range state.Rules {
|
for _, rule := range state.Rules {
|
||||||
|
nextRule:
|
||||||
if out, _, err := rule.Program.Eval(env); err != nil {
|
if out, _, err := rule.Program.Eval(env); err != nil {
|
||||||
//TODO error
|
//TODO error
|
||||||
panic(err)
|
panic(err)
|
||||||
@@ -93,10 +94,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
default:
|
default:
|
||||||
panic(fmt.Errorf("unknown action %s", rule.Action))
|
panic(fmt.Errorf("unknown action %s", rule.Action))
|
||||||
case PolicyRuleActionPASS:
|
case PolicyRuleActionPASS:
|
||||||
//fallback, proxy!
|
|
||||||
state.Backend.ServeHTTP(w, r)
|
state.Backend.ServeHTTP(w, r)
|
||||||
return
|
return
|
||||||
case PolicyRuleActionCHALLENGE:
|
case PolicyRuleActionCHALLENGE, PolicyRuleActionCHECK:
|
||||||
expiry := time.Now().UTC().Add(DefaultValidity).Round(DefaultValidity)
|
expiry := time.Now().UTC().Add(DefaultValidity).Round(DefaultValidity)
|
||||||
|
|
||||||
for _, challengeName := range rule.Challenges {
|
for _, challengeName := range rule.Challenges {
|
||||||
@@ -107,6 +107,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
ClearCookie(CookiePrefix+challengeName, w)
|
ClearCookie(CookiePrefix+challengeName, w)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if rule.Action == PolicyRuleActionCHECK {
|
||||||
|
goto nextRule
|
||||||
|
}
|
||||||
// we passed the challenge!
|
// we passed the challenge!
|
||||||
//TODO log?
|
//TODO log?
|
||||||
state.Backend.ServeHTTP(w, r)
|
state.Backend.ServeHTTP(w, r)
|
||||||
@@ -125,6 +128,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
case ChallengeResultContinue:
|
case ChallengeResultContinue:
|
||||||
continue
|
continue
|
||||||
case ChallengeResultPass:
|
case ChallengeResultPass:
|
||||||
|
if rule.Action == PolicyRuleActionCHECK {
|
||||||
|
goto nextRule
|
||||||
|
}
|
||||||
// we pass the challenge early!
|
// we pass the challenge early!
|
||||||
state.Backend.ServeHTTP(w, r)
|
state.Backend.ServeHTTP(w, r)
|
||||||
return
|
return
|
||||||
@@ -145,6 +151,9 @@ func (state *State) handleRequest(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
state.Backend.ServeHTTP(w, r)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (state *State) setupRoutes() error {
|
func (state *State) setupRoutes() error {
|
||||||
|
|||||||
@@ -40,8 +40,7 @@ func parseCIDROrIP(value string) (net.IPNet, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Policy struct {
|
type Policy struct {
|
||||||
// UserAgents map of a list of user-agent regex
|
|
||||||
UserAgents map[string][]string `yaml:"user-agents"`
|
|
||||||
// Networks map of networks and prefixes to be loaded
|
// Networks map of networks and prefixes to be loaded
|
||||||
Networks map[string][]PolicyNetwork `yaml:"networks"`
|
Networks map[string][]PolicyNetwork `yaml:"networks"`
|
||||||
|
|
||||||
@@ -59,6 +58,7 @@ const (
|
|||||||
PolicyRuleActionDENY PolicyRuleAction = "DENY"
|
PolicyRuleActionDENY PolicyRuleAction = "DENY"
|
||||||
PolicyRuleActionBLOCK PolicyRuleAction = "BLOCK"
|
PolicyRuleActionBLOCK PolicyRuleAction = "BLOCK"
|
||||||
PolicyRuleActionCHALLENGE PolicyRuleAction = "CHALLENGE"
|
PolicyRuleActionCHALLENGE PolicyRuleAction = "CHALLENGE"
|
||||||
|
PolicyRuleActionCHECK PolicyRuleAction = "CHECK"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PolicyRule struct {
|
type PolicyRule struct {
|
||||||
|
|||||||
168
policy.yml
168
policy.yml
@@ -1,22 +1,3 @@
|
|||||||
# Define groups of useragents to use later below for matching
|
|
||||||
user-agents:
|
|
||||||
default-browser:
|
|
||||||
- "^Mozilla/"
|
|
||||||
- "^Opera/"
|
|
||||||
bad-crawlers:
|
|
||||||
- "Amazonbot"
|
|
||||||
headless-browser:
|
|
||||||
- "HeadlessChrome"
|
|
||||||
- "HeadlessChromium"
|
|
||||||
- "^Lightpanda/"
|
|
||||||
- "^$"
|
|
||||||
rss:
|
|
||||||
- "FeedFetcher-Google"
|
|
||||||
git:
|
|
||||||
- "^git/"
|
|
||||||
- "^go-git/"
|
|
||||||
- "^JGit[/-]"
|
|
||||||
- "^GoModuleMirror/"
|
|
||||||
|
|
||||||
# Define networks to be used later below
|
# Define networks to be used later below
|
||||||
networks:
|
networks:
|
||||||
@@ -70,23 +51,6 @@ networks:
|
|||||||
regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
|
regex: "(?P<prefix>[0-9a-f:]+::/[0-9]+)"
|
||||||
|
|
||||||
|
|
||||||
conditions:
|
|
||||||
# Checks to detect a headless chromium via headers only
|
|
||||||
is-headless-chromium:
|
|
||||||
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
|
|
||||||
- 'headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium")'
|
|
||||||
- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (headers["Accept-Language"] == "" || headers["Accept-Encoding"] == "")'
|
|
||||||
is-static-asset:
|
|
||||||
- 'path == "/robots.txt"'
|
|
||||||
- 'path == "/favicon.ico"'
|
|
||||||
- 'path == "/apple-touch-icon.png"'
|
|
||||||
- 'path == "/apple-touch-icon-precomposed.png"'
|
|
||||||
- 'path.startsWith("/assets/")'
|
|
||||||
- 'path.startsWith("/repo-avatars/")'
|
|
||||||
- 'path.startsWith("/avatars/")'
|
|
||||||
- 'path.startsWith("/avatar/")'
|
|
||||||
|
|
||||||
|
|
||||||
# todo: define interface
|
# todo: define interface
|
||||||
challenges:
|
challenges:
|
||||||
js-pow-sha256:
|
js-pow-sha256:
|
||||||
@@ -95,7 +59,7 @@ challenges:
|
|||||||
mode: js
|
mode: js
|
||||||
asset: load.mjs
|
asset: load.mjs
|
||||||
parameters:
|
parameters:
|
||||||
difficulty: 4
|
difficulty: 5
|
||||||
runtime:
|
runtime:
|
||||||
mode: wasm
|
mode: wasm
|
||||||
# Verify must be under challenges/{name}/runtime/{asset}
|
# Verify must be under challenges/{name}/runtime/{asset}
|
||||||
@@ -131,15 +95,139 @@ challenges:
|
|||||||
http-method: GET
|
http-method: GET
|
||||||
http-code: 200
|
http-code: 200
|
||||||
|
|
||||||
|
conditions:
|
||||||
|
# Each condition is substituted textually into rule expressions wherever it is referenced as ($condition-name)
|
||||||
|
# Checks to detect a headless chromium via headers only
|
||||||
|
is-headless-chromium:
|
||||||
|
- 'userAgent.contains("HeadlessChrome") || userAgent.contains("HeadlessChromium")'
|
||||||
|
- '"Sec-Ch-Ua" in headers && (headers["Sec-Ch-Ua"].contains("HeadlessChrome") || headers["Sec-Ch-Ua"].contains("HeadlessChromium"))'
|
||||||
|
- '(userAgent.contains("Chrome/") || userAgent.contains("Chromium/")) && (!("Accept-Language" in headers) || !("Accept-Encoding" in headers))'
|
||||||
|
is-static-asset:
|
||||||
|
- 'path == "/robots.txt"'
|
||||||
|
- 'path == "/favicon.ico"'
|
||||||
|
- 'path == "/apple-touch-icon.png"'
|
||||||
|
- 'path == "/apple-touch-icon-precomposed.png"'
|
||||||
|
- 'path.startsWith("/assets/")'
|
||||||
|
- 'path.startsWith("/repo-avatars/")'
|
||||||
|
- 'path.startsWith("/avatars/")'
|
||||||
|
- 'path.startsWith("/avatar/")'
|
||||||
|
is-git-ua:
|
||||||
|
- 'userAgent.startsWith("git/")'
|
||||||
|
- 'userAgent.startsWith("go-git")'
|
||||||
|
- 'userAgent.startsWith("JGit/") || userAgent.startsWith("JGit-")'
|
||||||
|
# Golang proxy and initial fetch
|
||||||
|
- 'userAgent.startsWith("GoModuleMirror/")'
|
||||||
|
- 'userAgent.startsWith("Go-http-client/") && "go-get" in query && query["go-get"] == "1" && (path.matches("^/[^/]+/[^/]+$") || path.matches("^/[^/]+/[^/]+/v[0-9]+$"))'
|
||||||
|
is-git-path:
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+/(git-upload-pack|git-receive-pack|HEAD|info/refs|info/lfs|objects)")'
|
||||||
|
|
||||||
rules:
|
rules:
|
||||||
- name: blocked-networks
|
- name: undesired-networks
|
||||||
conditions:
|
conditions:
|
||||||
- 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress)'
|
- 'inNetwork("huawei-cloud", remoteAddress) || inNetwork("alibaba-cloud", remoteAddress)'
|
||||||
action: deny
|
action: deny
|
||||||
|
|
||||||
- name: golang-proxy
|
- name: undesired-crawlers
|
||||||
conditions:
|
conditions:
|
||||||
- 'userAgent.startsWith("GoModuleMirror/") || (userAgent.startsWith("Go-http-client/") && query["go-get"] == "1")'
|
- '($is-headless-chromium)'
|
||||||
|
- 'userAgent == ""'
|
||||||
|
- 'userAgent.startsWith("Lightpanda/")'
|
||||||
|
# Botnet that sends a malformed (typo'd) Opera user agent: version followed by ".("
|
||||||
|
- 'userAgent.matches("^Opera/[0-9.]+\\.\\(")'
|
||||||
|
# AI bullshit stuff, they do not respect robots.txt even while they read it
|
||||||
|
- 'userAgent.contains("Amazonbot") || userAgent.contains("Bytespider") || userAgent.contains("ClaudeBot") || userAgent.contains("meta-externalagent/")'
|
||||||
|
action: deny
|
||||||
|
|
||||||
|
- name: suspicious-crawlers
|
||||||
|
conditions:
|
||||||
|
- 'userAgent.contains("Presto/") || userAgent.contains("Trident/")'
|
||||||
|
# Old IE browsers
|
||||||
|
- 'userAgent.matches("MSIE ([4-9]|10|11)\\.")'
|
||||||
|
# Old Linux browsers
|
||||||
|
- 'userAgent.contains("Linux i686")'
|
||||||
|
# Old Windows browsers
|
||||||
|
- 'userAgent.matches("Windows (95|98)") || userAgent.matches("Windows NT [1-4]\\.")'
|
||||||
|
# Old mobile browsers
|
||||||
|
- 'userAgent.matches("Android [1-9]\\.") || userAgent.matches("(iPad|iPhone) OS [1-9]_")'
|
||||||
|
# Old Opera browsers
|
||||||
|
- 'userAgent.startsWith("Opera/")'
|
||||||
|
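# check: once the challenges below are passed, rule evaluation is meant to continue with later rules instead of passing the request immediately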
|
||||||
|
action: check
|
||||||
|
challenges: [js-pow-sha256, http-cookie-check]
|
||||||
|
|
||||||
|
- name: always-pow-challenge
|
||||||
|
conditions:
|
||||||
|
- 'path.startsWith("/user/sign_up") || path.startsWith("/user/login")'
|
||||||
|
# Match archive downloads from browsers and not tools
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+/archive/.*\\.(bundle|zip|tar\\.gz)") && (userAgent.startsWith("Opera/") || userAgent.startsWith("Mozilla/"))'
|
||||||
|
action: challenge
|
||||||
|
challenges: [js-pow-sha256]
|
||||||
|
|
||||||
|
|
||||||
|
- name: allow-static-resources
|
||||||
|
conditions:
|
||||||
|
- '($is-static-asset)'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
- name: allow-git-operations
|
||||||
|
conditions:
|
||||||
|
- '($is-git-path)'
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+\\.git")'
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+/") && ($is-git-ua)'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
- name: sitemap
|
||||||
|
conditions:
|
||||||
|
- 'path == "/sitemap.xml" || path.matches("^/explore/(users|repos)/sitemap-[0-9]+\\.xml$")'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
# TODO: rss
|
||||||
|
|
||||||
|
- name: source-download
|
||||||
|
conditions:
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+/raw/branch/")'
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+/archive/")'
|
||||||
|
- 'path.matches("^/[^/]+/[^/]+/media/")'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
- name: api-call
|
||||||
|
conditions:
|
||||||
|
- 'path.startsWith("/.well-known")'
|
||||||
|
- 'path.startsWith("/api/v1/") || path.startsWith("/api/forgejo/v1/")'
|
||||||
|
- 'path.startsWith("/login/oauth/")'
|
||||||
|
- 'path.startsWith("/captcha/")'
|
||||||
|
- 'path.startsWith("/metrics/")'
|
||||||
|
# todo: post only
|
||||||
|
- 'path == "/-/markup"'
|
||||||
|
- 'path == "/user/events"'
|
||||||
|
- 'path == "/ssh_info"'
|
||||||
|
- 'path == "/api/healthz"'
|
||||||
|
# user pubkeys
|
||||||
|
- 'path.matches("^/[^/]+\\.keys$")'
|
||||||
|
- 'path.matches("^/[^/]+\\.gpg")'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
- name: preview-fetchers
|
||||||
|
conditions:
|
||||||
|
- 'path.endsWith("/-/summary-card")'
|
||||||
|
- 'userAgent.contains("facebookexternalhit/") || userAgent.contains("Twitterbot/")'
|
||||||
|
- '"X-Purpose" in headers && headers["X-Purpose"] == "preview"'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
- name: desired-crawlers
|
||||||
|
conditions:
|
||||||
|
- 'userAgent.contains("+https://kagi.com/bot") && inNetwork("kagibot", remoteAddress)'
|
||||||
|
- 'userAgent.contains("+http://www.google.com/bot.html") && inNetwork("googlebot", remoteAddress)'
|
||||||
|
- 'userAgent.contains("+http://www.bing.com/bingbot.htm") && inNetwork("bingbot", remoteAddress)'
|
||||||
|
- 'userAgent.contains("+http://duckduckgo.com/duckduckbot.html") && inNetwork("duckduckbot", remoteAddress)'
|
||||||
|
- 'userAgent.contains("+https://help.qwant.com/bot/") && inNetwork("qwantbot", remoteAddress)'
|
||||||
|
- 'userAgent.contains("+http://yandex.com/bots") && inNetwork("yandexbot", remoteAddress)'
|
||||||
|
action: pass
|
||||||
|
|
||||||
|
- name: homesite
|
||||||
|
conditions:
|
||||||
|
- 'path == "/"'
|
||||||
|
- 'path.matches("(?i)^/(WeebDataHoarder|P2Pool|mirror|git|S\\.O\\.N\\.G|FM10K|Sillycom|pwgen2155|kaitou|metonym)/[^/]+$")'
|
||||||
action: pass
|
action: pass
|
||||||
|
|
||||||
- name: standard-browser
|
- name: standard-browser
|
||||||
|
|||||||
41
state.go
41
state.go
@@ -23,7 +23,6 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -38,15 +37,12 @@ type State struct {
|
|||||||
|
|
||||||
Networks map[string]cidranger.Ranger
|
Networks map[string]cidranger.Ranger
|
||||||
|
|
||||||
UserAgents map[string][]*regexp.Regexp
|
|
||||||
|
|
||||||
WasmRuntime wazero.Runtime
|
WasmRuntime wazero.Runtime
|
||||||
WasmContext context.Context
|
WasmContext context.Context
|
||||||
|
|
||||||
Challenges map[string]ChallengeState
|
Challenges map[string]ChallengeState
|
||||||
|
|
||||||
RulesEnv *cel.Env
|
RulesEnv *cel.Env
|
||||||
Conditions map[string]*cel.Ast
|
|
||||||
|
|
||||||
Rules []RuleState
|
Rules []RuleState
|
||||||
|
|
||||||
@@ -59,6 +55,7 @@ type RuleState struct {
|
|||||||
|
|
||||||
Program cel.Program
|
Program cel.Program
|
||||||
Action PolicyRuleAction
|
Action PolicyRuleAction
|
||||||
|
Continue bool
|
||||||
Challenges []string
|
Challenges []string
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,16 +97,6 @@ func NewState(policy Policy, packagePath string, backend http.Handler) (state *S
|
|||||||
state.UrlPath = "/.well-known/." + state.PackagePath
|
state.UrlPath = "/.well-known/." + state.PackagePath
|
||||||
state.Backend = backend
|
state.Backend = backend
|
||||||
|
|
||||||
state.UserAgents = make(map[string][]*regexp.Regexp)
|
|
||||||
for k, v := range policy.UserAgents {
|
|
||||||
for _, str := range v {
|
|
||||||
expr, err := regexp.Compile(str)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("user-agent %s: invalid regex expression %s: %v", k, str, err)
|
|
||||||
}
|
|
||||||
state.UserAgents[k] = append(state.UserAgents[k], expr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
state.Networks = make(map[string]cidranger.Ranger)
|
state.Networks = make(map[string]cidranger.Ranger)
|
||||||
for k, network := range policy.Networks {
|
for k, network := range policy.Networks {
|
||||||
ranger := cidranger.NewPCTrieRanger()
|
ranger := cidranger.NewPCTrieRanger()
|
||||||
@@ -459,14 +446,22 @@ func NewState(policy Policy, packagePath string, backend http.Handler) (state *S
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
state.Conditions = make(map[string]*cel.Ast)
|
var replacements []string
|
||||||
for k, entries := range policy.Conditions {
|
for k, entries := range policy.Conditions {
|
||||||
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, entries...)
|
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, entries...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("conditions %s: error compiling conditions: %v", k, err)
|
return nil, fmt.Errorf("conditions %s: error compiling conditions: %v", k, err)
|
||||||
}
|
}
|
||||||
state.Conditions[k] = ast
|
|
||||||
|
cond, err := cel.AstToString(ast)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("conditions %s: error printing condition: %v", k, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
replacements = append(replacements, fmt.Sprintf("($%s)", k))
|
||||||
|
replacements = append(replacements, "("+cond+")")
|
||||||
}
|
}
|
||||||
|
conditionReplacer := strings.NewReplacer(replacements...)
|
||||||
|
|
||||||
for _, rule := range policy.Rules {
|
for _, rule := range policy.Rules {
|
||||||
r := RuleState{
|
r := RuleState{
|
||||||
@@ -475,12 +470,18 @@ func NewState(policy Policy, packagePath string, backend http.Handler) (state *S
|
|||||||
Challenges: rule.Challenges,
|
Challenges: rule.Challenges,
|
||||||
}
|
}
|
||||||
|
|
||||||
if r.Action == PolicyRuleActionCHALLENGE && len(r.Challenges) == 0 {
|
if (r.Action == PolicyRuleActionCHALLENGE || r.Action == PolicyRuleActionCHECK) && len(r.Challenges) == 0 {
|
||||||
return nil, fmt.Errorf("no challenges found in rule %s", rule.Name)
|
return nil, fmt.Errorf("no challenges found in rule %s", rule.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: nesting conditions via decorator!
|
// allow nesting: expand ($condition-name) references into the named condition's expression before compiling
|
||||||
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, rule.Conditions...)
|
var conditions []string
|
||||||
|
for _, cond := range rule.Conditions {
|
||||||
|
cond = conditionReplacer.Replace(cond)
|
||||||
|
conditions = append(conditions, cond)
|
||||||
|
}
|
||||||
|
|
||||||
|
ast, err := ConditionFromStrings(state.RulesEnv, OperatorOr, conditions...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("rules %s: error compiling conditions: %v", rule.Name, err)
|
return nil, fmt.Errorf("rules %s: error compiling conditions: %v", rule.Name, err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user