diff --git a/examples/forgejo.yml b/examples/forgejo.yml index 0ec40d5..5c61bd8 100644 --- a/examples/forgejo.yml +++ b/examples/forgejo.yml @@ -389,15 +389,11 @@ rules: # Match root of site - 'path == "/"' - # Match root of any repository + # Match root of any repository, root of any user or org profile (only without query parameters), or a numbered issue or pull request path # generic /*/*/ match gave too many options for scrapers to trigger random endpoints # this is a negative match of endpoints that Forgejo holds as reserved as users or orgs # see https://codeberg.org/forgejo/forgejo/src/branch/forgejo/models/user/user.go#L582 - - 'path.matches("^/[^/]+/[^/]+/?$") && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)/[^/]+/?$")' - - # Match root of most profiles, disallowing query parameters - # this is a negative match of endpoints that Forgejo holds as reserved as users or orgs - - 'path.matches("^/[^/]+/?$") && size(query) == 0 && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)/?$")' + - '(path.matches("^/[^/]+/[^/]+/?$") || path.matches("^/[^/]+/[^/]+/(issues|pulls)/[0-9]+$") || (path.matches("^/[^/]+/?$") && size(query) == 0)) && !path.matches("(?i)^/(api|metrics|v2|assets|attachments|avatar|avatars|repo-avatars|captcha|login|org|repo|user|admin|devtest|explore|issues|pulls|milestones|notifications|ghost)(/|$)")' action: pass - name: desired-crawlers