From d82c12de286aeae2aaea601b44c085bf78f6c137 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Thu, 20 Mar 2025 15:36:34 -0400 Subject: docs: add funding page Signed-off-by: Xe Iaso --- docs/docs/admin/installation.mdx | 134 ++++++++++++++++++++++++++++++++++++++ docs/docs/admin/policies.md | 79 +++++++++++++++++++++++ docs/docs/funding.md | 10 +++ docs/docs/installation.mdx | 135 --------------------------------------- docs/docs/policies.md | 81 ----------------------- docs/docusaurus.config.ts | 2 +- 6 files changed, 224 insertions(+), 217 deletions(-) create mode 100644 docs/docs/admin/installation.mdx create mode 100644 docs/docs/admin/policies.md create mode 100644 docs/docs/funding.md delete mode 100644 docs/docs/installation.mdx delete mode 100644 docs/docs/policies.md diff --git a/docs/docs/admin/installation.mdx b/docs/docs/admin/installation.mdx new file mode 100644 index 0000000..61bc2d2 --- /dev/null +++ b/docs/docs/admin/installation.mdx @@ -0,0 +1,134 @@ +--- +title: Setting up Anubis +--- + +Anubis is meant to sit between your reverse proxy (such as Nginx or Caddy) and your target service. One instance of Anubis must be used per service you are protecting. + +Anubis is shipped in the Docker repo [`ghcr.io/techarohq/anubis`](https://github.com/TecharoHQ/anubis/pkgs/container/anubis). The following tags exist for your convenience: + +| Tag | Meaning | +| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- | +| `latest` | The latest [tagged release](https://github.com/TecharoHQ/anubis/releases); if you are in doubt, start here. | +| `v<version number>` | The Anubis image for [any given tagged release](https://github.com/TecharoHQ/anubis/tags) | +| `main` | The current build on the `main` branch. Only use this if you need the latest and greatest features as they are merged into `main`. | +| `pr-<number>` | The build associated with PR `#<number>`. 
Only use this for debugging issues fixed by a PR. | + +Other methods to install Anubis may exist, but the Docker image is currently the only supported method. + +The Docker image runs Anubis as user ID 1000 and group ID 1000. If you are mounting external volumes into Anubis' container, please be sure they are owned by or writable by this user/group. + +Anubis has very minimal system requirements. I suspect that 128Mi of RAM may be sufficient for a large number of concurrent clients. Anubis may be a poor fit for apps that use WebSockets and maintain open connections, but I don't have enough real-world experience to know one way or another. + +Anubis uses these environment variables for configuration: + +| Environment Variable | Default value | Explanation | +| :------------------- | :------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `BIND` | `:8923` | The TCP port that Anubis listens on. | +| `DIFFICULTY` | `5` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. | +| `METRICS_BIND` | `:9090` | The TCP port that Anubis serves Prometheus metrics on. | +| `POLICY_FNAME` | `/data/cfg/botPolicy.json` | The file containing [bot policy configuration](./policies.md). See the bot policy documentation for more details. | +| `SERVE_ROBOTS_TXT` | `false` | If set to `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. | +| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. 
| + +## Docker compose + +Add Anubis to your compose file pointed at your service: + +```yaml +services: + anubis-nginx: + image: ghcr.io/techarohq/anubis:latest + environment: + BIND: ":8080" + DIFFICULTY: "5" + METRICS_BIND: ":9090" + SERVE_ROBOTS_TXT: "true" + TARGET: "http://nginx" + ports: + - 8080:8080 + nginx: + image: nginx + volumes: + - "./www:/usr/share/nginx/html" + - "./botPolicy.json:/data/cfg/botPolicy.json" +``` + +## Kubernetes + +This example makes the following assumptions: + +- Your target service is listening on TCP port `5000`. +- Anubis will be listening on port `8080`. + +Attach Anubis to your Deployment: + +```yaml +containers: + # ... + - name: anubis + image: ghcr.io/techarohq/anubis:latest + imagePullPolicy: Always + env: + - name: "BIND" + value: ":8080" + - name: "DIFFICULTY" + value: "5" + - name: "METRICS_BIND" + value: ":9090" + - name: "SERVE_ROBOTS_TXT" + value: "true" + - name: "TARGET" + value: "http://localhost:5000" + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 250m + memory: 128Mi + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault +``` + +Then add a Service entry for Anubis: + +```yaml +# ... +spec: + ports: + # diff-add + - protocol: TCP + # diff-add + port: 8080 + # diff-add + targetPort: 8080 + # diff-add + name: anubis +``` + +Then point your Ingress to the Anubis port: + +```yaml + rules: + - host: git.xeserv.us + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: git + port: + # diff-remove + name: http + # diff-add + name: anubis +``` diff --git a/docs/docs/admin/policies.md b/docs/docs/admin/policies.md new file mode 100644 index 0000000..bdf8a20 --- /dev/null +++ b/docs/docs/admin/policies.md @@ -0,0 +1,79 @@ +--- +title: Policy Definitions +--- + +Out of the box, Anubis is pretty heavy-handed. 
It will aggressively challenge everything that might be a browser (usually indicated by having `Mozilla` in its user agent). However, some bots are smart enough to get past the challenge. Some things that look like bots may actually be fine (e.g., RSS readers). Some resources need to be visible no matter what. Some resources and remotes are fine to begin with. + +Bot policies let you customize the rules that Anubis uses to allow, deny, or challenge incoming requests. Currently you can set policies by the following matches: + +- Request path +- User agent string + +Here's an example rule that denies [Amazonbot](https://developer.amazon.com/en/amazonbot): + +```json +{ + "name": "amazonbot", + "user_agent_regex": "Amazonbot", + "action": "DENY" +} +``` + +When this rule is evaluated, Anubis will check the `User-Agent` string of the request. If it contains `Amazonbot`, Anubis will send an error page to the user saying that access is denied, but in such a way that makes scrapers think they have correctly loaded the webpage. + +Right now the only kinds of policies you can write are bot policies. Other forms of policies will be added in the future. + +Here is a minimal policy file that will protect against most scraper bots: + +```json +{ + "bots": [ + { + "name": "well-known", + "path_regex": "^/.well-known/.*$", + "action": "ALLOW" + }, + { + "name": "favicon", + "path_regex": "^/favicon.ico$", + "action": "ALLOW" + }, + { + "name": "robots-txt", + "path_regex": "^/robots.txt$", + "action": "ALLOW" + }, + { + "name": "generic-browser", + "user_agent_regex": "Mozilla", + "action": "CHALLENGE" + } + ] +} +``` + +This allows requests to [`/.well-known`](https://en.wikipedia.org/wiki/Well-known_URI), `/favicon.ico`, `/robots.txt`, and challenges any request that has the word `Mozilla` in its User-Agent string. 
The [default policy file](https://github.com/TecharoHQ/anubis/blob/main/cmd/anubis/botPolicies.json) is a bit more comprehensive, but this should be more than enough for most users. + +If no rules match the request, it is allowed through. + +## Writing your own rules + +There are three actions that can be returned from a rule: + +| Action | Effects | +| :---------- | :-------------------------------------------------------------------------------- | +| `ALLOW` | Bypass all further checks and send the request to the backend. | +| `DENY` | Deny the request and send back an error message that scrapers think is a success. | +| `CHALLENGE` | Show a challenge page and/or validate that clients have passed a challenge. | + +Name your rules in lower case using kebab-case. Rule names will be exposed in Prometheus metrics. + +In case your service needs it for risk calculation reasons, Anubis exposes information about the rules that any requests match using a few headers: + +| Header | Explanation | Example | +| :---------------- | :--------------------------------------------------- | :--------------- | +| `X-Anubis-Rule` | The name of the rule that was matched | `bot/lightpanda` | +| `X-Anubis-Action` | The action that Anubis took in response to that rule | `CHALLENGE` | +| `X-Anubis-Status` | The status and how strict Anubis was in its checks | `PASS-FULL` | + +Policy rules are matched using [Go's standard library regular expressions package](https://pkg.go.dev/regexp). You can mess around with the syntax at [regex101.com](https://regex101.com); make sure to select the Golang option. diff --git a/docs/docs/funding.md b/docs/docs/funding.md new file mode 100644 index 0000000..72e6ffe --- /dev/null +++ b/docs/docs/funding.md @@ -0,0 +1,10 @@ +--- +sidebar_position: 998 +title: Supporting Anubis financially +--- + +Anubis is provided to the public for free in order to help advance the common good. 
In return, we ask (but do not demand; these are words on the internet, not the word of law) that you not remove the Anubis character from your deployment. + +If you want to run an unbranded or white-label version of Anubis, please [contact Xe](https://xeiaso.net/contact) to arrange a contract. This is not meant to be "contact us" pricing; I am still evaluating the market for this solution and figuring out what makes sense. + +You can donate to the project [on Patreon](https://patreon.com/cadey). diff --git a/docs/docs/installation.mdx b/docs/docs/installation.mdx deleted file mode 100644 index 35d1252..0000000 --- a/docs/docs/installation.mdx +++ /dev/null @@ -1,135 +0,0 @@ ---- -sidebar_position: 20 -title: Setting up Anubis ---- - -Anubis is meant to sit between your reverse proxy (such as Nginx or Caddy) and your target service. One instance of Anubis must be used per service you are protecting. - -Anubis is shipped in the Docker repo [`ghcr.io/techarohq/anubis`](https://github.com/TecharoHQ/anubis/pkgs/container/anubis). The following tags exist for your convenience: - -| Tag | Meaning | -| :------------------ | :--------------------------------------------------------------------------------------------------------------------------------- | -| `latest` | The latest [tagged release](https://github.com/TecharoHQ/anubis/releases), if you are in doubt, start here. | -| `v` | The Anubis image for [any given tagged release](https://github.com/TecharoHQ/anubis/tags) | -| `main` | The current build on the `main` branch. Only use this if you need the latest and greatest features as they are merged into `main`. | -| `pr-` | The build associated with PR `#`. Only use this for debugging issues fixed by a PR. | - -Other methods to install Anubis may exist, but the Docker image is currently the only supported method. - -The Docker image runs Anubis as user ID 1000 and group ID 1000. 
If you are mounting external volumes into Anubis' container, please be sure they are owned by or writable to this user/group. - -Anubis has very minimal system requirements. I suspect that 128Mi of ram may be sufficient for a large number of concurrent clients. Anubis may be a poor fit for apps that use WebSockets and maintain open connections, but I don't have enough real-world experience to know one way or another. - -Anubis uses these environment variables for configuration: - -| Environment Variable | Default value | Explanation | -| :------------------- | :------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `BIND` | `:8923` | The TCP port that Anubis listens on. | -| `DIFFICULTY` | `5` | The difficulty of the challenge, or the number of leading zeroes that must be in successful responses. | -| `METRICS_BIND` | `:9090` | The TCP port that Anubis serves Prometheus metrics on. | -| `POLICY_FNAME` | `/data/cfg/botPolicy.json` | The file containing [bot policy configuration](./policies.md). See the bot policy documentation for more details. | -| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. | -| `TARGET` | `http://localhost:3923` | The URL of the service that Anubis should forward valid requests to. 
| - -## Docker compose - -Add Anubis to your compose file pointed at your service: - -```yaml -services: - anubis-nginx: - image: ghcr.io/techarohq/anubis:latest - environment: - BIND: ":8080" - DIFFICULTY: "5" - METRICS_BIND: ":9090" - SERVE_ROBOTS_TXT: "true" - TARGET: "http://nginx" - ports: - - 8080:8080 - nginx: - image: nginx - volumes: - - "./www:/usr/share/nginx/html" - - "./botPolicy.json:/data/cfg/botPolicy.json" -``` - -## Kubernetes - -This example makes the following assumptions: - -- Your target service is listening on TCP port `5000`. -- Anubis will be listening on port `8080`. - -Attach Anubis to your Deployment: - -```yaml -containers: - # ... - - name: anubis - image: ghcr.io/techarohq/anubis:latest - imagePullPolicy: Always - env: - - name: "BIND" - value: ":8080" - - name: "DIFFICULTY" - value: "5" - - name: "METRICS_BIND" - value: ":9090" - - name: "SERVE_ROBOTS_TXT" - value: "true" - - name: "TARGET" - value: "http://localhost:5000" - resources: - limits: - cpu: 500m - memory: 128Mi - requests: - cpu: 250m - memory: 128Mi - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - runAsNonRoot: true - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault -``` - -Then add a Service entry for Anubis: - -```yaml -# ... -spec: - ports: - # diff-add - - protocol: TCP - # diff-add - port: 8080 - # diff-add - targetPort: 8080 - # diff-add - name: anubis -``` - -Then point your Ingress to the Anubis port: - -```yaml - rules: - - host: git.xeserv.us - http: - paths: - - pathType: Prefix - path: "/" - backend: - service: - name: git - port: - # diff-remove - name: http - # diff-add - name: anubis -``` diff --git a/docs/docs/policies.md b/docs/docs/policies.md deleted file mode 100644 index eaa52fb..0000000 --- a/docs/docs/policies.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -sidebar_position: 30 ---- - -# Policy Definitions - -Out of the box, Anubis is pretty heavy-handed. 
It will aggressively challenge everything that might be a browser (usually indicated by having `Mozilla` in its user agent). However, some bots are smart enough to get past the challenge. Some things that look like bots may actually be fine (IE: RSS readers). Some resources need to be visible no matter what. Some resources and remotes are fine to begin with. - -Bot policies let you customize the rules that Anubis uses to allow, deny, or challenge incoming requests. Currently you can set policies by the following matches: - -- Request path -- User agent string - -Here's an example rule that denies [Amazonbot](https://developer.amazon.com/en/amazonbot): - -```json -{ - "name": "amazonbot", - "user_agent_regex": "Amazonbot", - "action": "DENY" -} -``` - -When this rule is evaluated, Anubis will check the `User-Agent` string of the request. If it contains `Amazonbot`, Anubis will send an error page to the user saying that access is denied, but in such a way that makes scrapers think they have correctly loaded the webpage. - -Right now the only kinds of policies you can write are bot policies. Other forms of policies will be added in the future. - -Here is a minimal policy file that will protect against most scraper bots: - -```json -{ - "bots": [ - { - "name": "well-known", - "path_regex": "^/.well-known/.*$", - "action": "ALLOW" - }, - { - "name": "favicon", - "path_regex": "^/favicon.ico$", - "action": "ALLOW" - }, - { - "name": "robots-txt", - "path_regex": "^/robots.txt$", - "action": "ALLOW" - }, - { - "name": "generic-browser", - "user_agent_regex": "Mozilla", - "action": "CHALLENGE" - } - ] -} -``` - -This allows requests to [`/.well-known`](https://en.wikipedia.org/wiki/Well-known_URI), `/favicon.ico`, `/robots.txt`, and challenges any request that has the word `Mozilla` in its User-Agent string. 
The [default policy file](https://github.com/TecharoHQ/anubis/blob/main/cmd/anubis/botPolicies.json) is a bit more cohesive, but this should be more than enough for most users. - -If no rules match the request, it is allowed through. - -## Writing your own rules - -There are three actions that can be returned from a rule: - -| Action | Effects | -| :---------- | :-------------------------------------------------------------------------------- | -| `ALLOW` | Bypass all further checks and send the request to the backend. | -| `DENY` | Deny the request and send back an error message that scrapers think is a success. | -| `CHALLENGE` | Show a challenge page and/or validate that clients have passed a challenge. | - -Name your rules in lower case using kebab-case. Rule names will be exposed in Prometheus metrics. - -In case your service needs it for risk calculation reasons, Anubis exposes information about the rules that any requests match using a few headers: - -| Header | Explanation | Example | -| :---------------- | :--------------------------------------------------- | :--------------- | -| `X-Anubis-Rule` | The name of the rule that was matched | `bot/lightpanda` | -| `X-Anubis-Action` | The action that Anubis took in response to that rule | `CHALLENGE` | -| `X-Anubis-Status` | The status and how strict Anubis was in its checks | `PASS-FULL` | - -Policy rules are matched using [Go's standard library regular expressions package](https://pkg.go.dev/regexp). You can mess around with the syntax at [regex101.com](https://regex101.com), make sure to select the Golang option. diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index b98fb25..42eaa98 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -105,7 +105,7 @@ const config: Config = { }, { label: "Installation", - to: "/docs/installation", + to: "/docs/admin/installation", }, ], }, -- cgit v1.2.3