diff options
| author | Yulian Kuncheff <670212+daegalus@users.noreply.github.com> | 2025-03-22 23:44:49 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-22 18:44:49 -0400 |
| commit | 6156d3d7293a1757725b1d36a89a61ede1ffe850 (patch) | |
| tree | 770109d49181f582fed54ca787ebf959791f1b56 /lib/policy | |
| parent | af6f05554fe8da112599f30d32524c28a4078cac (diff) | |
| download | anubis-6156d3d7293a1757725b1d36a89a61ede1ffe850.tar.xz anubis-6156d3d7293a1757725b1d36a89a61ede1ffe850.zip | |
Refactor and split out things into cmd and lib (#77)
* Refactor anubis to split the business logic into a lib package, leaving cmd as a thin wrapper that uses it directly.
* Post-rebase fixes.
* Update changelog, remove unnecessary one.
* lib: refactor this
This is mostly based on my personal preferences for how Go code should
be laid out. I'm not sold on the package name "lib" (I'd call it anubis
but that would stutter), but people are probably going to import it as
libanubis so it's likely fine.
Packages have been "flattened" so that each area of concern keeps its
implementation in one place. This goes against the Java-esque style that
many people like, but I think this helps keep things simple.
Most notably: the dnsbl client (which is a hack) is an internal package
until it's made more generic. Then it can be made external.
I also fixed the logic such that `go generate` works and rebased on
main.
* internal/test: run tests iff npx exists and DONT_USE_NETWORK is not set
Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: install deps
Signed-off-by: Xe Iaso <me@xeiaso.net>
* .github/workflows: verbose go tests?
Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: sleep 2
Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: remove this test so CI works
Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: warmup per browser?
Signed-off-by: Xe Iaso <me@xeiaso.net>
* internal/test: disable for now :(
Signed-off-by: Xe Iaso <me@xeiaso.net>
* lib/anubis: do not apply bot rules if address check fails
Closes #83
---------
Signed-off-by: Xe Iaso <me@xeiaso.net>
Co-authored-by: Xe Iaso <me@xeiaso.net>
Diffstat (limited to 'lib/policy')
| -rw-r--r-- | lib/policy/bot.go | 32 | ||||
| -rw-r--r-- | lib/policy/config/config.go | 162 | ||||
| -rw-r--r-- | lib/policy/config/config_test.go | 248 | ||||
| -rw-r--r-- | lib/policy/config/testdata/bad/badregexes.json | 14 | ||||
| -rw-r--r-- | lib/policy/config/testdata/bad/invalid.json | 5 | ||||
| -rw-r--r-- | lib/policy/config/testdata/bad/nobots.json | 1 | ||||
| -rw-r--r-- | lib/policy/config/testdata/good/allow_everyone.json | 12 | ||||
| -rw-r--r-- | lib/policy/config/testdata/good/challengemozilla.json | 9 | ||||
| -rw-r--r-- | lib/policy/config/testdata/good/everything_blocked.json | 10 | ||||
| -rw-r--r-- | lib/policy/policy.go | 122 | ||||
| -rw-r--r-- | lib/policy/policy_test.go | 68 |
11 files changed, 683 insertions, 0 deletions
diff --git a/lib/policy/bot.go b/lib/policy/bot.go new file mode 100644 index 0000000..d9ca135 --- /dev/null +++ b/lib/policy/bot.go @@ -0,0 +1,32 @@ +package policy + +import ( + "fmt" + "regexp" + + "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/yl2chen/cidranger" +) + +type Bot struct { + Name string + UserAgent *regexp.Regexp + Path *regexp.Regexp + Action config.Rule `json:"action"` + Challenge *config.ChallengeRules + Ranger cidranger.Ranger +} + +func (b Bot) Hash() (string, error) { + var pathRex string + if b.Path != nil { + pathRex = b.Path.String() + } + var userAgentRex string + if b.UserAgent != nil { + userAgentRex = b.UserAgent.String() + } + + return internal.SHA256sum(fmt.Sprintf("%s::%s::%s", b.Name, pathRex, userAgentRex)), nil +} diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go new file mode 100644 index 0000000..67eddbf --- /dev/null +++ b/lib/policy/config/config.go @@ -0,0 +1,162 @@ +package config + +import ( + "errors" + "fmt" + "net" + "regexp" +) + +var ( + ErrNoBotRulesDefined = errors.New("config: must define at least one (1) bot rule") + ErrBotMustHaveName = errors.New("config.Bot: must set name") + ErrBotMustHaveUserAgentOrPath = errors.New("config.Bot: must set either user_agent_regex, path_regex, or remote_addresses") + ErrBotMustHaveUserAgentOrPathNotBoth = errors.New("config.Bot: must set either user_agent_regex, path_regex, and not both") + ErrUnknownAction = errors.New("config.Bot: unknown action") + ErrInvalidUserAgentRegex = errors.New("config.Bot: invalid user agent regex") + ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex") + ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR") +) + +type Rule string + +const ( + RuleUnknown Rule = "" + RuleAllow Rule = "ALLOW" + RuleDeny Rule = "DENY" + RuleChallenge Rule = "CHALLENGE" +) + +type Algorithm string + +const ( + AlgorithmUnknown Algorithm = "" + AlgorithmFast Algorithm = "fast" + 
AlgorithmSlow Algorithm = "slow" +) + +type BotConfig struct { + Name string `json:"name"` + UserAgentRegex *string `json:"user_agent_regex"` + PathRegex *string `json:"path_regex"` + Action Rule `json:"action"` + RemoteAddr []string `json:"remote_addresses"` + Challenge *ChallengeRules `json:"challenge,omitempty"` +} + +func (b BotConfig) Valid() error { + var errs []error + + if b.Name == "" { + errs = append(errs, ErrBotMustHaveName) + } + + if b.UserAgentRegex == nil && b.PathRegex == nil && (b.RemoteAddr == nil || len(b.RemoteAddr) == 0) { + errs = append(errs, ErrBotMustHaveUserAgentOrPath) + } + + if b.UserAgentRegex != nil && b.PathRegex != nil { + errs = append(errs, ErrBotMustHaveUserAgentOrPathNotBoth) + } + + if b.UserAgentRegex != nil { + if _, err := regexp.Compile(*b.UserAgentRegex); err != nil { + errs = append(errs, ErrInvalidUserAgentRegex, err) + } + } + + if b.PathRegex != nil { + if _, err := regexp.Compile(*b.PathRegex); err != nil { + errs = append(errs, ErrInvalidPathRegex, err) + } + } + + if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 { + for _, cidr := range b.RemoteAddr { + if _, _, err := net.ParseCIDR(cidr); err != nil { + errs = append(errs, ErrInvalidCIDR, err) + } + } + } + + switch b.Action { + case RuleAllow, RuleChallenge, RuleDeny: + // okay + default: + errs = append(errs, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action)) + } + + if b.Action == RuleChallenge && b.Challenge != nil { + if err := b.Challenge.Valid(); err != nil { + errs = append(errs, err) + } + } + + if len(errs) != 0 { + return fmt.Errorf("config: bot entry for %q is not valid:\n%w", b.Name, errors.Join(errs...)) + } + + return nil +} + +type ChallengeRules struct { + Difficulty int `json:"difficulty"` + ReportAs int `json:"report_as"` + Algorithm Algorithm `json:"algorithm"` +} + +var ( + ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid") + ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: 
difficulty is too low (must be >= 1)") + ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)") +) + +func (cr ChallengeRules) Valid() error { + var errs []error + + if cr.Difficulty < 1 { + errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty)) + } + + if cr.Difficulty > 64 { + errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty)) + } + + switch cr.Algorithm { + case AlgorithmFast, AlgorithmSlow, AlgorithmUnknown: + // do nothing, it's all good + default: + errs = append(errs, fmt.Errorf("%w: %q", ErrChallengeRuleHasWrongAlgorithm, cr.Algorithm)) + } + + if len(errs) != 0 { + return fmt.Errorf("config: challenge rules entry is not valid:\n%w", errors.Join(errs...)) + } + + return nil +} + +type Config struct { + Bots []BotConfig `json:"bots"` + DNSBL bool `json:"dnsbl"` +} + +func (c Config) Valid() error { + var errs []error + + if len(c.Bots) == 0 { + errs = append(errs, ErrNoBotRulesDefined) + } + + for _, b := range c.Bots { + if err := b.Valid(); err != nil { + errs = append(errs, err) + } + } + + if len(errs) != 0 { + return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...)) + } + + return nil +} diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go new file mode 100644 index 0000000..a169087 --- /dev/null +++ b/lib/policy/config/config_test.go @@ -0,0 +1,248 @@ +package config + +import ( + "encoding/json" + "errors" + "os" + "path/filepath" + "testing" +) + +func p[V any](v V) *V { return &v } + +func TestBotValid(t *testing.T) { + var tests = []struct { + name string + bot BotConfig + err error + }{ + { + name: "simple user agent", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleChallenge, + UserAgentRegex: p("Mozilla"), + }, + err: nil, + }, + { + name: "simple path", + bot: BotConfig{ + Name: "well-known-path", + Action: RuleAllow, + PathRegex: p("^/.well-known/.*$"), + }, + err: 
nil, + }, + { + name: "no rule name", + bot: BotConfig{ + Action: RuleChallenge, + UserAgentRegex: p("Mozilla"), + }, + err: ErrBotMustHaveName, + }, + { + name: "no rule matcher", + bot: BotConfig{ + Name: "broken-rule", + Action: RuleAllow, + }, + err: ErrBotMustHaveUserAgentOrPath, + }, + { + name: "both user-agent and path", + bot: BotConfig{ + Name: "path-and-user-agent", + Action: RuleDeny, + UserAgentRegex: p("Mozilla"), + PathRegex: p("^/.secret-place/.*$"), + }, + err: ErrBotMustHaveUserAgentOrPathNotBoth, + }, + { + name: "unknown action", + bot: BotConfig{ + Name: "Unknown action", + Action: RuleUnknown, + UserAgentRegex: p("Mozilla"), + }, + err: ErrUnknownAction, + }, + { + name: "invalid user agent regex", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleChallenge, + UserAgentRegex: p("a(b"), + }, + err: ErrInvalidUserAgentRegex, + }, + { + name: "invalid path regex", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleChallenge, + PathRegex: p("a(b"), + }, + err: ErrInvalidPathRegex, + }, + { + name: "challenge difficulty too low", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleChallenge, + PathRegex: p("Mozilla"), + Challenge: &ChallengeRules{ + Difficulty: 0, + ReportAs: 4, + Algorithm: "fast", + }, + }, + err: ErrChallengeDifficultyTooLow, + }, + { + name: "challenge difficulty too high", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleChallenge, + PathRegex: p("Mozilla"), + Challenge: &ChallengeRules{ + Difficulty: 420, + ReportAs: 4, + Algorithm: "fast", + }, + }, + err: ErrChallengeDifficultyTooHigh, + }, + { + name: "challenge wrong algorithm", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleChallenge, + PathRegex: p("Mozilla"), + Challenge: &ChallengeRules{ + Difficulty: 420, + ReportAs: 4, + Algorithm: "high quality rips", + }, + }, + err: ErrChallengeRuleHasWrongAlgorithm, + }, + { + name: "invalid cidr range", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleAllow, + RemoteAddr: []string{"0.0.0.0/33"}, + }, + 
err: ErrInvalidCIDR, + }, + { + name: "only filter by IP range", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleAllow, + RemoteAddr: []string{"0.0.0.0/0"}, + }, + err: nil, + }, + { + name: "filter by user agent and IP range", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleAllow, + UserAgentRegex: p("Mozilla"), + RemoteAddr: []string{"0.0.0.0/0"}, + }, + err: nil, + }, + { + name: "filter by path and IP range", + bot: BotConfig{ + Name: "mozilla-ua", + Action: RuleAllow, + PathRegex: p("^.*$"), + RemoteAddr: []string{"0.0.0.0/0"}, + }, + err: nil, + }, + } + + for _, cs := range tests { + cs := cs + t.Run(cs.name, func(t *testing.T) { + err := cs.bot.Valid() + if err == nil && cs.err == nil { + return + } + + if err == nil && cs.err != nil { + t.Errorf("didn't get an error, but wanted: %v", cs.err) + } + + if !errors.Is(err, cs.err) { + t.Logf("got wrong error from Valid()") + t.Logf("wanted: %v", cs.err) + t.Logf("got: %v", err) + t.Errorf("got invalid error from check") + } + }) + } +} + +func TestConfigValidKnownGood(t *testing.T) { + finfos, err := os.ReadDir("testdata/good") + if err != nil { + t.Fatal(err) + } + + for _, st := range finfos { + st := st + t.Run(st.Name(), func(t *testing.T) { + fin, err := os.Open(filepath.Join("testdata", "good", st.Name())) + if err != nil { + t.Fatal(err) + } + defer fin.Close() + + var c Config + if err := json.NewDecoder(fin).Decode(&c); err != nil { + t.Fatalf("can't decode file: %v", err) + } + + if err := c.Valid(); err != nil { + t.Fatal(err) + } + }) + } +} + +func TestConfigValidBad(t *testing.T) { + finfos, err := os.ReadDir("testdata/bad") + if err != nil { + t.Fatal(err) + } + + for _, st := range finfos { + st := st + t.Run(st.Name(), func(t *testing.T) { + fin, err := os.Open(filepath.Join("testdata", "bad", st.Name())) + if err != nil { + t.Fatal(err) + } + defer fin.Close() + + var c Config + if err := json.NewDecoder(fin).Decode(&c); err != nil { + t.Fatalf("can't decode file: %v", err) + } + + 
if err := c.Valid(); err == nil { + t.Fatal("validation should have failed but didn't somehow") + } else { + t.Log(err) + } + }) + } +} diff --git a/lib/policy/config/testdata/bad/badregexes.json b/lib/policy/config/testdata/bad/badregexes.json new file mode 100644 index 0000000..e85b85b --- /dev/null +++ b/lib/policy/config/testdata/bad/badregexes.json @@ -0,0 +1,14 @@ +{ + "bots": [ + { + "name": "path-bad", + "path_regex": "a(b", + "action": "DENY" + }, + { + "name": "user-agent-bad", + "user_agent_regex": "a(b", + "action": "DENY" + } + ] +}
\ No newline at end of file diff --git a/lib/policy/config/testdata/bad/invalid.json b/lib/policy/config/testdata/bad/invalid.json new file mode 100644 index 0000000..c5d1ff6 --- /dev/null +++ b/lib/policy/config/testdata/bad/invalid.json @@ -0,0 +1,5 @@ +{ + "bots": [ + {} + ] +}
\ No newline at end of file diff --git a/lib/policy/config/testdata/bad/nobots.json b/lib/policy/config/testdata/bad/nobots.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/lib/policy/config/testdata/bad/nobots.json @@ -0,0 +1 @@ +{}
\ No newline at end of file diff --git a/lib/policy/config/testdata/good/allow_everyone.json b/lib/policy/config/testdata/good/allow_everyone.json new file mode 100644 index 0000000..a7e1af7 --- /dev/null +++ b/lib/policy/config/testdata/good/allow_everyone.json @@ -0,0 +1,12 @@ +{ + "bots": [ + { + "name": "everyones-invited", + "remote_addresses": [ + "0.0.0.0/0", + "::/0" + ], + "action": "ALLOW" + } + ] +}
\ No newline at end of file diff --git a/lib/policy/config/testdata/good/challengemozilla.json b/lib/policy/config/testdata/good/challengemozilla.json new file mode 100644 index 0000000..e9d34ee --- /dev/null +++ b/lib/policy/config/testdata/good/challengemozilla.json @@ -0,0 +1,9 @@ +{ + "bots": [ + { + "name": "generic-browser", + "user_agent_regex": "Mozilla", + "action": "CHALLENGE" + } + ] +}
\ No newline at end of file diff --git a/lib/policy/config/testdata/good/everything_blocked.json b/lib/policy/config/testdata/good/everything_blocked.json new file mode 100644 index 0000000..e1763e4 --- /dev/null +++ b/lib/policy/config/testdata/good/everything_blocked.json @@ -0,0 +1,10 @@ +{ + "bots": [ + { + "name": "everything", + "user_agent_regex": ".*", + "action": "DENY" + } + ], + "dnsbl": false +}
\ No newline at end of file diff --git a/lib/policy/policy.go b/lib/policy/policy.go new file mode 100644 index 0000000..51b23ff --- /dev/null +++ b/lib/policy/policy.go @@ -0,0 +1,122 @@ +package policy + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net" + "regexp" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/yl2chen/cidranger" + + "github.com/TecharoHQ/anubis/lib/policy/config" +) + +var ( + PolicyApplications = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "anubis_policy_results", + Help: "The results of each policy rule", + }, []string{"rule", "action"}) +) + +type ParsedConfig struct { + orig config.Config + + Bots []Bot + DNSBL bool + DefaultDifficulty int +} + +func NewParsedConfig(orig config.Config) *ParsedConfig { + return &ParsedConfig{ + orig: orig, + } +} + +func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) { + var c config.Config + if err := json.NewDecoder(fin).Decode(&c); err != nil { + return nil, fmt.Errorf("can't parse policy config JSON %s: %w", fname, err) + } + + if err := c.Valid(); err != nil { + return nil, err + } + + var err error + + result := NewParsedConfig(c) + result.DefaultDifficulty = defaultDifficulty + + for _, b := range c.Bots { + if berr := b.Valid(); berr != nil { + err = errors.Join(err, berr) + continue + } + + var botParseErr error + parsedBot := Bot{ + Name: b.Name, + Action: b.Action, + } + + if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 { + parsedBot.Ranger = cidranger.NewPCTrieRanger() + + for _, cidr := range b.RemoteAddr { + _, rng, err := net.ParseCIDR(cidr) + if err != nil { + return nil, fmt.Errorf("[unexpected] range %s not parsing: %w", cidr, err) + } + + parsedBot.Ranger.Insert(cidranger.NewBasicRangerEntry(*rng)) + } + } + + if b.UserAgentRegex != nil { + userAgent, err := regexp.Compile(*b.UserAgentRegex) + if err != nil { + botParseErr = errors.Join(botParseErr, 
fmt.Errorf("while compiling user agent regexp: %w", err)) + continue + } else { + parsedBot.UserAgent = userAgent + } + } + + if b.PathRegex != nil { + path, err := regexp.Compile(*b.PathRegex) + if err != nil { + botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling path regexp: %w", err)) + continue + } else { + parsedBot.Path = path + } + } + + if b.Challenge == nil { + parsedBot.Challenge = &config.ChallengeRules{ + Difficulty: defaultDifficulty, + ReportAs: defaultDifficulty, + Algorithm: config.AlgorithmFast, + } + } else { + parsedBot.Challenge = b.Challenge + if parsedBot.Challenge.Algorithm == config.AlgorithmUnknown { + parsedBot.Challenge.Algorithm = config.AlgorithmFast + } + } + + result.Bots = append(result.Bots, parsedBot) + } + + if err != nil { + return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, err) + } + + result.DNSBL = c.DNSBL + + return result, nil +} diff --git a/lib/policy/policy_test.go b/lib/policy/policy_test.go new file mode 100644 index 0000000..16ca9c7 --- /dev/null +++ b/lib/policy/policy_test.go @@ -0,0 +1,68 @@ +package policy + +import ( + "os" + "path/filepath" + "testing" + + "github.com/TecharoHQ/anubis" + "github.com/TecharoHQ/anubis/data" +) + +func TestDefaultPolicyMustParse(t *testing.T) { + fin, err := data.BotPolicies.Open("botPolicies.json") + if err != nil { + t.Fatal(err) + } + defer fin.Close() + + if _, err := ParseConfig(fin, "botPolicies.json", anubis.DefaultDifficulty); err != nil { + t.Fatalf("can't parse config: %v", err) + } +} + +func TestGoodConfigs(t *testing.T) { + finfos, err := os.ReadDir("config/testdata/good") + if err != nil { + t.Fatal(err) + } + + for _, st := range finfos { + st := st + t.Run(st.Name(), func(t *testing.T) { + fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name())) + if err != nil { + t.Fatal(err) + } + defer fin.Close() + + if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err != nil { + t.Fatal(err) + } + 
}) + } +} + +func TestBadConfigs(t *testing.T) { + finfos, err := os.ReadDir("config/testdata/bad") + if err != nil { + t.Fatal(err) + } + + for _, st := range finfos { + st := st + t.Run(st.Name(), func(t *testing.T) { + fin, err := os.Open(filepath.Join("config", "testdata", "bad", st.Name())) + if err != nil { + t.Fatal(err) + } + defer fin.Close() + + if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err == nil { + t.Fatal(err) + } else { + t.Log(err) + } + }) + } +} |
