about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Xe Iaso <me@xeiaso.net> 2025-04-22 07:47:33 -0400
committer: Xe Iaso <me@xeiaso.net> 2025-04-22 13:47:09 -0400
commit: 7396ece1d77d5ad29b68a6357c6859ddcab84c7f (patch)
tree: c463f08568f25be3c8858780cca42432e2c428ef /lib
parent: 2db4105479e5920e983b15b0341104d6e572c1ea (diff)
download: anubis-7396ece1d77d5ad29b68a6357c6859ddcab84c7f.tar.xz
download: anubis-7396ece1d77d5ad29b68a6357c6859ddcab84c7f.zip
feat(config): support importing bot policy snippets

This changes the grammar of the Anubis bot policy config to allow importing from internal shared rules or external rules on the filesystem. This lets you create a file at `/data/policies/block-evilbot.yaml` and then import it with:

```yaml
bots:
  - import: /data/policies/block-evilbot.yaml
```

This also explodes the default policy file into a bunch of composable snippets.

Thank you @Aibrew for your example gitea Atom / RSS feed rules!

Signed-off-by: Xe Iaso <me@xeiaso.net>

Diffstat (limited to 'lib')
-rw-r--r-- lib/policy/config/config.go 170
-rw-r--r-- lib/policy/config/config_test.go 114
-rw-r--r-- lib/policy/config/testdata/bad/import_invalid_file.json 7
-rw-r--r-- lib/policy/config/testdata/bad/import_invalid_file.yaml 2
-rw-r--r-- lib/policy/config/testdata/good/import_filesystem.json 7
-rw-r--r-- lib/policy/config/testdata/good/import_filesystem.yaml 2
-rw-r--r-- lib/policy/config/testdata/good/import_keep_internet_working.json 7
-rw-r--r-- lib/policy/config/testdata/good/import_keep_internet_working.yaml 2
-rw-r--r-- lib/policy/config/testdata/hack-test.yaml 3
-rw-r--r-- lib/policy/policy.go 13
-rw-r--r-- lib/policy/testdata/hack-test.yaml 3
11 files changed, 314 insertions, 16 deletions
diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go
index b3d5cac..d9883c5 100644
--- a/lib/policy/config/config.go
+++ b/lib/policy/config/config.go
@@ -3,8 +3,15 @@ package config
import (
"errors"
"fmt"
+ "io"
+ "io/fs"
"net"
+ "os"
"regexp"
+ "strings"
+
+ "github.com/TecharoHQ/anubis/data"
+ "k8s.io/apimachinery/pkg/util/yaml"
)
var (
@@ -17,6 +24,9 @@ var (
ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex")
ErrInvalidHeadersRegex = errors.New("config.Bot: invalid headers regex")
ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR")
+ ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file")
+ ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time")
+ ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement")
)
type Rule string
@@ -47,6 +57,24 @@ type BotConfig struct {
Challenge *ChallengeRules `json:"challenge,omitempty"`
}
+func (b BotConfig) Zero() bool {
+ for _, cond := range []bool{
+ b.Name != "",
+ b.UserAgentRegex != nil,
+ b.PathRegex != nil,
+ len(b.HeadersRegex) != 0,
+ b.Action != "",
+ len(b.RemoteAddr) != 0,
+ b.Challenge != nil,
+ } {
+ if cond {
+ return false
+ }
+ }
+
+ return true
+}
+
func (b BotConfig) Valid() error {
var errs []error
@@ -151,9 +179,147 @@ func (cr ChallengeRules) Valid() error {
return nil
}
+type ImportStatement struct {
+ Import string `json:"import"`
+ Bots []BotConfig
+}
+
+func (is *ImportStatement) open() (fs.File, error) {
+ if strings.HasPrefix(is.Import, "(data)/") {
+ fname := strings.TrimPrefix(is.Import, "(data)/")
+ fin, err := data.BotPolicies.Open(fname)
+ return fin, err
+ }
+
+ return os.Open(is.Import)
+}
+
+func (is *ImportStatement) load() error {
+ fin, err := is.open()
+ if err != nil {
+ return fmt.Errorf("can't open %s: %w", is.Import, err)
+ }
+ defer fin.Close()
+
+ var result []BotConfig
+
+ if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&result); err != nil {
+ return fmt.Errorf("can't parse %s: %w", is.Import, err)
+ }
+
+ var errs []error
+
+ for _, b := range result {
+ if err := b.Valid(); err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ if len(errs) != 0 {
+ return fmt.Errorf("config %s is not valid:\n%w", is.Import, errors.Join(errs...))
+ }
+
+ is.Bots = result
+
+ return nil
+}
+
+func (is *ImportStatement) Valid() error {
+ return is.load()
+}
+
+type BotOrImport struct {
+ *BotConfig `json:",inline"`
+ *ImportStatement `json:",inline"`
+}
+
+func (boi *BotOrImport) Valid() error {
+ if boi.BotConfig != nil && boi.ImportStatement != nil {
+ return errors.New("[unexpected] can't set bot config and import statement")
+ }
+
+ if boi.BotConfig != nil {
+ return boi.BotConfig.Valid()
+ }
+
+ if boi.ImportStatement != nil {
+ return boi.ImportStatement.Valid()
+ }
+
+ return ErrMustSetBotOrImportRules
+}
+
+type fileConfig struct {
+ Bots []BotOrImport `json:"bots"`
+ DNSBL bool `json:"dnsbl"`
+}
+
+func (c fileConfig) Valid() error {
+ var errs []error
+
+ if len(c.Bots) == 0 {
+ errs = append(errs, ErrNoBotRulesDefined)
+ }
+
+ for _, b := range c.Bots {
+ if err := b.Valid(); err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ if len(errs) != 0 {
+ return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
+ }
+
+ return nil
+}
+
+func Load(fin io.Reader, fname string) (*Config, error) {
+ var c fileConfig
+ if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
+ return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
+ }
+
+ if err := c.Valid(); err != nil {
+ return nil, err
+ }
+
+ result := &Config{
+ DNSBL: c.DNSBL,
+ }
+
+ var validationErrs []error
+
+ for _, boi := range c.Bots {
+ if boi.ImportStatement != nil {
+ if err := boi.load(); err != nil {
+ validationErrs = append(validationErrs, err)
+ continue
+ }
+
+ result.Bots = append(result.Bots, boi.ImportStatement.Bots...)
+ }
+
+ if boi.BotConfig != nil {
+ if err := boi.BotConfig.Valid(); err != nil {
+ validationErrs = append(validationErrs, err)
+ continue
+ }
+
+ result.Bots = append(result.Bots, *boi.BotConfig)
+ }
+ }
+
+ if len(validationErrs) > 0 {
+ return nil, fmt.Errorf("errors validating policy config %s: %w", fname, errors.Join(validationErrs...))
+ }
+
+ return result, nil
+}
+
type Config struct {
- Bots []BotConfig `json:"bots"`
- DNSBL bool `json:"dnsbl"`
+ Bots []BotConfig
+ DNSBL bool
}
func (c Config) Valid() error {
diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go
index 4176126..86c490e 100644
--- a/lib/policy/config/config_test.go
+++ b/lib/policy/config/config_test.go
@@ -2,10 +2,12 @@ package config
import (
"errors"
+ "io/fs"
"os"
"path/filepath"
"testing"
+ "github.com/TecharoHQ/anubis/data"
"k8s.io/apimachinery/pkg/util/yaml"
)
@@ -219,13 +221,69 @@ func TestConfigValidKnownGood(t *testing.T) {
}
defer fin.Close()
- var c Config
- if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
- t.Fatalf("can't decode file: %v", err)
+ c, err := Load(fin, st.Name())
+ if err != nil {
+ t.Fatal(err)
}
if err := c.Valid(); err != nil {
- t.Fatal(err)
+ t.Error(err)
+ }
+
+ if len(c.Bots) == 0 {
+ t.Error("wanted more than 0 bots, got zero")
+ }
+ })
+ }
+}
+
+func TestImportStatement(t *testing.T) {
+ type testCase struct {
+ name string
+ importPath string
+ err error
+ }
+
+ var tests []testCase
+
+ for _, folderName := range []string{
+ "apps",
+ "bots",
+ "common",
+ "crawlers",
+ } {
+ if err := fs.WalkDir(data.BotPolicies, folderName, func(path string, d fs.DirEntry, err error) error {
+ if err != nil {
+ return err
+ }
+ if d.IsDir() {
+ return nil
+ }
+
+ tests = append(tests, testCase{
+ name: "(data)/" + path,
+ importPath: "(data)/" + path,
+ err: nil,
+ })
+
+ return nil
+ }); err != nil {
+ t.Fatal(err)
+ }
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ is := &ImportStatement{
+ Import: tt.importPath,
+ }
+
+ if err := is.Valid(); err != nil {
+ t.Errorf("validation error: %v", err)
+ }
+
+ if len(is.Bots) == 0 {
+ t.Error("wanted bot definitions, but got none")
}
})
}
@@ -246,7 +304,7 @@ func TestConfigValidBad(t *testing.T) {
}
defer fin.Close()
- var c Config
+ var c fileConfig
if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
t.Fatalf("can't decode file: %v", err)
}
@@ -259,3 +317,49 @@ func TestConfigValidBad(t *testing.T) {
})
}
}
+
+func TestBotConfigZero(t *testing.T) {
+ var b BotConfig
+ if !b.Zero() {
+ t.Error("zero value BotConfig is not zero value")
+ }
+
+ b.Name = "hi"
+ if b.Zero() {
+ t.Error("BotConfig with name is zero value")
+ }
+
+ b.UserAgentRegex = p(".*")
+ if b.Zero() {
+ t.Error("BotConfig with user agent regex is zero value")
+ }
+
+ b.PathRegex = p(".*")
+ if b.Zero() {
+ t.Error("BotConfig with path regex is zero value")
+ }
+
+ b.HeadersRegex = map[string]string{"hi": "there"}
+ if b.Zero() {
+ t.Error("BotConfig with headers regex is zero value")
+ }
+
+ b.Action = RuleAllow
+ if b.Zero() {
+ t.Error("BotConfig with action is zero value")
+ }
+
+ b.RemoteAddr = []string{"::/0"}
+ if b.Zero() {
+ t.Error("BotConfig with remote addresses is zero value")
+ }
+
+ b.Challenge = &ChallengeRules{
+ Difficulty: 4,
+ ReportAs: 4,
+ Algorithm: AlgorithmFast,
+ }
+ if b.Zero() {
+ t.Error("BotConfig with challenge rules is zero value")
+ }
+}
diff --git a/lib/policy/config/testdata/bad/import_invalid_file.json b/lib/policy/config/testdata/bad/import_invalid_file.json
new file mode 100644
index 0000000..c7546c0
--- /dev/null
+++ b/lib/policy/config/testdata/bad/import_invalid_file.json
@@ -0,0 +1,7 @@
+{
+ "bots": [
+ {
+ "import": "(data)/does-not-exist-fake-file.yaml"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/lib/policy/config/testdata/bad/import_invalid_file.yaml b/lib/policy/config/testdata/bad/import_invalid_file.yaml
new file mode 100644
index 0000000..df78c06
--- /dev/null
+++ b/lib/policy/config/testdata/bad/import_invalid_file.yaml
@@ -0,0 +1,2 @@
+bots:
+- import: (data)/does-not-exist-fake-file.yaml
\ No newline at end of file
diff --git a/lib/policy/config/testdata/good/import_filesystem.json b/lib/policy/config/testdata/good/import_filesystem.json
new file mode 100644
index 0000000..31f3700
--- /dev/null
+++ b/lib/policy/config/testdata/good/import_filesystem.json
@@ -0,0 +1,7 @@
+{
+ "bots": [
+ {
+ "import": "./testdata/hack-test.yaml"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/lib/policy/config/testdata/good/import_filesystem.yaml b/lib/policy/config/testdata/good/import_filesystem.yaml
new file mode 100644
index 0000000..422ccc4
--- /dev/null
+++ b/lib/policy/config/testdata/good/import_filesystem.yaml
@@ -0,0 +1,2 @@
+bots:
+- import: ./testdata/hack-test.yaml
\ No newline at end of file
diff --git a/lib/policy/config/testdata/good/import_keep_internet_working.json b/lib/policy/config/testdata/good/import_keep_internet_working.json
new file mode 100644
index 0000000..68ff2db
--- /dev/null
+++ b/lib/policy/config/testdata/good/import_keep_internet_working.json
@@ -0,0 +1,7 @@
+{
+ "bots": [
+ {
+ "import": "(data)/common/keep-internet-working.yaml"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/lib/policy/config/testdata/good/import_keep_internet_working.yaml b/lib/policy/config/testdata/good/import_keep_internet_working.yaml
new file mode 100644
index 0000000..923ffe3
--- /dev/null
+++ b/lib/policy/config/testdata/good/import_keep_internet_working.yaml
@@ -0,0 +1,2 @@
+bots:
+- import: (data)/common/keep-internet-working.yaml
\ No newline at end of file
diff --git a/lib/policy/config/testdata/hack-test.yaml b/lib/policy/config/testdata/hack-test.yaml
new file mode 100644
index 0000000..cd4d7d0
--- /dev/null
+++ b/lib/policy/config/testdata/hack-test.yaml
@@ -0,0 +1,3 @@
+- name: well-known
+ path_regex: ^/.well-known/.*$
+ action: ALLOW
\ No newline at end of file
diff --git a/lib/policy/policy.go b/lib/policy/policy.go
index 5923f16..705e184 100644
--- a/lib/policy/policy.go
+++ b/lib/policy/policy.go
@@ -7,7 +7,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
- "k8s.io/apimachinery/pkg/util/yaml"
"github.com/TecharoHQ/anubis/lib/policy/config"
)
@@ -20,26 +19,22 @@ var (
)
type ParsedConfig struct {
- orig config.Config
+ orig *config.Config
Bots []Bot
DNSBL bool
DefaultDifficulty int
}
-func NewParsedConfig(orig config.Config) *ParsedConfig {
+func NewParsedConfig(orig *config.Config) *ParsedConfig {
return &ParsedConfig{
orig: orig,
}
}
func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
- var c config.Config
- if err := yaml.NewYAMLToJSONDecoder(fin).Decode(&c); err != nil {
- return nil, fmt.Errorf("can't parse policy config YAML %s: %w", fname, err)
- }
-
- if err := c.Valid(); err != nil {
+ c, err := config.Load(fin, fname)
+ if err != nil {
return nil, err
}
diff --git a/lib/policy/testdata/hack-test.yaml b/lib/policy/testdata/hack-test.yaml
new file mode 100644
index 0000000..cd4d7d0
--- /dev/null
+++ b/lib/policy/testdata/hack-test.yaml
@@ -0,0 +1,3 @@
+- name: well-known
+ path_regex: ^/.well-known/.*$
+ action: ALLOW
\ No newline at end of file