From ef52550e70a44c318dca8a406750589f67fac0eb Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Sat, 26 Apr 2025 10:01:15 -0400 Subject: fix(config): remove trailing newlines in regexes (#373) Closes #372 Fun YAML fact of the day: What is the difference between how these two expressions are parsed? ```yaml foo: > bar ``` ```yaml foo: >- bar ``` They are invisible in yaml, but when you evaluate them to JSON the difference is obvious: ```json { "foo": "bar\n" } ``` ```json { "foo": "bar" } ``` User-Agent strings, URL path values, and HTTP headers _do_ end in newlines in HTTP/1.1 wire form, but that newline is usually stripped before the server actually handles it. Also HTTP/2 is a thing and does not terminate header values with newlines. This change makes Anubis more aggressively detect mistaken uses of the yaml `>` operator and nudges the user into using the yaml `>-` operator which does not append the trailing newline. I had honestly forgotten about this YAML behavior because it wasn't relevant for so long. Oops! Glad I released a beta. Whenever you get into this state, Anubis will throw a config parsing error and then give you a message hinting at the folly of your ways. ``` config.Bot: regular expression ends with newline (try >- instead of > in yaml) ``` Big thanks to https://yaml-multiline.info, this helped me realize my folly instantly. @aiverson, this is official permission to say "told you so". Signed-off-by: Xe Iaso --- lib/policy/checker.go | 12 ++++++------ lib/policy/config/config.go | 13 +++++++++++++ .../config/testdata/bad/regex_ends_newline.json | 21 +++++++++++++++++++++ .../config/testdata/bad/regex_ends_newline.yaml | 17 +++++++++++++++++ 4 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 lib/policy/config/testdata/bad/regex_ends_newline.json create mode 100644 lib/policy/config/testdata/bad/regex_ends_newline.yaml (limited to 'lib') diff --git a/lib/policy/checker.go b/lib/policy/checker.go index ad98ced..51223cd 100644 --- a/lib/policy/checker.go +++ b/lib/policy/checker.go @@ -110,11 +110,11 @@ func NewUserAgentChecker(rexStr string) (Checker, error) { } func NewHeaderMatchesChecker(header, rexStr string) (Checker, error) { - rex, err := regexp.Compile(rexStr) + rex, err := regexp.Compile(strings.TrimSpace(rexStr)) if err != nil { return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err) } - return &HeaderMatchesChecker{header, rex, internal.SHA256sum(header + ": " + rexStr)}, nil + return &HeaderMatchesChecker{strings.TrimSpace(header), rex, internal.SHA256sum(header + ": " + rexStr)}, nil } func (hmc *HeaderMatchesChecker) Check(r *http.Request) (bool, error) { @@ -135,7 +135,7 @@ type PathChecker struct { } func NewPathChecker(rexStr string) (Checker, error) { - rex, err := regexp.Compile(rexStr) + rex, err := regexp.Compile(strings.TrimSpace(rexStr)) if err != nil { return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err) } @@ -155,7 +155,7 @@ func (pc *PathChecker) Hash() string { } func NewHeaderExistsChecker(key string) Checker { - return headerExistsChecker{key} + return headerExistsChecker{strings.TrimSpace(key)} } type headerExistsChecker struct { @@ -180,11 +180,11 @@ func NewHeadersChecker(headermap map[string]string) (Checker, error) { for key, rexStr := range headermap { if rexStr == ".*" { - result = append(result, headerExistsChecker{key}) + result = append(result, headerExistsChecker{strings.TrimSpace(key)}) continue } - rex, err := regexp.Compile(rexStr) + rex, err := regexp.Compile(strings.TrimSpace(rexStr)) if err != nil { errs = append(errs, fmt.Errorf("while compiling header %s regex %s: %w", key, rexStr, err)) continue diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 627e9cf..c670bac 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -24,6 +24,7 @@ var ( ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex") ErrInvalidHeadersRegex = errors.New("config.Bot: invalid headers regex") ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR") + ErrRegexEndsWithNewline = errors.New("config.Bot: regular expression ends with newline (try >- instead of > in yaml)") ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file") ErrCantSetBotAndImportValuesAtOnce = errors.New("config.BotOrImport: can't set bot rules and import values at the same time") ErrMustSetBotOrImportRules = errors.New("config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both") @@ -91,12 +92,20 @@ func (b BotConfig) Valid() error { } if b.UserAgentRegex != nil { + if strings.HasSuffix(*b.UserAgentRegex, "\n") { + errs = append(errs, fmt.Errorf("%w: user agent regex: %q", ErrRegexEndsWithNewline, *b.UserAgentRegex)) + } + if _, err := regexp.Compile(*b.UserAgentRegex); err != nil { errs = append(errs, ErrInvalidUserAgentRegex, err) } } if b.PathRegex != nil { + if strings.HasSuffix(*b.PathRegex, "\n") { + errs = append(errs, fmt.Errorf("%w: path regex: %q", ErrRegexEndsWithNewline, *b.PathRegex)) + } + if _, err := regexp.Compile(*b.PathRegex); err != nil { errs = append(errs, ErrInvalidPathRegex, err) } @@ -108,6 +117,10 @@ func (b BotConfig) Valid() error { continue } + if strings.HasSuffix(expr, "\n") { + errs = append(errs, fmt.Errorf("%w: header %s regex: %q", ErrRegexEndsWithNewline, name, expr)) + } + if _, err := regexp.Compile(expr); err != nil { errs = append(errs, ErrInvalidHeadersRegex, err) } diff --git a/lib/policy/config/testdata/bad/regex_ends_newline.json b/lib/policy/config/testdata/bad/regex_ends_newline.json new file mode 100644 index 0000000..14c7fa9 --- /dev/null +++ b/lib/policy/config/testdata/bad/regex_ends_newline.json @@ -0,0 +1,21 @@ +{ + "bots": [ + { + "name": "user-agent-ends-newline", + "user_agent_regex": "Mozilla\n", + "action": "CHALLENGE" + }, + { + "name": "path-ends-newline", + "path_regex": "^/evil/.*$\n", + "action": "CHALLENGE" + }, + { + "name": "headers-ends-newline", + "headers_regex": { + "CF-Worker": ".*\n" + }, + "action": "CHALLENGE" + } + ] +} diff --git a/lib/policy/config/testdata/bad/regex_ends_newline.yaml b/lib/policy/config/testdata/bad/regex_ends_newline.yaml new file mode 100644 index 0000000..1f0ae85 --- /dev/null +++ b/lib/policy/config/testdata/bad/regex_ends_newline.yaml @@ -0,0 +1,17 @@ +bots: +- name: user-agent-ends-newline + # Subtle bug: this ends with a newline + user_agent_regex: > + Mozilla + action: CHALLENGE +- name: path-ends-newline + # Subtle bug: this ends with a newline + path_regex: > + ^/evil/.*$ + action: CHALLENGE +- name: headers-ends-newline + # Subtle bug: this ends with a newline + headers_regex: + CF-Worker: > + .* + action: CHALLENGE \ No newline at end of file -- cgit v1.2.3