diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..7e0124a --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,26 @@ +name: CI +# yamllint thinks the `on` key is being turned into `true` +# yamllint disable-line rule:truthy +on: [push] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + go-version: [ '1.19', '1.20', '1.21.x' ] + + steps: + - uses: actions/checkout@v4 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + - name: Run tests + run: go test -v . + lint-yaml: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Lint YAML + run: yamllint . diff --git a/.github/workflows/semver.yaml b/.github/workflows/semver.yaml new file mode 100644 index 0000000..909b4a5 --- /dev/null +++ b/.github/workflows/semver.yaml @@ -0,0 +1,20 @@ +name: Auto Semver +# yamllint thinks the `on` key is being turned into `true` +# yamllint disable-line rule:truthy +on: + pull_request: + types: + - closed + branches: + - main +jobs: + update: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + - name: Run Auto Semver + uses: discoverygarden/auto-semver@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 0000000..3576647 --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,6 @@ +extends: default +rules: + document-start: disable + line-length: disable + brackets: + max-spaces-inside: 1 diff --git a/README.md b/README.md index b3329b3..84ef85d 100644 --- a/README.md +++ b/README.md @@ -23,3 +23,7 @@ spec: ## Blocklist The blocklists should be acccessible via http/s and be a plain text list of IP address or useragents. + +## Testing + +Running `go test` will run a set of unit tests. Running `docker compose up` will start an end to end testing environment where `allowed-*` containers should be able to make requests, while `blocked-*` containers should fail. diff --git a/botblocker.go b/botblocker.go index d27d021..1d695a4 100644 --- a/botblocker.go +++ b/botblocker.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "fmt" + "io" "net/http" "net/netip" @@ -31,60 +32,103 @@ func CreateConfig() *Config { type BotBlocker struct { next http.Handler name string - ipBlocklist []netip.Addr + prefixBlocklist []netip.Prefix userAgentBlockList []string lastUpdated time.Time Config } -func (b *BotBlocker) Update() error { +func (b *BotBlocker) update() error { startTime := time.Now() - err := b.UpdateIps() + err := b.updateIps() if err != nil { - return fmt.Errorf("failed to update IP blocklists: %w", err) + return fmt.Errorf("failed to update CIDR blocklists: %w", err) } - err = b.UpdateUserAgents() + err = b.updateUserAgents() if err != nil { - return fmt.Errorf("failed to update IP blocklists: %w", err) + return fmt.Errorf("failed to update user agent blocklists: %w", err) } b.lastUpdated = time.Now() - duration := time.Now().Sub(startTime) - log.Info("Updated block lists. Blocked IPs: ", len(b.ipBlocklist), " Duration: ", duration) + duration := time.Since(startTime) + log.Info("Updated block lists. Blocked CIDRs: ", len(b.prefixBlocklist), " Duration: ", duration) return nil } -func (b *BotBlocker) UpdateIps() error { - ipBlockList := make([]netip.Addr, 0) +func (b *BotBlocker) updateIps() error { + prefixBlockList := make([]netip.Prefix, 0) - log.Info("Updating IP blocklist") + log.Info("Updating CIDR blocklist") for _, url := range b.IpBlocklistUrls { resp, err := http.Get(url) if err != nil { - return fmt.Errorf("failed fetch IP list: %w", err) + return fmt.Errorf("failed fetch CIDR list: %w", err) } if resp.StatusCode > 299 { - return fmt.Errorf("failed fetch IP list: received a %v from %v", resp.Status, url) + return fmt.Errorf("failed to fetch CIDR list: received a %v from %v", resp.Status, url) } - defer resp.Body.Close() - scanner := bufio.NewScanner(resp.Body) - for scanner.Scan() { - addrStr := scanner.Text() - addr, err := netip.ParseAddr(addrStr) + prefixes, err := readPrefixes(resp.Body) + if err != nil { + return fmt.Errorf("failed to update CIDRs: %e", err) + } + prefixBlockList = append(prefixBlockList, prefixes...) + } + + b.prefixBlocklist = prefixBlockList + + return nil +} + +func readPrefixes(prefixReader io.ReadCloser) ([]netip.Prefix, error) { + prefixes := make([]netip.Prefix, 0) + defer prefixReader.Close() + scanner := bufio.NewScanner(prefixReader) + for scanner.Scan() { + entry := strings.TrimSpace(scanner.Text()) + var prefix netip.Prefix + if strings.Contains(entry, "/") { + var err error + prefix, err = netip.ParsePrefix(entry) if err != nil { - return fmt.Errorf("failed to parse IP address: %w", err) + return []netip.Prefix{}, err + } + } else { + addr, err := netip.ParseAddr(entry) + if err != nil { + return []netip.Prefix{}, err + } + var bits int + if addr.Is4() { + bits = 32 + } else { + bits = 128 + } + prefix, err = addr.Prefix(bits) + if err != nil { + return []netip.Prefix{}, err } - ipBlockList = append(ipBlockList, addr) } + prefixes = append(prefixes, prefix) } - b.ipBlocklist = ipBlockList + return prefixes, nil +} - return nil +func readUserAgents(userAgentReader io.ReadCloser) ([]string, error) { + userAgents := make([]string, 0) + + defer userAgentReader.Close() + scanner := bufio.NewScanner(userAgentReader) + for scanner.Scan() { + agent := strings.ToLower(strings.TrimSpace(scanner.Text())) + userAgents = append(userAgents, agent) + } + + return userAgents, nil } -func (b *BotBlocker) UpdateUserAgents() error { +func (b *BotBlocker) updateUserAgents() error { userAgentBlockList := make([]string, 0) log.Info("Updating user agent blocklist") @@ -97,12 +141,11 @@ func (b *BotBlocker) UpdateUserAgents() error { return fmt.Errorf("failed fetch useragent list: received a %v from %v", resp.Status, url) } - defer resp.Body.Close() - scanner := bufio.NewScanner(resp.Body) - for scanner.Scan() { - agent := strings.ToLower(strings.TrimSpace(scanner.Text())) - userAgentBlockList = append(userAgentBlockList, agent) + agents, err := readUserAgents(resp.Body) + if err != nil { + return err } + userAgentBlockList = append(userAgentBlockList, agents...) } b.userAgentBlockList = userAgentBlockList @@ -122,7 +165,7 @@ func New(ctx context.Context, next http.Handler, config *Config, name string) (h next: next, Config: *config, } - err = blocker.Update() + err = blocker.update() if err != nil { return nil, fmt.Errorf("failed to update blocklists: %s", err) } @@ -130,41 +173,55 @@ func New(ctx context.Context, next http.Handler, config *Config, name string) (h } func (b *BotBlocker) ServeHTTP(rw http.ResponseWriter, req *http.Request) { - if time.Now().Sub(b.lastUpdated) > time.Duration(time.Hour) { - err := b.Update() + if time.Since(b.lastUpdated) > time.Hour { + err := b.update() if err != nil { log.Errorf("failed to update blocklist: %v", err) } } startTime := time.Now() - log.Debugf("Checking request: IP: \"%v\" user agent: \"%s\"", req.RemoteAddr, req.UserAgent()) + log.Debugf("Checking request: CIDR: \"%v\" user agent: \"%s\"", req.RemoteAddr, req.UserAgent()) + timer := func() { + log.Debugf("Checked request in %v", time.Since(startTime)) + } + defer timer() remoteAddrPort, err := netip.ParseAddrPort(req.RemoteAddr) if err != nil { http.Error(rw, "internal error", http.StatusInternalServerError) return } - remoteAddr := remoteAddrPort.Addr() + if b.shouldBlockIp(remoteAddrPort.Addr()) { + log.Infof("blocked request with from IP %v", remoteAddrPort.Addr()) + http.Error(rw, "blocked", http.StatusForbidden) + return + } + + agent := strings.ToLower(req.UserAgent()) + if b.shouldBlockAgent(agent) { + log.Infof("blocked request with user agent %v because it contained %v", agent, agent) + http.Error(rw, "blocked", http.StatusForbidden) + return + } + + b.next.ServeHTTP(rw, req) +} - for _, badIP := range b.ipBlocklist { - if remoteAddr == badIP { - log.Infof("blocked request with from IP %v", remoteAddrPort.Addr()) - log.Debugf("Checked request in %v", time.Now().Sub(startTime)) - http.Error(rw, "blocked", http.StatusForbidden) - return +func (b *BotBlocker) shouldBlockIp(addr netip.Addr) bool { + for _, badPrefix := range b.prefixBlocklist { + if badPrefix.Contains(addr) { + return true } } + return false +} - agent := strings.ToLower(req.UserAgent()) +func (b *BotBlocker) shouldBlockAgent(userAgent string) bool { + userAgent = strings.ToLower(strings.TrimSpace(userAgent)) for _, badAgent := range b.userAgentBlockList { - if strings.Contains(agent, badAgent) { - log.Infof("blocked request with user agent %v because it contained %v", agent, badAgent) - log.Debugf("Checked request in %v", time.Now().Sub(startTime)) - http.Error(rw, "blocked", http.StatusForbidden) - return + if strings.Contains(userAgent, badAgent) { + return true } } - - log.Debugf("Checked request in %v", time.Now().Sub(startTime)) - b.next.ServeHTTP(rw, req) + return false } diff --git a/botblocker_test.go b/botblocker_test.go new file mode 100644 index 0000000..fd3f23a --- /dev/null +++ b/botblocker_test.go @@ -0,0 +1,178 @@ +package traefik_ultimate_bad_bot_blocker + +import ( + "net/netip" + "os" + "testing" +) + +func equalStrings(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + return true +} + +func equalPrefixes(a, b []netip.Prefix) bool { + if len(a) != len(b) { + return false + } + for i := 0; i < len(a); i++ { + if a[i] != b[i] { + return false + } + } + return true +} + +func TestReadIps(t *testing.T) { + f, err := os.Open("fixtures/lists/ip-blocklist") + if err != nil { + t.Fatal("Failed to open testfile") + } + + expected := []netip.Prefix{ + netip.PrefixFrom( + netip.AddrFrom4([4]byte{10, 10, 10, 2}), + 32, + ), + netip.PrefixFrom( + netip.AddrFrom4([4]byte{192, 168, 1, 1}), + 32, + ), + netip.PrefixFrom( + netip.AddrFrom4([4]byte{10, 10, 20, 0}), + 24, + ), + netip.PrefixFrom( + netip.AddrFrom16([16]byte{0x20, 0x01, 0xd, 0xb8, 0x33, 0x33, 0x44, 0x44, 0x55, 0x55, 0x66, 0x66, 0x77, 0x77, 0x88, 0x88}), + 128, + ), + } + prefixes, err := readPrefixes(f) + if !equalPrefixes(prefixes, expected) || err != nil { + t.Fatalf("readPrefixes(f) = %v, %e; want %v, ", prefixes, err, expected) + } +} + +func TestReadUserAgents(t *testing.T) { + f, err := os.Open("fixtures/lists/useragent-blocklist") + if err != nil { + t.Fatal("Failed to open testfile") + } + + expected := []string{"nintendobrowser", "claudebot"} + userAgents, err := readUserAgents(f) + if !equalStrings(userAgents, expected) || err != nil { + t.Fatalf("readUserAgents(f) = %v, %e; want %v, ", userAgents, err, expected) + } +} + +func TestShouldBlockIp(t *testing.T) { + botBlocker := BotBlocker{ + prefixBlocklist: []netip.Prefix{ + netip.PrefixFrom( + netip.AddrFrom4([4]byte{10, 10, 10, 2}), + 32, + ), + netip.PrefixFrom( + netip.AddrFrom4([4]byte{192, 168, 1, 1}), + 32, + ), + }, + } + badIp := netip.AddrFrom4([4]byte{10, 10, 10, 2}) + + blocked := botBlocker.shouldBlockIp(badIp) + if !blocked { + t.Fatalf("botBlocker.shouldBlockIp(%v) = %t; want true", badIp, blocked) + } +} + +func TestShouldAllowIp(t *testing.T) { + botBlocker := BotBlocker{ + prefixBlocklist: []netip.Prefix{ + netip.PrefixFrom( + netip.AddrFrom4([4]byte{10, 10, 10, 2}), + 32, + ), + netip.PrefixFrom( + netip.AddrFrom4([4]byte{192, 168, 1, 1}), + 32, + ), + }, + } + ip := netip.AddrFrom4([4]byte{10, 10, 10, 2}) + + blocked := botBlocker.shouldBlockIp(ip) + if !blocked { + t.Fatalf("botBlocker.shouldBlockIp(%v) = %t; want false", ip, blocked) + } +} + +func TestShouldBlockIpCidr(t *testing.T) { + botBlocker := BotBlocker{ + prefixBlocklist: []netip.Prefix{ + netip.PrefixFrom( + netip.AddrFrom4([4]byte{10, 10, 10, 0}), + 24, + ), + }, + } + badIp := netip.AddrFrom4([4]byte{10, 10, 10, 2}) + + blocked := botBlocker.shouldBlockIp(badIp) + if !blocked { + t.Fatalf("botBlocker.shouldBlockIp(%v) = %t; want true", badIp, blocked) + } +} + +func TestShouldAllowIpCidr(t *testing.T) { + botBlocker := BotBlocker{ + prefixBlocklist: []netip.Prefix{ + netip.PrefixFrom( + netip.AddrFrom4([4]byte{10, 10, 10, 0}), + 24, + ), + }, + } + goodIp := netip.AddrFrom4([4]byte{10, 10, 20, 2}) + + blocked := botBlocker.shouldBlockIp(goodIp) + if blocked { + t.Fatalf("botBlocker.shouldBlockIp(%v) = %t; want false", goodIp, blocked) + } +} + +func TestShouldBlockUserAgent(t *testing.T) { + botBlocker := BotBlocker{ + userAgentBlockList: []string{ + "nintendobrowser", + }, + } + badUserAgent := "Mozilla/5.0 (Nintendo WiiU) AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.12 NintendoBrowser/4.3.1.11264.US" + + blocked := botBlocker.shouldBlockAgent(badUserAgent) + if !blocked { + t.Fatalf("botBlocker.shouldBlockAgent(%s) = %t; want true", badUserAgent, blocked) + } +} + +func TestShouldAlowUserAgent(t *testing.T) { + botBlocker := BotBlocker{ + userAgentBlockList: []string{ + "nintendobrowser", + }, + } + userAgent := "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36" + + blocked := botBlocker.shouldBlockAgent(userAgent) + if blocked { + t.Fatalf("botBlocker.shouldBlockAgent(%s) = %t; want false", userAgent, blocked) + } +} diff --git a/docker-compose.yml b/docker-compose.yml index 058cec6..7793fc8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ name: traefik-bad-bot-blocker services: traefik: image: traefik:3.0 - command: + command: - "--providers.docker=true" - "--providers.docker.exposedbydefault=false" - "--entryPoints.web.address=:80" @@ -33,14 +33,14 @@ services: image: quay.io/curl/curl external_links: - traefik:whoami.example.com - entrypoint: - - /bin/sh - - -c - - | - while true; do - curl whoami.example.com 2>/dev/null | grep X-Real-Ip - sleep 5 - done + entrypoint: + - /bin/sh + - -c + - | + while true; do + curl whoami.example.com 2>/dev/null | grep X-Real-Ip + sleep 5 + done networks: test: ipv4_address: 10.10.10.1 @@ -48,14 +48,14 @@ services: image: quay.io/curl/curl external_links: - traefik:whoami.example.com - entrypoint: - - /bin/sh - - -c - - | - while true; do - curl whoami.example.com 2>/dev/null - sleep 5 - done + entrypoint: + - /bin/sh + - -c + - | + while true; do + curl whoami.example.com 2>/dev/null + sleep 5 + done networks: test: ipv4_address: 10.10.10.2 @@ -63,17 +63,32 @@ services: image: quay.io/curl/curl external_links: - traefik:whoami.example.com - entrypoint: - - /bin/sh - - -c - - | - while true; do - curl -A 'Mozilla/5.0 (Nintendo WiiU) AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.12 NintendoBrowser/4.3.1.11264.US' whoami.example.com 2>/dev/null - sleep 5 - done + entrypoint: + - /bin/sh + - -c + - | + while true; do + curl -A 'Mozilla/5.0 (Nintendo WiiU) AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.12 NintendoBrowser/4.3.1.11264.US' whoami.example.com 2>/dev/null + sleep 5 + done networks: test: ipv4_address: 10.10.10.3 + blocked-cidr: + image: quay.io/curl/curl + external_links: + - traefik:whoami.example.com + entrypoint: + - /bin/sh + - -c + - | + while true; do + curl whoami.example.com 2>/dev/null + sleep 5 + done + networks: + test: + ipv4_address: 10.10.20.2 lists: image: nginx volumes: diff --git a/fixtures/lists/ip-blocklist b/fixtures/lists/ip-blocklist index b5a79ba..8387ecd 100644 --- a/fixtures/lists/ip-blocklist +++ b/fixtures/lists/ip-blocklist @@ -1 +1,4 @@ 10.10.10.2 + 192.168.1.1/32 +10.10.20.0/24 +2001:db8:3333:4444:5555:6666:7777:8888 diff --git a/fixtures/lists/useragent-blocklist b/fixtures/lists/useragent-blocklist index 21bb7c3..9fc402a 100644 --- a/fixtures/lists/useragent-blocklist +++ b/fixtures/lists/useragent-blocklist @@ -1 +1,2 @@ nintendobrowser +claudebot diff --git a/go.mod b/go.mod index 9ef6d42..921b207 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/discoverygarden/traefik-ultimate-bad-bot-blocker -go 1.21.4 +go 1.19