From 21972e49cbbe1d2f1ca1c9033ae9510ba7cf3d35 Mon Sep 17 00:00:00 2001 From: Ainar Garipov Date: Thu, 20 May 2021 13:42:35 +0300 Subject: [PATCH 1/2] Pull request: querylog: imp perf Merge in DNS/adguard-home from contains-fold to master Squashed commit of the following: commit 45c79b4b7618c8f3108766cc776b5bd3f0571761 Author: Ainar Garipov Date: Wed May 19 21:26:09 2021 +0300 querylog: imp perf --- internal/querylog/searchcriterion.go | 50 ++++++--- internal/querylog/searchcriterion_test.go | 121 ++++++++++++++++++++++ 2 files changed, 158 insertions(+), 13 deletions(-) create mode 100644 internal/querylog/searchcriterion_test.go diff --git a/internal/querylog/searchcriterion.go b/internal/querylog/searchcriterion.go index c0aca44f23d..725a36fe8eb 100644 --- a/internal/querylog/searchcriterion.go +++ b/internal/querylog/searchcriterion.go @@ -2,6 +2,8 @@ package querylog import ( "strings" + "unicode" + "unicode/utf8" "github.com/AdguardTeam/AdGuardHome/internal/dnsfilter" ) @@ -63,6 +65,37 @@ func (c *searchCriterion) ctDomainOrClientCaseStrict( strings.EqualFold(name, term) } +// containsFold reports whehter s contains, ignoring letter case, substr. +// +// TODO(a.garipov): Move to aghstrings if needed elsewhere. +func containsFold(s, substr string) (ok bool) { + sLen, substrLen := len(s), len(substr) + if sLen < substrLen { + return false + } + + if sLen == substrLen { + return strings.EqualFold(s, substr) + } + + first, _ := utf8.DecodeRuneInString(substr) + firstFolded := unicode.SimpleFold(first) + + for i := 0; i != -1 && len(s) >= len(substr); { + if strings.EqualFold(s[:substrLen], substr) { + return true + } + + i = strings.IndexFunc(s[1:], func(r rune) (eq bool) { + return r == first || r == firstFolded + }) + + s = s[1+i:] + } + + return false +} + func (c *searchCriterion) ctDomainOrClientCaseNonStrict( term string, clientID string, @@ -70,19 +103,10 @@ func (c *searchCriterion) ctDomainOrClientCaseNonStrict( host string, ip string, ) (ok bool) { - // TODO(a.garipov): Write a performant, case-insensitive version of - // strings.Contains instead of generating garbage. Or, perhaps in the - // future, use a locale-appropriate matcher from golang.org/x/text. - clientID = strings.ToLower(clientID) - host = strings.ToLower(host) - ip = strings.ToLower(ip) - name = strings.ToLower(name) - term = strings.ToLower(term) - - return strings.Contains(clientID, term) || - strings.Contains(host, term) || - strings.Contains(ip, term) || - strings.Contains(name, term) + return containsFold(clientID, term) || + containsFold(host, term) || + containsFold(ip, term) || + containsFold(name, term) } // quickMatch quickly checks if the line matches the given search criterion. diff --git a/internal/querylog/searchcriterion_test.go b/internal/querylog/searchcriterion_test.go new file mode 100644 index 00000000000..65ee6645b44 --- /dev/null +++ b/internal/querylog/searchcriterion_test.go @@ -0,0 +1,121 @@ +package querylog + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestContainsFold(t *testing.T) { + testCases := []struct { + name string + inS string + inSubstr string + want bool + }{{ + name: "empty", + inS: "", + inSubstr: "", + want: true, + }, { + name: "shorter", + inS: "a", + inSubstr: "abc", + want: false, + }, { + name: "same_len_true", + inS: "abc", + inSubstr: "abc", + want: true, + }, { + name: "same_len_true_fold", + inS: "abc", + inSubstr: "aBc", + want: true, + }, { + name: "same_len_false", + inS: "abc", + inSubstr: "def", + want: false, + }, { + name: "longer_true", + inS: "abcdedef", + inSubstr: "def", + want: true, + }, { + name: "longer_false", + inS: "abcded", + inSubstr: "ghi", + want: false, + }, { + name: "longer_true_fold", + inS: "abcdedef", + inSubstr: "dEf", + want: true, + }, { + name: "longer_false_fold", + inS: "abcded", + inSubstr: "gHi", + want: false, + }, { + name: "longer_true_cyr_fold", + inS: "абвгдедеё", + inSubstr: "дЕЁ", + want: true, + }, { + name: "longer_false_cyr_fold", + inS: "абвгдедеё", + inSubstr: "жЗИ", + want: false, + }, { + name: "no_letters_true", + inS: "1.2.3.4", + inSubstr: "2.3.4", + want: true, + }, { + name: "no_letters_false", + inS: "1.2.3.4", + inSubstr: "2.3.5", + want: false, + }} + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if tc.want { + assert.True(t, containsFold(tc.inS, tc.inSubstr)) + } else { + assert.False(t, containsFold(tc.inS, tc.inSubstr)) + } + }) + } +} + +var sink bool + +func BenchmarkContainsFold(b *testing.B) { + const s = "aaahBbBhccchDDDeEehFfFhGGGhHhh" + const substr = "HHH" + + // Compare our implementation of containsFold against a stupid solution + // of calling strings.ToLower and strings.Contains. + b.Run("containsfold", func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + sink = containsFold(s, substr) + } + + assert.True(b, sink) + }) + + b.Run("tolower_contains", func(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + sink = strings.Contains(strings.ToLower(s), strings.ToLower(substr)) + } + + assert.True(b, sink) + }) +} From 9c60aef6371c608d715f3ded92afa91007397c48 Mon Sep 17 00:00:00 2001 From: Ainar Garipov Date: Thu, 20 May 2021 14:22:06 +0300 Subject: [PATCH 2/2] Pull request: home: imp whois parse Updates #2646. Squashed commit of the following: commit 0a5ff6ae74c532a296c0594a598a99c7cfaccf8c Author: Ainar Garipov Date: Thu May 20 14:07:08 2021 +0300 home: imp code commit 2af0f463a77b81e827d9faca079a19c5437e1cd9 Author: Ainar Garipov Date: Thu May 20 13:48:08 2021 +0300 home: imp whois parse --- internal/home/whois.go | 93 +++++++++++++++++++++---------------- internal/home/whois_test.go | 74 +++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 39 deletions(-) diff --git a/internal/home/whois.go b/internal/home/whois.go index 7c77a90350b..78721edd07d 100644 --- a/internal/home/whois.go +++ b/internal/home/whois.go @@ -10,7 +10,6 @@ import ( "time" "github.com/AdguardTeam/AdGuardHome/internal/aghio" - "github.com/AdguardTeam/AdGuardHome/internal/aghstrings" "github.com/AdguardTeam/golibs/cache" "github.com/AdguardTeam/golibs/log" ) @@ -66,55 +65,71 @@ func trimValue(s string) string { return s[:maxValueLength-3] + "..." } -// Parse plain-text data from the response -func whoisParse(data string) map[string]string { - m := map[string]string{} - descr := "" - netname := "" - for len(data) != 0 { - ln := aghstrings.SplitNext(&data, '\n') - if len(ln) == 0 || ln[0] == '#' || ln[0] == '%' { +// coalesceStr returns the first non-empty string. +// +// TODO(a.garipov): Move to aghstrings? +func coalesceStr(strs ...string) (res string) { + for _, s := range strs { + if s != "" { + return s + } + } + + return "" +} + +// isWhoisComment returns true if the string is empty or is a WHOIS comment. +func isWhoisComment(s string) (ok bool) { + return len(s) == 0 || s[0] == '#' || s[0] == '%' +} + +// strmap is an alias for convenience. +type strmap = map[string]string + +// whoisParse parses a subset of plain-text data from the WHOIS response into +// a string map. +func whoisParse(data string) (m strmap) { + m = strmap{} + + var orgname string + lines := strings.Split(data, "\n") + for _, l := range lines { + if isWhoisComment(l) { continue } - kv := strings.SplitN(ln, ":", 2) + kv := strings.SplitN(l, ":", 2) if len(kv) != 2 { continue } - k := strings.TrimSpace(kv[0]) - k = strings.ToLower(k) + + k := strings.ToLower(strings.TrimSpace(kv[0])) v := strings.TrimSpace(kv[1]) + if v == "" { + continue + } switch k { - case "org-name": - m["orgname"] = trimValue(v) - case "city", "country", "orgname": - m[k] = trimValue(v) - case "descr": - if len(descr) == 0 { - descr = v - } - case "netname": - netname = v - case "whois": // "whois: whois.arin.net" - m["whois"] = v - case "referralserver": // "ReferralServer: whois://whois.ripe.net" - if strings.HasPrefix(v, "whois://") { - m["whois"] = v[len("whois://"):] - } + case "orgname", "org-name": + k = "orgname" + v = trimValue(v) + orgname = v + case "city", "country": + v = trimValue(v) + case "descr", "netname": + k = "orgname" + v = coalesceStr(orgname, v) + orgname = v + case "whois": + k = "whois" + case "referralserver": + k = "whois" + v = strings.TrimPrefix(v, "whois://") + default: + continue } - } - _, ok := m["orgname"] - if !ok { - // Set orgname from either descr or netname for the frontent. - // - // TODO(a.garipov): Perhaps don't do that in the V1 HTTP API? - if descr != "" { - m["orgname"] = trimValue(descr) - } else if netname != "" { - m["orgname"] = trimValue(netname) - } + m[k] = v } return m diff --git a/internal/home/whois_test.go b/internal/home/whois_test.go index 12511bbd9ca..f87fa4aed03 100644 --- a/internal/home/whois_test.go +++ b/internal/home/whois_test.go @@ -76,3 +76,77 @@ func TestWhois(t *testing.T) { assert.Equal(t, "Imagiland", m["country"]) assert.Equal(t, "Nonreal", m["city"]) } + +func TestWhoisParse(t *testing.T) { + const ( + city = "Nonreal" + country = "Imagiland" + orgname = "FakeOrgLLC" + whois = "whois.example.net" + ) + + testCases := []struct { + want strmap + name string + in string + }{{ + want: strmap{}, + name: "empty", + in: ``, + }, { + want: strmap{}, + name: "comments", + in: "%\n#", + }, { + want: strmap{}, + name: "no_colon", + in: "city", + }, { + want: strmap{}, + name: "no_value", + in: "city:", + }, { + want: strmap{"city": city}, + name: "city", + in: `city: ` + city, + }, { + want: strmap{"country": country}, + name: "country", + in: `country: ` + country, + }, { + want: strmap{"orgname": orgname}, + name: "orgname", + in: `orgname: ` + orgname, + }, { + want: strmap{"orgname": orgname}, + name: "orgname_hyphen", + in: `org-name: ` + orgname, + }, { + want: strmap{"orgname": orgname}, + name: "orgname_descr", + in: `descr: ` + orgname, + }, { + want: strmap{"orgname": orgname}, + name: "orgname_netname", + in: `netname: ` + orgname, + }, { + want: strmap{"whois": whois}, + name: "whois", + in: `whois: ` + whois, + }, { + want: strmap{"whois": whois}, + name: "referralserver", + in: `referralserver: whois://` + whois, + }, { + want: strmap{}, + name: "other", + in: `other: value`, + }} + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := whoisParse(tc.in) + assert.Equal(t, tc.want, got) + }) + } +}