From 4c81abe8de87abf53f9ae506b676f3fd4fb4a52c Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 19 Jan 2024 15:11:40 +0100 Subject: [PATCH] cleanup(webconnectivitylte): remove the orig engine (#1455) This commit removes the "orig" data analysis engine of the webconnectivitylte experiment now that the "classic" engine is able to generate the same or better results. Part of https://github.com/ooni/probe/issues/2640 --- .../webconnectivitylte/analysisclassic.go | 4 +- .../webconnectivitylte/analysiscore.go | 458 +--------------- .../webconnectivitylte/analysisdns.go | 487 ------------------ .../webconnectivitylte/analysisdns_test.go | 83 --- .../webconnectivitylte/analysishttpcore.go | 76 --- .../webconnectivitylte/analysishttpdiff.go | 261 ---------- .../webconnectivitylte/analysistcpip.go | 82 --- .../webconnectivitylte/analysistls.go | 50 -- 8 files changed, 20 insertions(+), 1481 deletions(-) delete mode 100644 internal/experiment/webconnectivitylte/analysisdns.go delete mode 100644 internal/experiment/webconnectivitylte/analysisdns_test.go delete mode 100644 internal/experiment/webconnectivitylte/analysishttpcore.go delete mode 100644 internal/experiment/webconnectivitylte/analysistcpip.go delete mode 100644 internal/experiment/webconnectivitylte/analysistls.go diff --git a/internal/experiment/webconnectivitylte/analysisclassic.go b/internal/experiment/webconnectivitylte/analysisclassic.go index b2fcabf835..7347e5f14c 100644 --- a/internal/experiment/webconnectivitylte/analysisclassic.go +++ b/internal/experiment/webconnectivitylte/analysisclassic.go @@ -14,10 +14,10 @@ import ( "github.com/ooni/probe-cli/v3/internal/runtimex" ) -// AnalysisEngineClassic is an alternative analysis engine that aims to produce +// analysisEngineClassic is an alternative analysis engine that aims to produce // results that are backward compatible with Web Connectivity v0.4 while also // procuding more fine-grained blocking flags. -func AnalysisEngineClassic(tk *TestKeys, logger model.Logger) { +func analysisEngineClassic(tk *TestKeys, logger model.Logger) { tk.analysisClassic(logger) } diff --git a/internal/experiment/webconnectivitylte/analysiscore.go b/internal/experiment/webconnectivitylte/analysiscore.go index 5a671ad46a..177874fd1e 100644 --- a/internal/experiment/webconnectivitylte/analysiscore.go +++ b/internal/experiment/webconnectivitylte/analysiscore.go @@ -1,19 +1,13 @@ package webconnectivitylte -import ( - "fmt" - "net" - "net/url" - - "github.com/ooni/probe-cli/v3/internal/geoipx" - "github.com/ooni/probe-cli/v3/internal/model" - "github.com/ooni/probe-cli/v3/internal/netxlite" -) - // // Core analysis // +import ( + "github.com/ooni/probe-cli/v3/internal/model" +) + // These flags determine the context of TestKeys.Blocking. However, while .Blocking // is an enumeration, these flags allow to describe multiple blocking methods. const ( @@ -36,201 +30,16 @@ const ( AnalysisBlockingFlagSuccess ) -// AnalysisEngineFn is the function that runs the analysis engine for -// processing and scoring measurements collected by LTE. -var AnalysisEngineFn func(tk *TestKeys, logger model.Logger) = AnalysisEngineClassic - // analysisToplevel is the toplevel function that analyses the results // of the experiment once all network tasks have completed. // -// The ultimate objective of this function is to set the toplevel flags -// used by the backend to score results. These flags are: -// -// - blocking (and x_blocking_flags) which contain information about -// the detected blocking method (or methods); -// -// - accessible which contains information on whether we think we -// could access the resource somehow. -// -// Originally, Web Connectivity only had a blocking scalar value so -// we could see ourselves in one of the following cases: -// -// +----------+------------+--------------------------+ -// | Blocking | Accessible | Meaning | -// +----------+------------+--------------------------+ -// | null | null | Probe analysis error | -// +----------+------------+--------------------------+ -// | false | true | We detected no blocking | -// +----------+------------+--------------------------+ -// | "..." | false | We detected blocking | -// +----------+------------+--------------------------+ -// -// While it would be possible in this implementation, which has a granular -// definition of blocking (x_blocking_flags), to set accessible to mean -// whether we could access the resource in some conditions, it seems quite -// dangerous to deviate from the original behavior. -// -// Our code will NEVER set .Blocking or .Accessible outside of this function -// and we'll instead rely on XBlockingFlags. This function's job is to call -// other functions that compute the .XBlockingFlags and then to assign the value -// of .Blocking and .Accessible from the .XBlockingFlags value. -// -// Accordingly, this is how we map the value of the .XBlockingFlags to the -// values of .Blocking and .Accessible: -// -// +--------------------------------------+----------------+-------------+ -// | .BlockingFlags | .Blocking | .Accessible | -// +--------------------------------------+----------------+-------------+ -// | (& DNSBlocking) != 0 | "dns" | false | -// +--------------------------------------+----------------+-------------+ -// | (& TCPIPBlocking) != 0 | "tcp_ip" | false | -// +--------------------------------------+----------------+-------------+ -// | (& (TLSBlocking|HTTPBlocking)) != 0 | "http-failure" | false | -// +--------------------------------------+----------------+-------------+ -// | (& HTTPDiff) != 0 | "http-diff" | false | -// +--------------------------------------+----------------+-------------+ -// | == FlagSuccess | false | true | -// +--------------------------------------+----------------+-------------+ -// | otherwise | null | null | -// +--------------------------------------+----------------+-------------+ -// -// It's a very simple rule, that should preserve previous semantics. -// -// As an improvement over Web Connectivity v0.4, we also attempt to identify -// special subcases of a null, null result to provide the user with more information. +// This function sets v0.4-compatible test keys as well as v0.5-specific +// test keys that attempt to provide a more fine-grained view of the +// results, so that we can flag cases with multiple blocking scenarios. // // This function MUTATES the test keys. func (tk *TestKeys) analysisToplevel(logger model.Logger) { - AnalysisEngineFn(tk, logger) -} - -// AnalysisEngineOrig is the original analysis engine we wrote for LTE. This engine -// aims to detect and report about all the possible ways in which the measured website -// is blocked. As of 2023-11-30, we still consider this engine experimental. -func AnalysisEngineOrig(tk *TestKeys, logger model.Logger) { - tk.analysisOrig(logger) -} - -func (tk *TestKeys) analysisOrig(logger model.Logger) { - // Since we run after all tasks have completed (or so we assume) we're - // not going to use any form of locking here. - - // these functions compute the value of XBlockingFlags - tk.analysisDNSToplevel(logger, model.GeoIPASNLookupperFunc(geoipx.LookupASN)) - tk.analysisTCPIPToplevel(logger) - tk.analysisTLSToplevel(logger) - tk.analysisHTTPToplevel(logger) - - // now, let's determine .Accessible and .Blocking - switch { - case (tk.BlockingFlags & AnalysisBlockingFlagDNSBlocking) != 0: - tk.Blocking = "dns" - tk.Accessible = false - logger.Warnf( - "ANOMALY: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - - case (tk.BlockingFlags & AnalysisBlockingFlagTCPIPBlocking) != 0: - tk.Blocking = "tcp_ip" - tk.Accessible = false - logger.Warnf( - "ANOMALY: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - - // Assigning "http-failure" for both TLS and HTTP blocking is a legacy behavior - // because the spec does not consider the case of TLS based blocking - case (tk.BlockingFlags & (AnalysisBlockingFlagTLSBlocking | AnalysisBlockingFlagHTTPBlocking)) != 0: - tk.Blocking = "http-failure" - tk.Accessible = false - logger.Warnf("ANOMALY: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - - case (tk.BlockingFlags & AnalysisBlockingFlagHTTPDiff) != 0: - tk.Blocking = "http-diff" - tk.Accessible = false - logger.Warnf( - "ANOMALY: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - - case tk.BlockingFlags == AnalysisBlockingFlagSuccess: - tk.Blocking = false - tk.Accessible = true - logger.Infof( - "ACCESSIBLE: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - - default: - // NullNull remediation - // - // If we arrive here, the measurement has failed. However, there are a - // bunch of cases where we can still explain what happened by applying specific - // algorithms to detect edge cases. - // - // The relative order of these algorithsm matters: swapping them without - // careful consideration may produce unexpected results. - - if tk.analysisNullNullDetectTHDNSNXDOMAIN(logger) { - tk.Blocking = "dns" - tk.Accessible = false - logger.Warnf( - "RESIDUAL_DNS_BLOCKING: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - return - } - - if tk.analysisNullNullDetectNoAddrs(logger) { - tk.Blocking = false - tk.Accessible = false - logger.Infof( - "WEBSITE_DOWN_DNS: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - return - } - - if tk.analysisNullNullDetectAllConnectsFailed(logger) { - tk.Blocking = false - tk.Accessible = false - logger.Infof( - "WEBSITE_DOWN_TCP: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - return - } - - if tk.analysisNullNullDetectTLSMisconfigured(logger) { - tk.Blocking = false - tk.Accessible = false - logger.Infof( - "WEBSITE_DOWN_TLS: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - return - } - - if tk.analysisNullNullDetectSuccessfulHTTPS(logger) { - tk.Blocking = false - tk.Accessible = true - logger.Infof( - "ACCESSIBLE_HTTPS: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - return - } - - tk.Blocking = nil - tk.Accessible = nil - logger.Warnf( - "UNKNOWN: flags=%d, accessible=%+v, blocking=%+v", - tk.BlockingFlags, tk.Accessible, tk.Blocking, - ) - } + analysisEngineClassic(tk, logger) } const ( @@ -255,246 +64,15 @@ const ( AnalysisFlagNullNullUnexpectedDNSLookupSuccess ) -// analysisNullNullDetectTHDNSNXDOMAIN runs when .Blocking = nil and -// .Accessible = nil to flag cases in which the probe resolved addresses -// but the TH thinks the address is actually NXDOMAIN. When this -// happens, we're going to give priority to the TH's DoH observation. -// -// See https://github.com/ooni/probe/issues/2308. -func (tk *TestKeys) analysisNullNullDetectTHDNSNXDOMAIN(logger model.Logger) bool { - if tk.Control == nil { - // we need the control info to continue - return false - } - - // we need some cleartext successes - var cleartextSuccesses int - for _, query := range tk.Queries { - if query.Engine == "doh" { - // we skip DoH entries because they are encrypted and - // cannot be manipulated by censors - continue - } - if query.Failure != nil { - // we should stop the algorithm in case we've got any - // hard failure, but `dns_no_answer` is acceptable because - // actually it might be there's only A censorship and the - // AAAA query instead returns `dns_no_answer`. - // - // See https://explorer.ooni.org/measurement/20220914T073558Z_webconnectivity_IT_30722_n1_wroXRsBGYx0x9h0q?input=http%3A%2F%2Fitsat.info - // for a case where this was happening and fooled us - // causing us to conclude that the website was just down. - if *query.Failure == netxlite.FailureDNSNoAnswer { - continue - } - return false - } - cleartextSuccesses++ - } - if cleartextSuccesses <= 0 { - return false - } - - // if the TH failed with its own string representing the NXDOMAIN - // error, then we've detected our corner case - failure := tk.Control.DNS.Failure - if failure != nil && *failure == model.THDNSNameError { - logger.Info("DNS censorship: local DNS success with remote NXDOMAIN") - tk.NullNullFlags |= AnalysisFlagNullNullUnexpectedDNSLookupSuccess - return true - } - - // otherwise it's something else - return false -} - -// analysisNullNullDetectSuccessfulHTTPS runs when .Blocking = nil and -// .Accessible = nil to flag successul HTTPS measurements chains that -// occurred regardless of whatever else could have gone wrong. -// -// We need all requests to be HTTPS because an HTTP request in the -// chain breaks the ~reasonable assumption that our custom CA bundle -// is enough to protect against MITM. Of course, when we use this -// algorithm, we're not well positioned to flag server-side blocking. -// -// Version 0.4 of the probe implemented a similar algorithm, which -// however ran before other checks. Version, 0.5 on the contrary, runs -// this algorithm if any other heuristics failed. -// -// See https://github.com/ooni/probe/issues/2307 for more info. -func (tk *TestKeys) analysisNullNullDetectSuccessfulHTTPS(logger model.Logger) bool { - - // the chain is sorted from most recent to oldest but it does - // not matter much since we need to walk all of it. - // - // CAVEAT: this code assumes we have a single request chain - // inside the .Requests field, which seems fine because it's - // what Web Connectivity should be doing. - for _, req := range tk.Requests { - URL, err := url.Parse(req.Request.URL) - if err != nil { - // this looks like a bug - return false - } - if URL.Scheme != "https" { - // the whole chain must be HTTPS - return false - } - if req.Failure != nil { - // they must all succeed - return false - } - switch req.Response.Code { - case 200, 301, 302, 307, 308: - default: - // the response must be successful or redirect - return false - } - } - - // only if we have at least one request - if len(tk.Requests) > 0 { - logger.Info("website likely accessible: seen successful chain of HTTPS transactions") - tk.NullNullFlags |= AnalysisFlagNullNullSuccessfulHTTPS - return true - } - - // safety net otherwise - return false -} - -// analysisNullNullDetectTLSMisconfigured runs when .Blocking = nil and -// .Accessible = nil to check whether by chance we had TLS issues both on the -// probe side and on the TH side. This problem of detecting misconfiguration -// of the server's TLS stack is discussed at https://github.com/ooni/probe/issues/2300. -func (tk *TestKeys) analysisNullNullDetectTLSMisconfigured(logger model.Logger) bool { - if tk.Control == nil || tk.Control.TLSHandshake == nil { - // we need TLS control data to say we are in this case - return false - } - - for _, entry := range tk.TLSHandshakes { - if entry.Failure == nil { - // we need all attempts to fail to flag this state - return false - } - thEntry, found := tk.Control.TLSHandshake[entry.Address] - if !found { - // we need to have seen exactly the same attempts - return false - } - if thEntry.Failure == nil { - // we need all TH attempts to fail - return false - } - if *entry.Failure != *thEntry.Failure { - // we need to see the same failure to be sure, which it's - // possible to do for TLS because we have the same definition - // of failure rather than being constrained by the legacy - // implementation of the test helper and Twisted names - // - // TODO(bassosimone): this is the obvious algorithm but maybe - // it's a bit too strict and there is a more lax version of - // the same algorithm that it's still acceptable? - return false - } - } - - // only if we have had some TLS handshakes for both probe and TH - if len(tk.TLSHandshakes) > 0 && len(tk.Control.TLSHandshake) > 0 { - logger.Info("website likely down: all TLS handshake attempts failed for both probe and TH") - tk.NullNullFlags |= AnalysisFlagNullNullExpectedTLSHandshakeFailure - return true - } - - // safety net in case we've got wrong input - return false -} - -// analysisNullNullDetectAllConnectsFailed attempts to detect whether we are in -// the .Blocking = nil, .Accessible = nil case because all the TCP connect -// attempts by either the probe or the TH have failed. -// -// See https://explorer.ooni.org/measurement/20220911T105037Z_webconnectivity_IT_30722_n1_ruzuQ219SmIO9SrT?input=https://doh.centraleu.pi-dns.com/dns-query?dns=q80BAAABAAAAAAAAA3d3dwdleGFtcGxlA2NvbQAAAQAB -// for an example measurement with this behavior. -// -// See https://github.com/ooni/probe/issues/2299 for the reference issue. -func (tk *TestKeys) analysisNullNullDetectAllConnectsFailed(logger model.Logger) bool { - if tk.Control == nil { - // we need control data to say we're in this case - return false - } - - for _, entry := range tk.TCPConnect { - if entry.Status.Failure == nil { - // we need all connect attempts to fail - return false - } - epnt := net.JoinHostPort(entry.IP, fmt.Sprintf("%d", entry.Port)) - thEntry, found := tk.Control.TCPConnect[epnt] - if !found { - // we need to have seen exactly the same attempts - return false - } - if thEntry.Failure == nil { - // we need all TH attempts to fail - return false - } - } - - // only if we have had some addresses to connect - if len(tk.TCPConnect) > 0 && len(tk.Control.TCPConnect) > 0 { - logger.Info("website likely down: all TCP connect attempts failed for both probe and TH") - tk.NullNullFlags |= AnalysisFlagNullNullExpectedTCPConnectFailure - return true - } +const ( + // AnalysisFlagDNSBogon indicates we got any bogon reply + AnalysisFlagDNSBogon = 1 << iota - // safety net in case we're passed empty lists/maps - return false -} + // AnalysisDNSFlagUnexpectedFailure indicates the TH could + // resolve a domain while the probe couldn't + AnalysisDNSFlagUnexpectedFailure -// analysisNullNullDetectNoAddrs attempts to see whether we -// ended up into the .Blocking = nil, .Accessible = nil case because -// the domain is expired and all queries returned no addresses. -// -// See https://github.com/ooni/probe/issues/2290 for further -// documentation about the issue we're solving here. -// -// It would be tempting to check specifically for NXDOMAIN here, but we -// know it is problematic do that. In fact, on Android the getaddrinfo -// resolver always returns EAI_NODATA on error, regardless of the actual -// error that may have occurred in the Android DNS backend. -// -// See https://github.com/ooni/probe/issues/2029 for more information -// on Android's getaddrinfo behavior. -func (tk *TestKeys) analysisNullNullDetectNoAddrs(logger model.Logger) bool { - if tk.Control == nil { - // we need control data to say we're in this case - return false - } - for _, query := range tk.Queries { - if len(query.Answers) > 0 { - // when a query has answers, we're not in the NoAddresses case - return false - } - } - if len(tk.TCPConnect) > 0 { - // if we attempted TCP connect, we're not in the NoAddresses case - return false - } - if len(tk.TLSHandshakes) > 0 { - // if we attempted TLS handshakes, we're not in the NoAddresses case - return false - } - if len(tk.Control.DNS.Addrs) > 0 { - // when the TH resolved addresses, we're not in the NoAddresses case - return false - } - if len(tk.Control.TCPConnect) > 0 { - // when the TH used addresses, we're not in the NoAddresses case - return false - } - logger.Infof("website likely down: all DNS lookups failed for both probe and TH") - tk.NullNullFlags |= AnalysisFlagNullNullExpectedDNSLookupFailure - return true -} + // AnalysisDNSFlagUnexpectedAddrs indicates the TH resolved + // different addresses from the probe + AnalysisDNSFlagUnexpectedAddrs +) diff --git a/internal/experiment/webconnectivitylte/analysisdns.go b/internal/experiment/webconnectivitylte/analysisdns.go deleted file mode 100644 index 1147b28789..0000000000 --- a/internal/experiment/webconnectivitylte/analysisdns.go +++ /dev/null @@ -1,487 +0,0 @@ -package webconnectivitylte - -// -// DNS analysis -// - -import ( - "net" - "net/url" - - "github.com/ooni/probe-cli/v3/internal/model" - "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/optional" -) - -const ( - // AnalysisFlagDNSBogon indicates we got any bogon reply - AnalysisFlagDNSBogon = 1 << iota - - // AnalysisDNSFlagUnexpectedFailure indicates the TH could - // resolve a domain while the probe couldn't - AnalysisDNSFlagUnexpectedFailure - - // AnalysisDNSFlagUnexpectedAddrs indicates the TH resolved - // different addresses from the probe - AnalysisDNSFlagUnexpectedAddrs -) - -// analysisDNSToplevel is the toplevel analysis function for DNS results. -// -// Note: this function DOES NOT consider failed DNS-over-HTTPS (DoH) submeasurements -// and ONLY considers the IP addrs they have resolved. Failing to contact a DoH service -// provides info about such a DoH service rather than on the measured URL. See the -// https://github.com/ooni/probe/issues/2274 issue for more info. -// -// The goals of this function are the following: -// -// 1. Set the legacy .DNSExperimentFailure field to the failure value of the -// first DNS query that failed among the ones we actually tried. Because we -// have multiple queries, unfortunately we are forced to pick one error among -// possibly many to assign to this field. This is why I consider it legacy. -// -// 2. Compute the XDNSFlags value. -// -// From the XDNSFlags value, we determine, in turn DNSConsistency and -// XBlockingFlags according to the following decision table: -// -// +-----------+----------------+---------------------+ -// | XDNSFlags | DNSConsistency | XBlockingFlags | -// +-----------+----------------+---------------------+ -// | 0 | "consistent" | no change | -// +-----------+----------------+---------------------+ -// | nonzero | "inconsistent" | set FlagDNSBlocking | -// +-----------+----------------+---------------------+ -// -// We explain how XDNSFlags is determined in the documentation of -// the functions that this function calls to do its job. -func (tk *TestKeys) analysisDNSToplevel(logger model.Logger, lookupper model.GeoIPASNLookupper) { - tk.analysisDNSExperimentFailure() - tk.analysisDNSBogon(logger) - tk.analysisDNSDuplicateResponses(logger) - tk.analysisDNSUnexpectedFailure(logger) - tk.analysisDNSUnexpectedAddrs(logger, lookupper) - if tk.DNSFlags != 0 { - logger.Warn("DNSConsistency: inconsistent") - tk.DNSConsistency = optional.Some("inconsistent") - tk.BlockingFlags |= AnalysisBlockingFlagDNSBlocking - } else { - logger.Info("DNSConsistency: consistent") - tk.DNSConsistency = optional.Some("consistent") - } -} - -// analysisDNSDuplicateResponses checks whether we received duplicate -// replies for DNS-over-UDP queries, which is very unexpected. -func (tk *TestKeys) analysisDNSDuplicateResponses(logger model.Logger) { - if length := len(tk.DNSDuplicateResponses); length > 0 { - // TODO(bassosimone): write algorithm to analyze this - logger.Warnf("DNS: got %d unexpected late/duplicate DNS responses", length) - } -} - -// analysisDNSExperimentFailure sets the legacy DNSExperimentFailure field. -func (tk *TestKeys) analysisDNSExperimentFailure() { - for _, query := range tk.Queries { - if fail := query.Failure; fail != nil { - if query.QueryType == "AAAA" && *query.Failure == netxlite.FailureDNSNoAnswer { - // maybe this heuristic could be further improved by checking - // whether the TH did actually see any IPv6 address? - continue - } - if query.Engine == "doh" { - // we SHOULD NOT flag DoH failures _because_ they pertain to the - // DoH service rather than to the input URL - // - // See https://github.com/ooni/probe/issues/2274 - continue - } - tk.DNSExperimentFailure = fail - return - } - } -} - -// analysisDNSBogon computes the AnalysisFlagDNSBogon flag. We set this flag if -// we dectect any bogon in the .Queries field of the TestKeys. -func (tk *TestKeys) analysisDNSBogon(logger model.Logger) { - for _, query := range tk.Queries { - // Implementation note: any bogon IP address resolved by a DoH service - // is STILL suspicious since it should not happen. TODO(bassosimone): an - // even better algorithm could possibly check whether also the TH has - // observed bogon IP addrs and avoid flagging in such a case. - // - // See https://github.com/ooni/probe/issues/2274 - for _, answer := range query.Answers { - switch answer.AnswerType { - case "A": - if net.ParseIP(answer.IPv4) != nil && netxlite.IsBogon(answer.IPv4) { - logger.Warnf( - "DNS: got BOGON answer %s for domain %s (see #%d)", - answer.IPv4, - query.Hostname, - query.TransactionID, - ) - tk.DNSFlags |= AnalysisFlagDNSBogon - // continue processing so we print all the bogons we have - } - case "AAAA": - if net.ParseIP(answer.IPv6) != nil && netxlite.IsBogon(answer.IPv6) { - logger.Warnf( - "DNS: got BOGON answer %s for domain %s (see #%d)", - answer.IPv6, - query.Hostname, - query.TransactionID, - ) - tk.DNSFlags |= AnalysisFlagDNSBogon - // continue processing so we print all the bogons we have - } - default: - // nothing - } - } - } -} - -// analysisDNSUnexpectedFailure computes the AnalysisDNSFlagUnexpectedFailure flags. We say -// a failure is unexpected when the TH could resolve a domain and the probe couldn't. -func (tk *TestKeys) analysisDNSUnexpectedFailure(logger model.Logger) { - // make sure we have control before proceeding futher - if tk.Control == nil || tk.ControlRequest == nil { - return - } - - // obtain thRequest and thResponse as shortcuts - thRequest := tk.ControlRequest - thResponse := tk.Control - - // obtain the domain that the TH has queried for - URL, err := url.Parse(thRequest.HTTPRequest) - if err != nil { - return // this looks like a bug - } - domain := URL.Hostname() - - // we obviously don't care if the domain was an IP adddress - if net.ParseIP(domain) != nil { - return - } - - // if the control didn't lookup any IP addresses our job here is done - // because we can't say whether we have unexpected failures - hasAddrs := len(thResponse.DNS.Addrs) > 0 - if !hasAddrs { - return - } - - // with TH-resolved addrs, any local query _for the same domain_ queried - // by the probe that contains an error is suspicious - for _, query := range tk.Queries { - if domain != query.Hostname { - continue // not the domain queried by the test helper - } - if query.Engine == "doh" { - // As mentioned above, a DoH failure is not information about - // the URL we're measuring but about the DoH service being blocked. - // - // See https://github.com/ooni/probe/issues/2274 - continue - } - hasAddrs := false - Loop: - for _, answer := range query.Answers { - switch answer.AnswerType { - case "A", "AAA": - hasAddrs = true - break Loop - } - } - if hasAddrs { - // if the lookup returned any IP address, we are - // not dealing with unexpected failures - continue - } - if query.Failure == nil { - // we expect to see a failure if we don't see - // answers, so this seems a bug? - continue - } - if query.QueryType == "AAAA" && *query.Failure == netxlite.FailureDNSNoAnswer { - // maybe this heuristic could be further improved by checking - // whether the TH did actually see any IPv6 address? - continue - } - logger.Warnf("DNS: unexpected failure %s in #%d", *query.Failure, query.TransactionID) - tk.DNSFlags |= AnalysisDNSFlagUnexpectedFailure - // continue processing so we print all the unexpected failures - - // TODO(https://github.com/ooni/probe/issues/2029#issuecomment-1411716295): we need - // to ensure we correctly handle the android_dns_cache_no_data case. - } -} - -// analysisDNSUnexpectedAddrs computes the AnalysisDNSFlagUnexpectedAddrs flags. This -// algorithm builds upon the original DNSDiff algorithm by introducing an additional -// TLS based heuristic for determining whether an IP address was legit. -func (tk *TestKeys) analysisDNSUnexpectedAddrs( - logger model.Logger, - lookupper model.GeoIPASNLookupper, -) { - // make sure we have control before proceeding futher - if tk.Control == nil || tk.ControlRequest == nil { - return - } - - // obtain thRequest and thResponse as shortcuts - thRequest := tk.ControlRequest - thResponse := tk.Control - - // obtain the domain that the TH has queried for - URL, err := url.Parse(thRequest.HTTPRequest) - if err != nil { - return // this looks like a bug - } - domain := URL.Hostname() - - // we obviously don't care if the domain was an IP adddress - if net.ParseIP(domain) != nil { - return - } - - // if the control didn't resolve any IP address, then we basically - // cannot run this algorithm at all - thAddrs := thResponse.DNS.Addrs - if len(thAddrs) <= 0 { - return - } - - // gather all the IP addresses queried by the probe - // for the same domain for which the TH queried. - var probeAddrs []string - for _, query := range tk.Queries { - if domain != query.Hostname { - continue // not the domain the TH queried for - } - // Implementation note: in the case in which DoH returned answers, here - // it still feels okay to consider them. We should avoid flagging DoH - // failures as measurement failures but if DoH returns us some unexpected - // even-non-bogon addr, it seems worth flagging for now. - // - // See https://github.com/ooni/probe/issues/2274 - for _, answer := range query.Answers { - switch answer.AnswerType { - case "A": - probeAddrs = append(probeAddrs, answer.IPv4) - case "AAAA": - probeAddrs = append(probeAddrs, answer.IPv6) - } - } - } - - // if the probe has not collected any addr for the same domain, it's - // definitely suspicious and counts as a difference - if len(probeAddrs) <= 0 { - logger.Warnf("DNS: the probe did not resolve any IP address") - tk.DNSFlags |= AnalysisDNSFlagUnexpectedAddrs - return - } - - // if there are no different addresses between the probe and the TH then - // our job here is done and we can just stop searching - differentAddrs := tk.analysisDNSDiffAddrs(probeAddrs, thAddrs) - if len(differentAddrs) <= 0 { - return - } - for _, addr := range differentAddrs { - logger.Infof("DNS: address %s: not resolved by TH", addr) - } - - // now, let's exclude the differentAddrs for which we successfully - // completed a TLS handshake: those should be good addrs - withoutHandshake := tk.findAddrsWithoutTLSHandshake(domain, differentAddrs) - if len(withoutHandshake) <= 0 { - return - } - for _, addr := range withoutHandshake { - logger.Infof("DNS: address %s: cannot confirm using TLS handshake", addr) - } - - // as a last resort, accept the addresses without an handshake whose - // ASN overlaps with ASNs resolved by the TH - differentASNs := tk.analysisDNSDiffASN(logger, withoutHandshake, thAddrs, lookupper) - if len(differentASNs) <= 0 { - return - } - - // otherwise, conclude we have unexpected probe addrs - for addr, asn := range differentASNs { - logger.Warnf( - "DNS: address %s has unexpected AS%d and we cannot use TLS to confirm it", - addr, asn, - ) - } - tk.DNSFlags |= AnalysisDNSFlagUnexpectedAddrs -} - -// analysisDNSDiffAddrs returns all the IP addresses that are -// resolved by the probe but not by the test helper. -func (tk *TestKeys) analysisDNSDiffAddrs(probeAddrs, thAddrs []string) (diff []string) { - const ( - inProbe = 1 << iota - inTH - ) - mapping := make(map[string]int) - for _, addr := range probeAddrs { - if net.ParseIP(addr) != nil && netxlite.IsBogon(addr) { - // we can exclude bogons from the analysis because we already analyzed them - continue - } - mapping[addr] |= inProbe - } - for _, addr := range thAddrs { - mapping[addr] = inTH - } - for addr, where := range mapping { - if (where & inTH) == 0 { - diff = append(diff, addr) - } - } - return -} - -// analysisDNSDiffASN returns whether there are IP addresses in the probe's -// list with different ASNs from the ones in the TH's list. -func (tk *TestKeys) analysisDNSDiffASN( - logger model.Logger, - probeAddrs, - thAddrs []string, - lookupper model.GeoIPASNLookupper, -) (result map[string]uint) { - const ( - inProbe = 1 << iota - inTH - ) - logger.Debugf("DNS: probeAddrs %+v, thAddrs %+v", probeAddrs, thAddrs) - uniqueAddrs := make(map[string]uint) - asnToFlags := make(map[uint]int) - for _, addr := range probeAddrs { - asn, _, _ := lookupper.LookupASN(addr) - asnToFlags[asn] |= inProbe // including the zero ASN that means unknown - uniqueAddrs[addr] = asn - } - for _, addr := range thAddrs { - asn, _, _ := lookupper.LookupASN(addr) - asnToFlags[asn] |= inTH // including the zero ASN that means unknown - uniqueAddrs[addr] = asn - } - for addr, asn := range uniqueAddrs { - logger.Infof("DNS: addr %s has AS%d", addr, asn) - } - probeOnlyASNs := make(map[uint]bool) - for asn, where := range asnToFlags { - if (where & inTH) == 0 { - probeOnlyASNs[asn] = true - } - } - for asn := range probeOnlyASNs { - logger.Infof("DNS: AS%d: only seen by probe", asn) - } - result = make(map[string]uint) - for addr, asn := range uniqueAddrs { - if probeOnlyASNs[asn] { - result[addr] = asn - } - } - return -} - -// findAddrsWithoutTLSHandshake computes the list of probe discovered [addresses] -// for which we couldn't successfully perform a TLS handshake for the given [domain]. -func (tk *TestKeys) findAddrsWithoutTLSHandshake(domain string, addresses []string) (output []string) { - const ( - resolvedByProbe = 1 << iota - handshakeOK - hasObviousIPv6Issues - ) - mapping := make(map[string]int) - - // fill the input map with the addresses we're interested to analyze - for _, addr := range addresses { - mapping[addr] = 0 - } - - // flag the subset of addresses resolved by the probe - for _, query := range tk.Queries { - for _, answer := range query.Answers { - var addr string - switch answer.AnswerType { - case "A": - addr = answer.IPv4 - case "AAAA": - addr = answer.IPv6 - default: - continue - } - if _, found := mapping[addr]; !found { - continue // we're not interested into this addr - } - mapping[addr] |= resolvedByProbe - } - } - - // flag the subset of addrs with obvious IPv6 issues - // - // see https://github.com/ooni/probe/issues/2284 for more - // info on why we need to flag them - for _, connect := range tk.TCPConnect { - failure := connect.Status.Failure - if failure == nil { - continue // if we can connect, we don't have IPv6 issues - } - ipv6, err := netxlite.IsIPv6(connect.IP) - if err != nil { - continue // looks like a bug - } - if !ipv6 { - continue // not IPv6 - } - hasIssues := (*failure == netxlite.FailureNetworkUnreachable || - *failure == netxlite.FailureHostUnreachable) - if hasIssues { - mapping[connect.IP] |= hasObviousIPv6Issues - } - } - - // flag the subset of addrs with successful handshake for the right SNI - for _, thx := range tk.TLSHandshakes { - addr, _, err := net.SplitHostPort(thx.Address) - if err != nil { - continue // looks like a bug - } - if thx.Failure != nil { - continue // this handshake failed - } - if _, found := mapping[addr]; !found { - continue // we're not interested into this addr - } - if thx.ServerName != domain { - continue // the SNI is different, so... - } - mapping[addr] |= handshakeOK - } - - // compute the list of addresses without the handshakeOK flag - // excluding though the ones with obvious IPv6 issues - for addr, flags := range mapping { - if flags == 0 { - continue // this looks like a bug - } - if (flags & hasObviousIPv6Issues) != 0 { - continue // see https://github.com/ooni/probe/issues/2284 - } - if (flags & (resolvedByProbe | handshakeOK)) == resolvedByProbe { - output = append(output, addr) - } - } - return -} diff --git a/internal/experiment/webconnectivitylte/analysisdns_test.go b/internal/experiment/webconnectivitylte/analysisdns_test.go deleted file mode 100644 index 90e409ba06..0000000000 --- a/internal/experiment/webconnectivitylte/analysisdns_test.go +++ /dev/null @@ -1,83 +0,0 @@ -package webconnectivitylte - -import ( - "encoding/json" - "os" - "path/filepath" - "testing" - - "github.com/apex/log" - "github.com/ooni/probe-cli/v3/internal/mocks" - "github.com/ooni/probe-cli/v3/internal/model" - "github.com/ooni/probe-cli/v3/internal/runtimex" - "github.com/tailscale/hujson" -) - -func TestTestKeys_analysisDNSToplevel(t *testing.T) { - - // testcase is a test case in this test - type testcase struct { - // name is the name of the test case - name string - - // tkFile is the name of the JSONC file containing the test keys - tkFile string - - // geoInfo contains a static mapping of geoip info - geoInfo map[string]*model.LocationASN - - // expectBlockingFlags contains the expected BlockingFlags - expecteBlockingFlags int64 - } - - testcases := []testcase{{ - name: "https://github.com/ooni/probe/issues/2517", - tkFile: filepath.Join("testdata", "20230706183840.201925_PK_webconnectivity_19f5e0d803cbaea7.jsonc"), - geoInfo: map[string]*model.LocationASN{ - "172.224.19.10": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.5": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.9": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "17.248.248.101": {ASNumber: 714, Organization: "Apple Inc."}, - "2a01:b740:a41:212::8": {ASNumber: 714, Organization: "Apple Inc."}, - "2a01:b740:a41:212::7": {ASNumber: 714, Organization: "Apple Inc."}, - "2a01:b740:a41:213::7": {ASNumber: 714, Organization: "Apple Inc."}, - "172.224.19.3": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.12": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "17.248.248.103": {ASNumber: 714, Organization: "Apple Inc."}, - "17.248.248.119": {ASNumber: 714, Organization: "Apple Inc."}, - "2a01:b740:a41:213::5": {ASNumber: 714, Organization: "Apple Inc."}, - "172.224.19.4": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.6": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.11": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "2a01:b740:a41:212::4": {ASNumber: 714, Organization: "Apple Inc."}, - "172.224.19.7": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "17.248.248.117": {ASNumber: 714, Organization: "Apple Inc."}, - "17.248.248.121": {ASNumber: 714, Organization: "Apple Inc."}, - "2a01:b740:a41:212::5": {ASNumber: 714, Organization: "Apple Inc."}, - "17.248.248.104": {ASNumber: 714, Organization: "Apple Inc."}, - "2a01:b740:a41:213::9": {ASNumber: 714, Organization: "Apple Inc."}, - "172.224.19.14": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.15": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "2a01:b740:a41:212::6": {ASNumber: 714, Organization: "Apple Inc."}, - "172.224.19.17": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "172.224.19.13": {ASNumber: 36183, Organization: "Akamai Technologies, Inc."}, - "17.248.248.105": {ASNumber: 714, Organization: "Apple Inc."}, - "17.248.248.100": {ASNumber: 714, Organization: "Apple Inc."}, - }, - expecteBlockingFlags: AnalysisBlockingFlagDNSBlocking, - }} - - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - data := runtimex.Try1(os.ReadFile(tc.tkFile)) - data = runtimex.Try1(hujson.Standardize(data)) - var tk TestKeys - runtimex.Try0(json.Unmarshal(data, &tk)) - log.SetLevel(log.DebugLevel) - tk.analysisDNSToplevel(log.Log, mocks.NewGeoIPASNLookupper(tc.geoInfo)) - if tc.expecteBlockingFlags != tk.BlockingFlags { - t.Fatal("expected", tc.expecteBlockingFlags, "got", tk.BlockingFlags) - } - }) - } -} diff --git a/internal/experiment/webconnectivitylte/analysishttpcore.go b/internal/experiment/webconnectivitylte/analysishttpcore.go deleted file mode 100644 index f7ab719b1b..0000000000 --- a/internal/experiment/webconnectivitylte/analysishttpcore.go +++ /dev/null @@ -1,76 +0,0 @@ -package webconnectivitylte - -// -// HTTP core analysis -// - -import ( - "github.com/ooni/probe-cli/v3/internal/model" - "github.com/ooni/probe-cli/v3/internal/netxlite" - "github.com/ooni/probe-cli/v3/internal/optional" -) - -// analysisHTTPToplevel is the toplevel analysis function for HTTP results. -// -// This function's job is to determine whether there were unexpected TLS -// handshake results (compared to what the TH observed), or unexpected -// failures during HTTP round trips (using the TH as benchmark), or whether -// the obtained body differs from the one obtained by the TH. -// -// This results in possibly setting these XBlockingFlags: -// -// - AnalysisBlockingFlagHTTPBlocking -// -// - AnalysisBlockingFlagHTTPDiff -// -// In websteps fashion, we don't stop at the first failure, rather we -// process all the available data and evaluate all possible errors. -func (tk *TestKeys) analysisHTTPToplevel(logger model.Logger) { - // if we don't have any request to check, there's not much more we - // can actually do here, so let's just return. - if len(tk.Requests) <= 0 { - return - } - // TODO(https://github.com/ooni/probe/issues/2641): this code is wrong - // with redirects because LTE only creates an HTTP request when it reaches - // the HTTP stage, so a previous redirect that is successful would cause - // this code to say we're good on the HTTP front, while we're not. - finalRequest := tk.Requests[0] - if finalRequest.Failure != nil { - tk.HTTPExperimentFailure = optional.Some(*finalRequest.Failure) - } - - // don't perform any futher analysis without TH data - if tk.Control == nil || tk.ControlRequest == nil { - return - } - ctrl := tk.Control.HTTPRequest - - // don't perform any analysis if the TH's HTTP measurement failed because - // performing more precise mapping is a job for the pipeline. - if ctrl.Failure != nil { - return - } - - // flag cases of known HTTP failures - if failure := finalRequest.Failure; failure != nil { - switch *failure { - case netxlite.FailureConnectionReset, - netxlite.FailureGenericTimeoutError, - netxlite.FailureEOFError: - tk.BlockingFlags |= AnalysisBlockingFlagHTTPBlocking - logger.Warnf( - "HTTP: unexpected failure %s for %s (see #%d)", - *failure, - finalRequest.Address, - finalRequest.TransactionID, - ) - default: - // leave this case for ooni/pipeline - } - return - } - - // fallback to the HTTP diff algo. - tk.analysisHTTPDiff(logger, finalRequest, &ctrl) -} diff --git a/internal/experiment/webconnectivitylte/analysishttpdiff.go b/internal/experiment/webconnectivitylte/analysishttpdiff.go index 17ca5c8c85..8bc2b522f1 100644 --- a/internal/experiment/webconnectivitylte/analysishttpdiff.go +++ b/internal/experiment/webconnectivitylte/analysishttpdiff.go @@ -5,16 +5,8 @@ package webconnectivitylte // import ( - "net/url" - "reflect" - "strings" - - "github.com/ooni/probe-cli/v3/internal/experiment/webconnectivity" - "github.com/ooni/probe-cli/v3/internal/measurexlite" "github.com/ooni/probe-cli/v3/internal/minipipeline" - "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/optional" - "github.com/ooni/probe-cli/v3/internal/runtimex" ) // analysisHTTPDiffStatus contains the status relevant to compute HTTP diff. @@ -75,256 +67,3 @@ func (hds *analysisHTTPDiffStatus) httpDiff() bool { } return true } - -// analysisHTTPDiff computes the HTTP diff between the final request-response -// observed by the probe and the TH's result. The caller is responsible of passing -// us a valid probe observation and a valid TH observation with nil failure. -func (tk *TestKeys) analysisHTTPDiff(logger model.Logger, - probe *model.ArchivalHTTPRequestResult, th *webconnectivity.ControlHTTPRequestResult) { - // make sure the caller respected the contract - runtimex.PanicIfTrue( - probe.Failure != nil || th.Failure != nil, - "the caller should have passed us successful HTTP observations", - ) - - // if we're dealing with an HTTPS request, don't perform any comparison - // under the assumption that we're good if we're using TLS - URL, err := url.Parse(probe.Request.URL) - if err != nil { - return // looks like a bug - } - if URL.Scheme == "https" { - logger.Infof("HTTP: HTTPS && no error => #%d is successful", probe.TransactionID) - tk.BlockingFlags |= AnalysisBlockingFlagSuccess - return - } - - // original HTTP diff algorithm adapted for this implementation - tk.httpDiffBodyLengthChecks(probe, th) - tk.httpDiffStatusCodeMatch(probe, th) - tk.httpDiffHeadersMatch(probe, th) - tk.httpDiffTitleMatch(probe, th) - - if tk.StatusCodeMatch != nil && *tk.StatusCodeMatch { - if tk.BodyLengthMatch != nil && *tk.BodyLengthMatch { - logger.Infof( - "HTTP: statusCodeMatch && bodyLengthMatch => #%d is successful", - probe.TransactionID, - ) - tk.BlockingFlags |= AnalysisBlockingFlagSuccess - return - } - logger.Infof("HTTP: body length: MISMATCH (see #%d)", probe.TransactionID) - if tk.HeadersMatch != nil && *tk.HeadersMatch { - logger.Infof( - "HTTP: statusCodeMatch && headersMatch => #%d is successful", - probe.TransactionID, - ) - tk.BlockingFlags |= AnalysisBlockingFlagSuccess - return - } - logger.Infof("HTTP: uncommon headers: MISMATCH (see #%d)", probe.TransactionID) - if tk.TitleMatch != nil && *tk.TitleMatch { - logger.Infof( - "HTTP: statusCodeMatch && titleMatch => #%d is successful", - probe.TransactionID, - ) - tk.BlockingFlags |= AnalysisBlockingFlagSuccess - return - } - logger.Infof("HTTP: title: MISMATCH (see #%d)", probe.TransactionID) - } else { - logger.Infof("HTTP: status code: MISMATCH (see #%d)", probe.TransactionID) - } - - tk.BlockingFlags |= AnalysisBlockingFlagHTTPDiff - logger.Warnf("HTTP: it seems #%d is a case of httpDiff", probe.TransactionID) -} - -// httpDiffBodyLengthChecks compares the bodies lengths. -func (tk *TestKeys) httpDiffBodyLengthChecks( - probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { - control := ctrl.BodyLength - if control <= 0 { - return // no actual length - } - response := probe.Response - if response.BodyIsTruncated { - return // cannot trust body length in this case - } - measurement := int64(len(response.Body)) - if measurement <= 0 { - return // no actual length - } - const bodyProportionFactor = 0.7 - var proportion float64 - if measurement >= control { - proportion = float64(control) / float64(measurement) - } else { - proportion = float64(measurement) / float64(control) - } - good := proportion > bodyProportionFactor - tk.BodyLengthMatch = &good -} - -// httpDiffStatusCodeMatch compares the status codes. -func (tk *TestKeys) httpDiffStatusCodeMatch( - probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { - control := ctrl.StatusCode - measurement := probe.Response.Code - if control <= 0 { - return // no real status code - } - if measurement <= 0 { - return // no real status code - } - good := control == measurement - if !good && control/100 != 2 { - // Avoid comparison if it seems the TH failed _and_ the two - // status codes are not equal. Originally, this algorithm was - // https://github.com/measurement-kit/measurement-kit/blob/b55fbecb205be62c736249b689df0c45ae342804/src/libmeasurement_kit/ooni/web_connectivity.cpp#L60 - // and excluded the case where the TH failed with 5xx. - // - // Then, we discovered when implementing websteps a bunch - // of control failure modes that suggested to be more - // cautious. See https://github.com/bassosimone/websteps-illustrated/blob/632f27443ab9d94fb05efcf5e0b0c1ce190221e2/internal/engine/experiment/websteps/analysisweb.go#L137. - // - // However, it seems a bit retarded to avoid comparison - // when both the TH and the probe failed equallty. See - // https://github.com/ooni/probe/issues/2287, which refers - // to a measurement where both the probe and the TH fail - // with 404, but we fail to say "status_code_match = true". - // - // See https://explorer.ooni.org/measurement/20220911T203447Z_webconnectivity_IT_30722_n1_YDZQZOHAziEJk6o9?input=http%3A%2F%2Fwww.webbox.com%2Findex.php - // for a measurement where this was fixed. - return - } - tk.StatusCodeMatch = &good -} - -// httpDiffHeadersMatch compares the uncommon headers. -func (tk *TestKeys) httpDiffHeadersMatch( - probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { - control := ctrl.Headers - measurement := probe.Response.Headers - if len(control) <= 0 || len(measurement) <= 0 { - return - } - // Implementation note: using map because we only care about the - // keys being different and we ignore the values. - const ( - inMeasurement = 1 << 0 - inControl = 1 << 1 - inBoth = inMeasurement | inControl - ) - commonHeaders := map[string]bool{ - "date": true, - "content-type": true, - "server": true, - "cache-control": true, - "vary": true, - "set-cookie": true, - "location": true, - "expires": true, - "x-powered-by": true, - "content-encoding": true, - "last-modified": true, - "accept-ranges": true, - "pragma": true, - "x-frame-options": true, - "etag": true, - "x-content-type-options": true, - "age": true, - "via": true, - "p3p": true, - "x-xss-protection": true, - "content-language": true, - "cf-ray": true, - "strict-transport-security": true, - "link": true, - "x-varnish": true, - } - matching := make(map[string]int) - ours := make(map[string]bool) - for key := range measurement { - key = strings.ToLower(key) - if _, ok := commonHeaders[key]; !ok { - matching[key] |= inMeasurement - } - ours[key] = true - } - theirs := make(map[string]bool) - for key := range control { - key = strings.ToLower(key) - if _, ok := commonHeaders[key]; !ok { - matching[key] |= inControl - } - theirs[key] = true - } - // if they are equal we're done - if good := reflect.DeepEqual(ours, theirs); good { - tk.HeadersMatch = &good - return - } - // compute the intersection of uncommon headers - found := false - for _, value := range matching { - if (value & inBoth) == inBoth { - found = true - break - } - } - tk.HeadersMatch = &found -} - -// httpDiffTitleMatch compares the titles. -func (tk *TestKeys) httpDiffTitleMatch( - probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { - response := probe.Response - if response.Code <= 0 { - return - } - if response.BodyIsTruncated { - return - } - if ctrl.StatusCode <= 0 { - return - } - control := ctrl.Title - measurementBody := string(response.Body) - measurement := measurexlite.WebGetTitle(measurementBody) - if control == "" || measurement == "" { - return - } - const ( - inMeasurement = 1 << 0 - inControl = 1 << 1 - inBoth = inMeasurement | inControl - ) - words := make(map[string]int) - // We don't consider to match words that are shorter than 5 - // characters (5 is the average word length for english) - // - // The original implementation considered the word order but - // considering different languages it seems we could have less - // false positives by ignoring the word order. - const minWordLength = 5 - for _, word := range strings.Split(measurement, " ") { - if len(word) >= minWordLength { - words[strings.ToLower(word)] |= inMeasurement - } - } - for _, word := range strings.Split(control, " ") { - if len(word) >= minWordLength { - words[strings.ToLower(word)] |= inControl - } - } - good := true - for _, score := range words { - if (score & inBoth) != inBoth { - good = false - break - } - } - tk.TitleMatch = &good -} diff --git a/internal/experiment/webconnectivitylte/analysistcpip.go b/internal/experiment/webconnectivitylte/analysistcpip.go deleted file mode 100644 index 353dc9ae8a..0000000000 --- a/internal/experiment/webconnectivitylte/analysistcpip.go +++ /dev/null @@ -1,82 +0,0 @@ -package webconnectivitylte - -// -// TCP/IP analysis -// - -import ( - "fmt" - "net" - - "github.com/ooni/probe-cli/v3/internal/model" - "github.com/ooni/probe-cli/v3/internal/netxlite" -) - -// analysisTCPIPToplevel is the toplevel analysis function for TCP/IP results. -// -// This algorithm has two objectives: -// -// 1. walk the list of TCP connect attempts and mark each of them as -// Status.Blocked = true | false | null depending on what the TH observed -// for the same set of IP addresses (it's ugly to modify a data struct -// in place, but this algorithm is defined by the spec); -// -// 2. assign the AnalysisBlockingFlagTCPIPBlocking flag to XBlockingFlags if -// we see any TCP endpoint for which Status.Blocked is true. -func (tk *TestKeys) analysisTCPIPToplevel(logger model.Logger) { - // if we don't have a control result, do nothing. - if tk.Control == nil || len(tk.Control.TCPConnect) <= 0 { - return - } - var ( - istrue = true - isfalse = false - ) - - // walk the list of probe results and compare with TH results - for _, entry := range tk.TCPConnect { - // skip successful entries - failure := entry.Status.Failure - if failure == nil { - entry.Status.Blocked = &isfalse - continue // did not fail - } - - // make sure we exclude the IPv6 failures caused by lack of - // proper IPv6 support by the probe - ipv6, err := netxlite.IsIPv6(entry.IP) - if err != nil { - continue // looks like a bug - } - if ipv6 { - ignore := (*failure == netxlite.FailureNetworkUnreachable || - *failure == netxlite.FailureHostUnreachable) - if ignore { - // this occurs when we don't have IPv6 on the probe - continue - } - } - - // obtain the corresponding endpoint - epnt := net.JoinHostPort(entry.IP, fmt.Sprintf("%d", entry.Port)) - ctrl, found := tk.Control.TCPConnect[epnt] - if !found { - continue // only the probe tested this, so hard to say anything... - } - if ctrl.Failure != nil { - // If the TH failed as well, don't set XBlockingFlags and - // also don't bother with setting .Status.Blocked thus leaving - // it null. Performing precise error mapping should be a job - // for the pipeline rather than for the probe. - continue - } - logger.Warnf( - "TCP/IP: unexpected failure %s for %s (see #%d)", - *failure, - epnt, - entry.TransactionID, - ) - entry.Status.Blocked = &istrue - tk.BlockingFlags |= AnalysisBlockingFlagTCPIPBlocking - } -} diff --git a/internal/experiment/webconnectivitylte/analysistls.go b/internal/experiment/webconnectivitylte/analysistls.go deleted file mode 100644 index 0b3e96d37c..0000000000 --- a/internal/experiment/webconnectivitylte/analysistls.go +++ /dev/null @@ -1,50 +0,0 @@ -package webconnectivitylte - -// -// TLS analysis -// - -import "github.com/ooni/probe-cli/v3/internal/model" - -// analysisTLSToplevel is the toplevel analysis function for TLS. -// -// This algorithm aims to flag the TLS endpoints that failed unreasonably -// compared to what the TH has observed for the same endpoints. -func (tk *TestKeys) analysisTLSToplevel(logger model.Logger) { - // if we don't have a control result, do nothing. - if tk.Control == nil || len(tk.Control.TLSHandshake) <= 0 { - return - } - - // walk the list of probe results and compare with TH results - for _, entry := range tk.TLSHandshakes { - // skip successful entries - failure := entry.Failure - if failure == nil { - continue // did not fail - } - epnt := entry.Address - - // TODO(bassosimone,kelmenhorst): if, in the future, we choose to - // adapt this code to QUIC, we need to remember to treat EHOSTUNREACH - // and ENETUNREACH specially when the IP address is IPv6. - - // obtain the corresponding endpoint - ctrl, found := tk.Control.TLSHandshake[epnt] - if !found { - continue // only the probe tested this, so hard to say anything... - } - if ctrl.Failure != nil { - // If the TH failed as well, don't set XBlockingFlags. Performing - // precise error mapping should be a job for the pipeline. - continue - } - logger.Warnf( - "TLS: unexpected failure %s for %s (see #%d)", - *failure, - epnt, - entry.TransactionID, - ) - tk.BlockingFlags |= AnalysisBlockingFlagTLSBlocking - } -}