Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(qatool): reduce generated test cases churn #1505

Merged
merged 6 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions internal/cmd/qatool/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ func main() {
// build the regexp
selector := regexp.MustCompile(*runFlag)

// make sure we produce more predictable observations in output
webconnectivitylte.SortObservations.Add(1)

// select which test cases to run
for _, tc := range webconnectivityqa.AllTestCases() {
name := "webconnectivitylte/" + tc.Name
Expand Down
24 changes: 20 additions & 4 deletions internal/experiment/webconnectivitylte/cleartextflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,16 @@ func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, a

// Implementation note: we want to emit http_transaction_start when we actually start doing
// HTTP things such that it's possible to correctly classify network events
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), started, "http_transaction_start", trace.Tags()...,
t.TestKeys.AppendNetworkEvents(measurexlite.NewArchivalNetworkEvent(
trace.Index(),
started,
"http_transaction_start",
network,
address,
0,
nil,
started,
trace.Tags()...,
))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such that we have the endpoints in this event (because... why not? It helps with sorting and I don't see why omitting a piece of information we know would ever be wrong).


resp, err := txp.RoundTrip(req)
Expand All @@ -272,8 +280,16 @@ func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, a
}

finished := trace.TimeSince(trace.ZeroTime())
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), finished, "http_transaction_done", trace.Tags()...,
t.TestKeys.AppendNetworkEvents(measurexlite.NewArchivalNetworkEvent(
trace.Index(),
finished,
"http_transaction_done",
network,
address,
0,
nil,
finished,
trace.Tags()...,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such that we have the endpoints in this event (because... why not? It helps with sorting and I don't see why omitting a piece of information we know would ever be wrong).

))

ev := measurexlite.NewArchivalHTTPRequestResult(
Expand Down
22 changes: 22 additions & 0 deletions internal/experiment/webconnectivitylte/measurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/ooni/probe-cli/v3/internal/experiment/webconnectivity"
"github.com/ooni/probe-cli/v3/internal/inputparser"
"github.com/ooni/probe-cli/v3/internal/minipipeline"
"github.com/ooni/probe-cli/v3/internal/model"
"golang.org/x/net/publicsuffix"
)
Expand Down Expand Up @@ -144,6 +145,23 @@ func (m *Measurer) Run(ctx context.Context, args *model.ExperimentArgs) error {
}
}

// sort test keys to make output more predictable and avoid churn when generating
// minipipeline test cases; see https://github.com/ooni/probe/issues/2677.
//
// Note that tk.Do53 and tk.DoH are initialized by NewTestKeys so we know they're not nil.
//
// Note that we MUST NOT sort tk.Requests because its order matters for historical
// reasons and we don't wnat to break existing data consumers.
if SortObservations.Load() > 0 {
tk.Queries = minipipeline.SortDNSLookupResults(tk.Queries)
tk.Do53.Queries = minipipeline.SortDNSLookupResults(tk.Do53.Queries)
tk.DoH.Queries = minipipeline.SortDNSLookupResults(tk.DoH.Queries)
tk.DNSDuplicateResponses = minipipeline.SortDNSLookupResults(tk.DNSDuplicateResponses)
tk.NetworkEvents = minipipeline.SortNetworkEvents(tk.NetworkEvents)
tk.TCPConnect = minipipeline.SortTCPConnectResults(tk.TCPConnect)
tk.TLSHandshakes = minipipeline.SortTLSHandshakeResults(tk.TLSHandshakes)
}

// return whether there was a fundamental failure, which would prevent
// the measurement from being submitted to the OONI collector.
return tk.fundamentalFailure
Expand All @@ -159,3 +177,7 @@ func registerExtensions(m *model.Measurement) {
model.ArchivalExtTLSHandshake.AddTo(m)
model.ArchivalExtTunnel.AddTo(m)
}

// SortObservations allows to configure sorting observations when that's needed to
// reduce churn in the generated JSON files (mainly for minipipeline).
var SortObservations = &atomic.Int64{}
1 change: 1 addition & 0 deletions internal/experiment/webconnectivitylte/qa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
)

func TestQA(t *testing.T) {
SortObservations.Add(1) // make sure we have predictable observations
for _, tc := range webconnectivityqa.AllTestCases() {
t.Run(tc.Name, func(t *testing.T) {
if (tc.Flags & webconnectivityqa.TestCaseFlagNoLTE) != 0 {
Expand Down
24 changes: 20 additions & 4 deletions internal/experiment/webconnectivitylte/secureflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,16 @@ func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn

// Implementation note: we want to emit http_transaction_start when we actually start doing
// HTTP things such that it's possible to correctly classify network events
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), started, "http_transaction_start", trace.Tags()...,
t.TestKeys.AppendNetworkEvents(measurexlite.NewArchivalNetworkEvent(
trace.Index(),
started,
"http_transaction_start",
network,
address,
0,
nil,
started,
trace.Tags()...,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such that we have the endpoints in this event (because... why not? It helps with sorting and I don't see why omitting a piece of information we know would ever be wrong).

))

resp, err := txp.RoundTrip(req)
Expand All @@ -327,8 +335,16 @@ func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn
}

finished := trace.TimeSince(trace.ZeroTime())
t.TestKeys.AppendNetworkEvents(measurexlite.NewAnnotationArchivalNetworkEvent(
trace.Index(), finished, "http_transaction_done", trace.Tags()...,
t.TestKeys.AppendNetworkEvents(measurexlite.NewArchivalNetworkEvent(
trace.Index(),
finished,
"http_transaction_done",
network,
address,
0,
nil,
finished,
trace.Tags()...,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such that we have the endpoints in this event (because... why not? It helps with sorting and I don't see why omitting a piece of information we know would ever be wrong).

))

ev := measurexlite.NewArchivalHTTPRequestResult(
Expand Down
7 changes: 0 additions & 7 deletions internal/experiment/webconnectivitylte/testkeys.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,13 +291,6 @@ func (tk *TestKeys) WithDNSWhoami(fun func(*DNSWhoamiInfo)) {
tk.mu.Unlock()
}

// SetClientResolver sets the ClientResolver field.
func (tk *TestKeys) SetClientResolver(value string) {
tk.mu.Lock()
tk.ClientResolver = value
tk.mu.Unlock()
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need this function because we set the field after all measurements have completed.

// AppendConnPriorityLogEntry appends an entry to ConnPriorityLog.
func (tk *TestKeys) AppendConnPriorityLogEntry(entry *ConnPriorityLogEntry) {
tk.mu.Lock()
Expand Down
26 changes: 22 additions & 4 deletions internal/measurexlite/tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,17 @@ func (thx *tlsHandshakerTrace) Handshake(
func (tx *Trace) OnTLSHandshakeStart(now time.Time, remoteAddr string, config *tls.Config) {
t := now.Sub(tx.ZeroTime())
select {
case tx.networkEvent <- NewAnnotationArchivalNetworkEvent(
tx.Index(), t, "tls_handshake_start", tx.tags...):
case tx.networkEvent <- NewArchivalNetworkEvent(
tx.Index(),
t,
"tls_handshake_start",
"tcp",
remoteAddr,
0,
nil,
t,
tx.tags...,
):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such that we have the endpoints in this event (because... why not? It helps with sorting and I don't see why omitting a piece of information we know would ever be wrong).

default: // buffer is full
}
}
Expand All @@ -70,8 +79,17 @@ func (tx *Trace) OnTLSHandshakeDone(started time.Time, remoteAddr string, config
}

select {
case tx.networkEvent <- NewAnnotationArchivalNetworkEvent(
tx.Index(), t, "tls_handshake_done", tx.tags...):
case tx.networkEvent <- NewArchivalNetworkEvent(
tx.Index(),
t,
"tls_handshake_done",
"tcp",
remoteAddr,
0,
nil,
t,
tx.tags...,
):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is such that we have the endpoints in this event (because... why not? It helps with sorting and I don't see why omitting a piece of information we know would ever be wrong).

default: // buffer is full
}
}
Expand Down
11 changes: 7 additions & 4 deletions internal/minipipeline/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,23 @@ func NewLinearWebAnalysis(input *WebObservationsContainer) (output []*WebObserva
output = append(output, entry)
}

// sort in descending order
// sort using complex sorting rule
sort.SliceStable(output, func(i, j int) bool {
left, right := output[i], output[j]

// We use -1 as the default value such that observations with undefined
// TagDepth sort at the end of the generated list.
if left.TagDepth.UnwrapOr(-1) > right.TagDepth.UnwrapOr(-1) {
return true
} else if left.TagDepth.UnwrapOr(-1) < right.TagDepth.UnwrapOr(-1) {
}
if left.TagDepth.UnwrapOr(-1) < right.TagDepth.UnwrapOr(-1) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We prefer a coding style without else if when there's a previous return.

return false
}

if left.Type > right.Type {
return true
} else if left.Type < right.Type {
}
if left.Type < right.Type {
return false
}

Expand All @@ -74,7 +76,8 @@ func NewLinearWebAnalysis(input *WebObservationsContainer) (output []*WebObserva
const defaultFailureValue = "unknown_failure"
if left.Failure.UnwrapOr(defaultFailureValue) < right.Failure.UnwrapOr(defaultFailureValue) {
return true
} else if left.Failure.UnwrapOr(defaultFailureValue) > right.Failure.UnwrapOr(defaultFailureValue) {
}
if left.Failure.UnwrapOr(defaultFailureValue) > right.Failure.UnwrapOr(defaultFailureValue) {
return false
}

Expand Down
150 changes: 150 additions & 0 deletions internal/minipipeline/sorting.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package minipipeline

import (
"sort"

"github.com/ooni/probe-cli/v3/internal/model"
)

// SortDNSLookupResults sorts and returns a copy of the DNS lookup results to avoid too-much
// timing dependent churn when generating testcases for the minipipeline.
func SortDNSLookupResults(inputs []*model.ArchivalDNSLookupResult) (outputs []*model.ArchivalDNSLookupResult) {
// copy the original slice
outputs = append([]*model.ArchivalDNSLookupResult{}, inputs...)

// sort using complex sorting rule
sort.SliceStable(outputs, func(i, j int) bool {
left, right := outputs[i], outputs[j]

// we sort groups by resolver type to avoid the churn caused by parallel runs
if left.Engine < right.Engine {
return true
}
if left.Engine > right.Engine {
return false
}

// within the same group, we sort by ascending transaction ID
if left.TransactionID < right.TransactionID {
return true
}
if left.TransactionID > right.TransactionID {
return false
}

// we want entries that are successful to appear first
fsget := func(value *string) string {
if value == nil {
return ""
}
return *value
}
return fsget(left.Failure) < fsget(right.Failure)
})

return
}

// SortNetworkEvents is like [SortDNSLookupResults] but for network events.
func SortNetworkEvents(inputs []*model.ArchivalNetworkEvent) (outputs []*model.ArchivalNetworkEvent) {
// copy the original slice
outputs = append([]*model.ArchivalNetworkEvent{}, inputs...)

// sort using complex sorting rule
sort.SliceStable(outputs, func(i, j int) bool {
left, right := outputs[i], outputs[j]

// we sort by endpoint address to significantly reduce the churn
if left.Address < right.Address {
return true
}
if left.Address > right.Address {
return false
}

// if the address is the same, then we group by transaction
if left.TransactionID < right.TransactionID {
return true
}
if left.TransactionID > right.TransactionID {
return false
}

// with same transaction, we sort by increasing time
return left.T < right.T
})

return
}

// SortTCPConnectResults is like [SortDNSLookupResults] but for TCP connect results.
func SortTCPConnectResults(
inputs []*model.ArchivalTCPConnectResult) (outputs []*model.ArchivalTCPConnectResult) {
// copy the original slice
outputs = append([]*model.ArchivalTCPConnectResult{}, inputs...)

// sort using complex sorting rule
sort.SliceStable(outputs, func(i, j int) bool {
left, right := outputs[i], outputs[j]

// we sort by endpoint address to significantly reduce the churn
if left.IP < right.IP {
return true
}
if left.IP > right.IP {
return false
}
if left.Port < right.Port {
return true
}
if left.Port > right.Port {
return false
}

// if the address is the same, then we group by transaction
if left.TransactionID < right.TransactionID {
return true
}
if left.TransactionID > right.TransactionID {
return false
}

// with same transaction, we sort by increasing time
return left.T < right.T
})

return
}

// SortTLSHandshakeResults is like [SortDNSLookupResults] but for TLS handshake results.
func SortTLSHandshakeResults(
inputs []*model.ArchivalTLSOrQUICHandshakeResult) (outputs []*model.ArchivalTLSOrQUICHandshakeResult) {
// copy the original slice
outputs = append([]*model.ArchivalTLSOrQUICHandshakeResult{}, inputs...)

// sort using complex sorting rule
sort.SliceStable(outputs, func(i, j int) bool {
left, right := outputs[i], outputs[j]

// we sort by endpoint address to significantly reduce the churn
if left.Address < right.Address {
bassosimone marked this conversation as resolved.
Show resolved Hide resolved
return true
}
if left.Address > right.Address {
return false
}

// if the address is the same, then we group by transaction
if left.TransactionID < right.TransactionID {
return true
}
if left.TransactionID > right.TransactionID {
return false
}

// with same transaction, we sort by increasing time
return left.T < right.T
})

return
}
Loading
Loading