Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(model): introduce ArchivalBinaryData #1321

Merged
merged 2 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 74 additions & 7 deletions internal/model/archival.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
package model

import (
"encoding/base64"
"encoding/json"
"errors"
"unicode/utf8"
)

//
// Archival format for individual measurement results
// such as TCP connect, TLS handshake, DNS lookup.
Expand All @@ -17,6 +10,15 @@ import (
// See https://github.com/ooni/spec/tree/master/data-formats.
//

import (
"bytes"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"unicode/utf8"
)

//
// Data format extension specification
//
Expand Down Expand Up @@ -59,6 +61,66 @@ var (
// Base types
//

// ArchivalBinaryData is a wrapper for bytes that serializes the enclosed
// data using the specific ooni/spec data format for binary data.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata.
type ArchivalBinaryData struct {
Value []byte
}

// archivalBinaryDataRepr is the wire representation of binary data according to
// https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata.
type archivalBinaryDataRepr struct {
Data []byte `json:"data"`
Format string `json:"format"`
}

var (
_ json.Marshaler = ArchivalBinaryData{}
_ json.Unmarshaler = &ArchivalBinaryData{}
)

// MarshalJSON implements json.Marshaler.
func (value ArchivalBinaryData) MarshalJSON() ([]byte, error) {
// special case: we need to marshal the empty data as the null value
if len(value.Value) <= 0 {
return json.Marshal(nil)
}

// construct and serialize the OONI representation
repr := &archivalBinaryDataRepr{Format: "base64", Data: value.Value}
return json.Marshal(repr)
}

// ErrInvalidBinaryDataFormat is the format returned when marshaling and
// unmarshaling binary data and the value of "format" is unknown.
var ErrInvalidBinaryDataFormat = errors.New("model: invalid binary data format")

// UnmarshalJSON implements json.Unmarshaler.
func (value *ArchivalBinaryData) UnmarshalJSON(raw []byte) error {
// handle the case where input is a literal null
if bytes.Equal(raw, []byte("null")) {
value.Value = nil
return nil
}

// attempt to unmarshal into the archival representation
var repr archivalBinaryDataRepr
if err := json.Unmarshal(raw, &repr); err != nil {
return err
}

// make sure the data format is "base64"
if repr.Format != "base64" {
return fmt.Errorf("%w: '%s'", ErrInvalidBinaryDataFormat, repr.Format)
}

// we're good because Go uses base64 for []byte automatically
value.Value = repr.Data
return nil
}

// ArchivalMaybeBinaryData is a possibly binary string. We use this helper class
// to define a custom JSON encoder that allows us to choose the proper
// representation depending on whether the Value field is valid UTF-8 or not.
Expand All @@ -72,9 +134,12 @@ type ArchivalMaybeBinaryData struct {
// says that UTF-8 content is represented as string and non-UTF-8 content is
// instead represented using `{"format":"base64","data":"..."}`.
func (hb ArchivalMaybeBinaryData) MarshalJSON() ([]byte, error) {
// if we can serialize as UTF-8 string, do that
if utf8.ValidString(hb.Value) {
return json.Marshal(hb.Value)
}

// otherwise fallback to the ooni/spec representation for binary data
er := make(map[string]string)
er["format"] = "base64"
er["data"] = base64.StdEncoding.EncodeToString([]byte(hb.Value))
Expand Down Expand Up @@ -242,6 +307,8 @@ type ArchivalHTTPResponse struct {
// ArchivalHTTPBody is an HTTP body. As an implementation note, this type must
// be an alias for the MaybeBinaryValue type, otherwise the specific serialisation
// mechanism implemented by MaybeBinaryValue is not working.
//
// QUIRK: it's quite fragile we must use an alias here--more robust solution?
type ArchivalHTTPBody = ArchivalMaybeBinaryData

// ArchivalHTTPHeader is a single HTTP header.
Expand Down
256 changes: 256 additions & 0 deletions internal/model/archival_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package model_test

import (
"encoding/json"
"errors"
"testing"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/testingx"
)
Expand Down Expand Up @@ -37,6 +40,259 @@ var archivalBinaryInput = []uint8{
// we use this value below to test we can handle binary data
var archivalEncodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`)

func TestArchivalBinaryData(t *testing.T) {
// This test verifies that we correctly serialize binary data to JSON by
// producing null | {"format":"base64","data":"<base64>"}
t.Run("MarshalJSON", func(t *testing.T) {
// testcase is a test case defined by this function
type testcase struct {
// name is the name of the test case
name string

// input is the binary input
input model.ArchivalBinaryData

// expectErr is the error we expect to see or nil
expectErr error

// expectData is the data we expect to see
expectData []byte
}

cases := []testcase{{
name: "with nil .Value",
input: model.ArchivalBinaryData{Value: nil},
expectErr: nil,
expectData: []byte("null"),
}, {
name: "with zero length .Value",
input: model.ArchivalBinaryData{Value: []byte{}},
expectErr: nil,
expectData: []byte("null"),
}, {
name: "with .Value being a simple binary string",
input: model.ArchivalBinaryData{Value: []byte("Elliot")},
expectErr: nil,
expectData: []byte(`{"data":"RWxsaW90","format":"base64"}`),
}, {
name: "with .Value being a long binary string",
input: model.ArchivalBinaryData{Value: archivalBinaryInput},
expectErr: nil,
expectData: archivalEncodedBinaryInput,
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// serialize to JSON
output, err := json.Marshal(tc.input)

t.Log("got this error", err)
t.Log("got this binary data", output)
t.Logf("converted to string: %s", string(output))

// handle errors
switch {
case err == nil && tc.expectErr != nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr == nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr != nil:
if err.Error() != tc.expectErr.Error() {
t.Fatal("expected", tc.expectErr, "got", err)
}

case err == nil && tc.expectErr == nil:
// all good--fallthrough
}

if diff := cmp.Diff(tc.expectData, output); diff != "" {
t.Fatal(diff)
}
})
}
})

// This test verifies that we correctly parse binary data to JSON by
// reading from null | {"format":"base64","data":"<base64>"}
t.Run("UnmarshalJSON", func(t *testing.T) {
// testcase is a test case defined by this function
type testcase struct {
// name is the name of the test case
name string

// input is the binary input
input []byte

// expectErr is the error we expect to see or nil
expectErr error

// expectData is the data we expect to see
expectData model.ArchivalBinaryData
}

cases := []testcase{{
name: "with nil input array",
input: nil,
expectErr: errors.New("unexpected end of JSON input"),
expectData: model.ArchivalBinaryData{Value: nil},
}, {
name: "with zero-length input array",
input: []byte{},
expectErr: errors.New("unexpected end of JSON input"),
expectData: model.ArchivalBinaryData{Value: nil},
}, {
name: "with binary input that is not a complete JSON",
input: []byte("{"),
expectErr: errors.New("unexpected end of JSON input"),
expectData: model.ArchivalBinaryData{Value: nil},
}, {
name: "with ~random binary data as input",
input: archivalBinaryInput,
expectErr: errors.New("invalid character 'W' looking for beginning of value"),
expectData: model.ArchivalBinaryData{},
}, {
name: "with valid JSON of the wrong type (array)",
input: []byte("[]"),
expectErr: errors.New("json: cannot unmarshal array into Go value of type model.archivalBinaryDataRepr"),
expectData: model.ArchivalBinaryData{},
}, {
name: "with valid JSON of the wrong type (number)",
input: []byte("1.17"),
expectErr: errors.New("json: cannot unmarshal number into Go value of type model.archivalBinaryDataRepr"),
expectData: model.ArchivalBinaryData{},
}, {
name: "with input being the liternal null",
input: []byte(`null`),
expectErr: nil,
expectData: model.ArchivalBinaryData{Value: nil},
}, {
name: "with empty JSON object",
input: []byte("{}"),
expectErr: errors.New("model: invalid binary data format: ''"),
expectData: model.ArchivalBinaryData{},
}, {
name: "with correct data model but invalid format",
input: []byte(`{"data":"","format":"antani"}`),
expectErr: errors.New("model: invalid binary data format: 'antani'"),
expectData: model.ArchivalBinaryData{},
}, {
name: "with correct data model and format but invalid base64 string",
input: []byte(`{"data":"x","format":"base64"}`),
expectErr: errors.New("illegal base64 data at input byte 0"),
expectData: model.ArchivalBinaryData{},
}, {
name: "with correct data model and format but empty base64 string",
input: []byte(`{"data":"","format":"base64"}`),
expectErr: nil,
expectData: model.ArchivalBinaryData{Value: []byte{}},
}, {
name: "with the encoding of a simple binary string",
input: []byte(`{"data":"RWxsaW90","format":"base64"}`),
expectErr: nil,
expectData: model.ArchivalBinaryData{Value: []byte("Elliot")},
}, {
name: "with the encoding of a complex binary string",
input: archivalEncodedBinaryInput,
expectErr: nil,
expectData: model.ArchivalBinaryData{Value: archivalBinaryInput},
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// unmarshal the raw input into an ArchivalBinaryData type
var abd model.ArchivalBinaryData
err := json.Unmarshal(tc.input, &abd)

t.Log("got this error", err)
t.Log("got this .Value filed", abd.Value)
t.Logf("converted to string: %s", string(abd.Value))

// handle errors
switch {
case err == nil && tc.expectErr != nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr == nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr != nil:
if err.Error() != tc.expectErr.Error() {
t.Fatal("expected", tc.expectErr, "got", err)
}

case err == nil && tc.expectErr == nil:
// all good--fallthrough
}

if diff := cmp.Diff(tc.expectData, abd); diff != "" {
t.Fatal(diff)
}
})
}
})

// This test verifies that we correctly round trip through JSON
t.Run("MarshalJSON then UnmarshalJSON", func(t *testing.T) {
// testcase is a test case defined by this function
type testcase struct {
// name is the name of the test case
name string

// input is the binary input
input model.ArchivalBinaryData
}

cases := []testcase{{
name: "with nil .Value",
input: model.ArchivalBinaryData{Value: nil},
}, {
name: "with zero length .Value",
input: model.ArchivalBinaryData{Value: []byte{}},
}, {
name: "with .Value being a simple binary string",
input: model.ArchivalBinaryData{Value: []byte("Elliot")},
}, {
name: "with .Value being a long binary string",
input: model.ArchivalBinaryData{Value: archivalBinaryInput},
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// serialize to JSON
output, err := json.Marshal(tc.input)

t.Log("got this error", err)
t.Log("got this binary data", output)
t.Logf("converted to string: %s", string(output))

if err != nil {
t.Fatal(err)
}

// parse from JSON
var abc model.ArchivalBinaryData
if err := json.Unmarshal(output, &abc); err != nil {
t.Fatal(err)
}

// make sure we round tripped
//
// Note: the round trip is not perfect because the zero length value,
// which originally is []byte{}, unmarshals to a nil value.
//
// Because the two are ~equivalent in Go most intents and purposes
// and the wire representation does not change, this is OK(TM)
diffOptions := []cmp.Option{cmpopts.EquateEmpty()}
if diff := cmp.Diff(tc.input, abc, diffOptions...); diff != "" {
t.Fatal(diff)
}
})
}
})
}

func TestMaybeBinaryValue(t *testing.T) {
t.Run("MarshalJSON", func(t *testing.T) {
tests := []struct {
Expand Down