Skip to content

Commit

Permalink
Add test that validates fingerprint.StartsWith (#318)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joseph Sirianni authored May 26, 2021
1 parent 80f21a1 commit 4aa956d
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
6 changes: 6 additions & 0 deletions operator/builtin/input/file/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ const defaultFingerprintSize = 1000 // bytes
const minFingerprintSize = 16 // bytes

// Fingerprint is used to identify a file.
// A file's fingerprint is the first N bytes of the file,
// where N is the fingerprintSize on the file_input operator.
type Fingerprint struct {
// FirstBytes holds the leading bytes of the file that make up its fingerprint.
FirstBytes []byte
}
Expand Down Expand Up @@ -42,6 +44,10 @@ func (f Fingerprint) Copy() *Fingerprint {

// StartsWith returns true if the fingerprints are the same
// or if the new fingerprint starts with the old one
// This is important functionality for tracking new files,
// since their initial size is typically less than that of
// a fingerprint. As the file grows, its fingerprint is updated
// until it reaches a maximum size, as configured on the operator
func (f Fingerprint) StartsWith(old *Fingerprint) bool {
l0 := len(old.FirstBytes)
if l0 == 0 {
Expand Down
63 changes: 63 additions & 0 deletions operator/builtin/input/file/fingerprint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package file

import (
"fmt"
"io/ioutil"
"math/rand"
"strings"
"testing"

Expand Down Expand Up @@ -200,4 +202,65 @@ func TestFingerprintStartsWith(t *testing.T) {
}
}

// TestFingerprintStartsWith_FromFile generates a file filled with many random
// bytes, then writes the same bytes to a second file, one byte at a time.
// After each byte is written, it validates that fingerprint matching would
// successfully associate the two files.
//
// The static file can be thought of as the present state of the file, while
// each iteration of the growing file represents a possible state of the same
// file at a previous time.
func TestFingerprintStartsWith_FromFile(t *testing.T) {
	// Fixed seed keeps the generated content reproducible across runs.
	r := rand.New(rand.NewSource(112358))

	operator, _, tempDir := newTestFileOperator(t, nil, nil)
	operator.fingerprintSize *= 10

	// Make the file much longer than a fingerprint so the loop below also
	// covers the states where the partial file has outgrown the fingerprint.
	fileLength := 12 * operator.fingerprintSize

	// Make a []byte we can write one at a time.
	// (*rand.Rand).Read always returns a nil error, so it is safe to ignore.
	content := make([]byte, fileLength)
	r.Read(content) // Fill slice with random bytes

	// Overwrite some bytes with \n to ensure
	// we are testing a file with multiple lines
	newlineMask := make([]byte, fileLength)
	r.Read(newlineMask) // Fill slice with random bytes
	for i, b := range newlineMask {
		if b == 0 && i != 0 { // 1/256 chance, but never first byte
			content[i] = byte('\n')
		}
	}

	fullFile, err := ioutil.TempFile(tempDir, "")
	require.NoError(t, err)
	defer fullFile.Close()

	_, err = fullFile.Write(content)
	require.NoError(t, err)

	fff, err := operator.NewFingerprint(fullFile)
	require.NoError(t, err)

	partialFile, err := ioutil.TempFile(tempDir, "")
	require.NoError(t, err)
	defer partialFile.Close()

	// Write the first byte before comparing, since empty files will never match
	_, err = partialFile.Write(content[:1])
	require.NoError(t, err)
	content = content[1:]

	// Write one byte at a time and validate that
	// full fingerprint still starts with updated partial
	for i := range content {
		// content[i:i+1] is exactly one byte; content[i:i] would be an empty
		// slice and the partial file would never grow.
		_, err = partialFile.Write(content[i : i+1])
		require.NoError(t, err)

		pff, err := operator.NewFingerprint(partialFile)
		require.NoError(t, err)

		// i+2 bytes written so far: the initial byte plus bytes 0..i of the remainder.
		require.True(t, fff.StartsWith(pff), "no match with partial file of %d bytes", i+2)
	}
}

// TODO TestConfig (config_test.go) - sets defaults, errors appropriately, etc

0 comments on commit 4aa956d

Please sign in to comment.