Skip to content

Commit

Permalink
[bug]- Invalid Seek for Non-Seekable Readers (#3095)
Browse files Browse the repository at this point in the history
* initial work

* fix and add tests

* uncomment

* fix seek end

* use buffer pool

* revert timeout

* make linter happy

* More linting :()
  • Loading branch information
ahrav authored Jul 25, 2024
1 parent 4a8b213 commit ebfbd21
Show file tree
Hide file tree
Showing 9 changed files with 337 additions and 127 deletions.
1 change: 1 addition & 0 deletions pkg/handlers/ar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func TestHandleARFile(t *testing.T) {

rdr, err := newFileReader(file)
assert.NoError(t, err)
defer rdr.Close()

handler := newARHandler()
archiveChan, err := handler.HandleFile(context.AddLogger(ctx), rdr)
Expand Down
2 changes: 2 additions & 0 deletions pkg/handlers/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ func (h *archiveHandler) openArchive(ctx logContext.Context, depth int, reader f
}
return fmt.Errorf("error creating custom reader: %w", err)
}
defer rdr.Close()

return h.openArchive(ctx, depth+1, rdr, archiveChan)
case archiver.Extractor:
Expand Down Expand Up @@ -194,6 +195,7 @@ func (h *archiveHandler) extractorHandler(archiveChan chan []byte) func(context.
}
return fmt.Errorf("error creating custom reader: %w", err)
}
defer rdr.Close()

h.metrics.incFilesProcessed()
h.metrics.observeFileSize(fileSize)
Expand Down
3 changes: 3 additions & 0 deletions pkg/handlers/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ func TestArchiveHandler(t *testing.T) {
if err != nil {
t.Errorf("error creating reusable reader: %s", err)
}
defer newReader.Close()

archiveChan, err := handler.HandleFile(logContext.Background(), newReader)
if testCase.expectErr {
assert.NoError(t, err)
Expand Down Expand Up @@ -119,6 +121,7 @@ func TestOpenInvalidArchive(t *testing.T) {

rdr, err := newFileReader(io.NopCloser(reader))
assert.NoError(t, err)
defer rdr.Close()

archiveChan := make(chan []byte)

Expand Down
1 change: 1 addition & 0 deletions pkg/handlers/default_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func TestHandleNonArchiveFile(t *testing.T) {

rdr, err := newFileReader(file)
assert.NoError(t, err)
defer rdr.Close()

handler := newDefaultHandler(defaultHandlerType)
archiveChan, err := handler.HandleFile(context.AddLogger(ctx), rdr)
Expand Down
7 changes: 2 additions & 5 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func newMimeTypeReaderFromFileReader(r fileReader) mimeTypeReader {

// newMimeTypeReader creates a new mimeTypeReader from an io.Reader.
// It uses a bufio.Reader to perform MIME type detection on the input reader
// without consuming it, by peeking into the first 512 bytes of the input.
// without consuming it, by peeking into the first 3072 bytes of the input.
// This encapsulates both the original reader and the detected MIME type information.
// This function is particularly useful for specialized archive handlers
// that need to pass extracted content to the default handler without modifying the original reader.
Expand All @@ -84,10 +84,6 @@ func newFileReader(r io.Reader) (fileReader, error) {

fReader.BufferedReadSeeker = iobuf.NewBufferedReaderSeeker(r)

// Disable buffering after initial reads.
// This optimization ensures we don't continue writing to the buffer after the initial reads.
defer fReader.DisableBuffering()

mime, err := mimetype.DetectReader(fReader)
if err != nil {
return fReader, fmt.Errorf("unable to detect MIME type: %w", err)
Expand Down Expand Up @@ -281,6 +277,7 @@ func HandleFile(
}
return fmt.Errorf("error creating custom reader: %w", err)
}
defer rdr.Close()

mimeT := mimeType(rdr.mime.String())
config := newFileHandlingConfig(options...)
Expand Down
88 changes: 84 additions & 4 deletions pkg/handlers/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,101 @@ func TestHandleFileCancelledContext(t *testing.T) {
}

// TestHandleFile downloads a small zip archive over HTTP, feeds the response
// body to HandleFile, and asserts that exactly one chunk ends up buffered on
// the reporter's channel.
//
// NOTE(review): this span looks like a rendered diff with the +/- markers
// stripped, so removed and added lines appear side by side (two `reporter :=`
// declarations, two ways of closing resp.Body). Confirm which lines belong to
// the final revision before treating this as compilable code.
func TestHandleFile(t *testing.T) {
// NOTE(review): presumably the pre-change line (buffer of 2) — confirm.
reporter := sources.ChanReporter{Ch: make(chan *sources.Chunk, 2)}
// NOTE(review): presumably the post-change line (buffer of 513) — confirm.
reporter := sources.ChanReporter{Ch: make(chan *sources.Chunk, 513)}

// Only one chunk is sent on the channel.
// TODO: Embed a zip without making an HTTP request.
resp, err := http.Get("https://raw.githubusercontent.com/bill-rich/bad-secrets/master/aws-canary-creds.zip")
assert.NoError(t, err)
// NOTE(review): presumably the pre-change close of the body — confirm.
if resp != nil && resp.Body != nil {
defer resp.Body.Close()
}
// NOTE(review): presumably the post-change close, wrapped in a deferred
// closure — confirm.
defer func() {
if resp != nil && resp.Body != nil {
resp.Body.Close()
}
}()

// The channel starts empty; after HandleFile returns it should hold one chunk.
assert.Equal(t, 0, len(reporter.Ch))
assert.NoError(t, HandleFile(context.Background(), resp.Body, &sources.Chunk{}, reporter))
assert.Equal(t, 1, len(reporter.Ch))
}

// TestHandleHTTPJson streams a JSON document straight from an HTTP response
// body (a reader that cannot seek) into HandleFile and checks that the
// expected number of chunks is reported.
func TestHandleHTTPJson(t *testing.T) {
	resp, err := http.Get("https://raw.githubusercontent.com/ahrav/nothing-to-see-here/main/sm_random_data.json")
	defer func() {
		if resp != nil && resp.Body != nil {
			resp.Body.Close()
		}
	}()
	// assert.NoError only records the failure; without bailing out here a
	// failed request would leave resp nil and the goroutine below would panic
	// on resp.Body.
	if !assert.NoError(t, err) {
		return
	}

	chunkCh := make(chan *sources.Chunk, 1)
	go func() {
		// Closing the channel ends the counting loop below once HandleFile
		// has returned and its error has been checked.
		defer close(chunkCh)
		err := HandleFile(logContext.Background(), resp.Body, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh})
		assert.NoError(t, err)
	}()

	const wantCount = 513
	count := 0
	for range chunkCh {
		count++
	}
	assert.Equal(t, wantCount, count)
}

// TestHandleHTTPJsonZip streams a zip archive straight from an HTTP response
// body (a reader that cannot seek) into HandleFile and checks that the
// expected number of chunks is reported.
func TestHandleHTTPJsonZip(t *testing.T) {
	resp, err := http.Get("https://raw.githubusercontent.com/ahrav/nothing-to-see-here/main/sm.zip")
	defer func() {
		if resp != nil && resp.Body != nil {
			resp.Body.Close()
		}
	}()
	// assert.NoError only records the failure; without bailing out here a
	// failed request would leave resp nil and the goroutine below would panic
	// on resp.Body.
	if !assert.NoError(t, err) {
		return
	}

	chunkCh := make(chan *sources.Chunk, 1)
	go func() {
		// Closing the channel ends the counting loop below once HandleFile
		// has returned and its error has been checked.
		defer close(chunkCh)
		err := HandleFile(logContext.Background(), resp.Body, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh})
		assert.NoError(t, err)
	}()

	const wantCount = 513
	count := 0
	for range chunkCh {
		count++
	}
	assert.Equal(t, wantCount, count)
}

func BenchmarkHandleHTTPJsonZip(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
func() {
resp, err := http.Get("https://raw.githubusercontent.com/ahrav/nothing-to-see-here/main/sm.zip")
assert.NoError(b, err)

defer func() {
if resp != nil && resp.Body != nil {
resp.Body.Close()
}
}()

chunkCh := make(chan *sources.Chunk, 1)

b.StartTimer()
go func() {
defer close(chunkCh)
err := HandleFile(logContext.Background(), resp.Body, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh})
assert.NoError(b, err)
}()

for range chunkCh {
}

b.StopTimer()
}()
}
}

func BenchmarkHandleFile(b *testing.B) {
file, err := os.Open("testdata/test.tgz")
assert.Nil(b, err)
Expand Down
1 change: 1 addition & 0 deletions pkg/handlers/rpm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func TestHandleRPMFile(t *testing.T) {

rdr, err := newFileReader(file)
assert.NoError(t, err)
defer rdr.Close()

handler := newRPMHandler()
archiveChan, err := handler.HandleFile(context.AddLogger(ctx), rdr)
Expand Down
Loading

0 comments on commit ebfbd21

Please sign in to comment.