diff --git a/bindings/go/scip/parse.go b/bindings/go/scip/parse.go
index 4e7310a4..048ecac1 100644
--- a/bindings/go/scip/parse.go
+++ b/bindings/go/scip/parse.go
@@ -70,7 +70,7 @@ func (pi *IndexVisitor) ParseStreaming(r io.Reader) error {
 		}
 		// Keep going when len == 0 instead of short-circuiting to preserve empty sub-messages
 		if dataLen > 0 {
-			numRead, err := r.Read(dataBuf)
+			numRead, err := io.ReadAtLeast(r, dataBuf, dataLen)
 			if err != nil {
 				return errors.Wrapf(err, "failed to read data for %s", indexFieldName(fieldNumber))
 			}
diff --git a/bindings/go/scip/parse_test.go b/bindings/go/scip/parse_test.go
index a3a30419..a89cc7fc 100644
--- a/bindings/go/scip/parse_test.go
+++ b/bindings/go/scip/parse_test.go
@@ -2,6 +2,8 @@ package scip
 
 import (
 	"bytes"
+	"compress/gzip"
+	"os"
 	"regexp"
 	"testing"
 
@@ -46,3 +48,42 @@ func TestFuzz(t *testing.T) {
 		}
 	}
 }
+
+// TestLargeDocuments is a regression test: it streams a real, gzipped index
+// through ParseStreaming and checks that parsing succeeds and yields documents.
+func TestLargeDocuments(t *testing.T) {
+	// Copied from the Sourcegraph monorepo, which triggered a bug
+	// where a single Reader.Read() call didn't actually fill the buffer
+	// completely, due to the presence of large documents.
+	gzipped, err := os.Open("./testdata/index1.scip.gz")
+	if err != nil {
+		t.Fatalf("unexpected error reading test file: %s", err)
+	}
+	// Release the file handle once the test is done with the fixture.
+	defer gzipped.Close()
+
+	reader, err := gzip.NewReader(gzipped)
+	if err != nil {
+		t.Fatalf("unexpected error unzipping test file: %s", err)
+	}
+	defer reader.Close()
+
+	parsedIndex := Index{}
+
+	// Collect everything the visitor callbacks receive into parsedIndex.
+	indexVisitor := IndexVisitor{func(metadata *Metadata) {
+		parsedIndex.Metadata = metadata
+	}, func(document *Document) {
+		parsedIndex.Documents = append(parsedIndex.Documents, document)
+	}, func(extSym *SymbolInformation) {
+		parsedIndex.ExternalSymbols = append(parsedIndex.ExternalSymbols, extSym)
+	}}
+
+	if err := indexVisitor.ParseStreaming(reader); err != nil {
+		t.Fatalf("got error parsing index %v", err)
+	}
+	// Guard against a parser that returns no error but delivers nothing.
+	if len(parsedIndex.Documents) == 0 {
+		t.Fatal("expected at least one document to be parsed from the index")
+	}
+}
diff --git a/bindings/go/scip/testdata/index1.scip.gz b/bindings/go/scip/testdata/index1.scip.gz
new file mode 100644
index 00000000..fdc8979a
Binary files /dev/null and b/bindings/go/scip/testdata/index1.scip.gz differ