Skip to content

Commit

Permalink
use a pool of buffers to alleviate memory allocs in csv; related to #553
Browse files Browse the repository at this point in the history


When iterating over multiple files, the csv detector allocated a new buffer
for each file. This change adds a pool of buffers that can be reused
between detections. The same pool is shared between the csv and tsv
detectors.
  • Loading branch information
gabriel-vasile committed Aug 12, 2024
1 parent 5fca7a6 commit 5f825db
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion internal/magic/text_csv.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
package magic

import (
"bufio"
"bytes"
"encoding/csv"
"errors"
"io"
"sync"
)

// readerPool recycles *bufio.Reader values between detections so that a fresh
// reader (and its internal buffer) does not have to be allocated for every
// input. Readers created by the pool have no underlying source attached; a
// source must be supplied with Reset before the reader is used.
var readerPool = sync.Pool{New: func() any { return bufio.NewReader(nil) }}

// newReader obtains a *bufio.Reader from readerPool and rebinds it to read
// from r. The returned reader should be handed back with readerPool.Put once
// the caller is done with it so it can be reused.
func newReader(r io.Reader) *bufio.Reader {
	pooled := readerPool.Get().(*bufio.Reader)
	// Reset drops whatever the previous user left buffered and switches the
	// reader over to the new source.
	pooled.Reset(r)
	return pooled
}

// Csv matches a comma-separated values file.
func Csv(raw []byte, limit uint32) bool {
return sv(raw, ',', limit)
Expand All @@ -18,7 +34,11 @@ func Tsv(raw []byte, limit uint32) bool {
}

func sv(in []byte, comma rune, limit uint32) bool {
r := csv.NewReader(bytes.NewReader(dropLastLine(in, limit)))
in = dropLastLine(in, limit)

br := newReader(bytes.NewReader(in))
defer readerPool.Put(br)
r := csv.NewReader(br)
r.Comma = comma
r.ReuseRecord = true
r.LazyQuotes = true
Expand Down

0 comments on commit 5f825db

Please sign in to comment.