From 458b62d9ea8613384d204a88cfe48095d8ae92e0 Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sat, 12 Oct 2024 20:00:33 -0700 Subject: [PATCH] action for benchmarking detectors (#590) * add action for benchmarking each detector --- .github/workflows/benchmark.go | 40 --------------------------------- .github/workflows/benchmark.yml | 32 ++++++++++++++++++++++++++ .github/workflows/go.yml | 5 +---- mimetype_test.go | 33 +-------------------------- 4 files changed, 34 insertions(+), 76 deletions(-) delete mode 100644 .github/workflows/benchmark.go create mode 100644 .github/workflows/benchmark.yml diff --git a/.github/workflows/benchmark.go b/.github/workflows/benchmark.go deleted file mode 100644 index 5953cdf..0000000 --- a/.github/workflows/benchmark.go +++ /dev/null @@ -1,40 +0,0 @@ -name: Run benchmarks -on: - pull_request: - branches: [master] - -permissions: - contents: read - -jobs: - benchmark: - runs-on: ubuntu-latest - steps: - - name: Install Go - uses: actions/setup-go@v5.0.2 - with: - go-version-file: 'go.mod' - - run: go install golang.org/x/perf/cmd/benchstat@latest - // Base for comparison is master branch. - - name: Checkout code - uses: actions/checkout@v4.1.7 - with: - ref: master - - run: go test -run=none -bench=. --count=7 > /tmp/prev & - - - name: Checkout code - uses: actions/checkout@v4.1.7 - - run: go test -run=none -bench=. --count=7 > /tmp/curr & - - // Wait for both benchmarks to complete before comparing. 
- - run: wait - - run: RESULT="$(benchstat /tmp/prev /tmp/curr)" - - uses: actions/github-script@v7 - with: - script: | - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: $RESULT - }) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..f3f1b7a --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,32 @@ +name: Run benchmarks +on: + pull_request: + branches: [master] + +permissions: + contents: read + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + # Base for comparison is master branch. + - name: Checkout code + uses: actions/checkout@v4.1.7 + with: + ref: master + - name: Install Go + uses: actions/setup-go@v5.0.2 + with: + go-version-file: 'go.mod' + + # 30 runs with a 100ms benchtime seem to give acceptable p-values. + # With count=10 the results were unreliable, presumably because the Actions + # runner is a shared environment where CPU and memory are affected by other jobs. + - run: go test -run=none -bench=. -count=30 -benchtime=100ms -timeout=20m > /tmp/prev + - name: Checkout code + uses: actions/checkout@v4.1.7 + - run: go test -run=none -bench=. 
-count=30 -benchtime=100ms -timeout=20m > /tmp/curr + + - run: go install golang.org/x/perf/cmd/benchstat@latest + - run: benchstat /tmp/prev /tmp/curr diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 36efe49..9db3515 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -24,10 +24,7 @@ jobs: version: "v1.58" test: - strategy: - matrix: - platform: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.platform }} + runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4.2.1 diff --git a/mimetype_test.go b/mimetype_test.go index ad686e5..7e60297 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -206,7 +206,7 @@ var testcases = []testcase{ {"utf16lebom.txt", text, "text/plain; charset=utf-16le", false}, {"utf32bebom.txt", text, "text/plain; charset=utf-32be", false}, {"utf32lebom.txt", text, "text/plain; charset=utf-32le", false}, - {"utf8.txt", text, "text/plain; charset=utf-8", false}, + {"utf8.txt", text, "text/plain; charset=utf-8", true}, {"utf8ctrlchars", root, "application/octet-stream", false}, {"vcf.vcf", vCard, "text/vcard", true}, {"vcf.dos.vcf", vCard, "text/vcard", false}, @@ -505,37 +505,6 @@ func BenchmarkAll(b *testing.B) { } } -func BenchmarkCommon(b *testing.B) { - commonFiles := []string{ - "xlsx.xlsx", - "pptx.pptx", - "docx.docx", - "tar.tar", - "zip.zip", - "pdf.pdf", - "jpg.jpg", - "png.png", - "gif.gif", - "xls.xls", - "webm.webm", - "csv.csv", - "mp4.mp4", - } - for _, file := range commonFiles { - f, err := os.ReadFile(filepath.Join(testDataDir, file)) - if err != nil { - b.Fatal(err) - } - b.Run(filepath.Ext(file), func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - Detect(f) - } - }) - } -} - // Check there are no panics for nil inputs. func TestIndexOutOfRangePanic(t *testing.T) { for _, n := range root.flatten() {