From 458b62d9ea8613384d204a88cfe48095d8ae92e0 Mon Sep 17 00:00:00 2001
From: Gabriel Vasile <gabriel.vasile@email.com>
Date: Sat, 12 Oct 2024 20:00:33 -0700
Subject: [PATCH] action for benchmarking detectors (#590)

* add action for benchmarking each detector
---
 .github/workflows/benchmark.go  | 40 ---------------------------------
 .github/workflows/benchmark.yml | 32 ++++++++++++++++++++++++++
 .github/workflows/go.yml        |  5 +----
 mimetype_test.go                | 33 +--------------------------
 4 files changed, 34 insertions(+), 76 deletions(-)
 delete mode 100644 .github/workflows/benchmark.go
 create mode 100644 .github/workflows/benchmark.yml

diff --git a/.github/workflows/benchmark.go b/.github/workflows/benchmark.go
deleted file mode 100644
index 5953cdf..0000000
--- a/.github/workflows/benchmark.go
+++ /dev/null
@@ -1,40 +0,0 @@
-name: Run benchmarks
-on:
-  pull_request:
-    branches: [master]
-
-permissions:
-  contents: read
-
-jobs:
-  benchmark:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Install Go
-      uses: actions/setup-go@v5.0.2
-      with:
-        go-version-file: 'go.mod'
-    - run: go install golang.org/x/perf/cmd/benchstat@latest
-    // Base for comparison is master branch.
-    - name: Checkout code
-      uses: actions/checkout@v4.1.7
-      with:
-        ref: master
-    - run: go test -run=none -bench=. --count=7 > /tmp/prev &
-
-    - name: Checkout code
-      uses: actions/checkout@v4.1.7
-    - run: go test -run=none -bench=. --count=7 > /tmp/curr &
-
-    // Wait for both benchmarks to complete before comparing.
-    - run: wait
-    - run: RESULT="$(benchstat /tmp/prev /tmp/curr)"
-    - uses: actions/github-script@v7
-        with:
-            script: |
-                github.rest.issues.createComment({
-                    issue_number: context.issue.number,
-                    owner: context.repo.owner,
-                    repo: context.repo.repo,
-                    body: $RESULT
-                })
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..f3f1b7a
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,32 @@
+name: Run benchmarks
+on:
+  pull_request:
+    branches: [master]
+
+permissions:
+  contents: read
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    steps:
+    # The base for comparison is the master branch.
+    - name: Checkout code
+      uses: actions/checkout@v4.1.7
+      with:
+        ref: master
+    - name: Install Go
+      uses: actions/setup-go@v5.0.2
+      with:
+        go-version-file: 'go.mod'
+
+    # 30 runs with a 100ms benchtime seem to give acceptable p-values.
+    # With count=10 the results were unreliable, probably because the Actions
+    # runner is a shared environment where other jobs affect CPU and memory.
+    - run: go test -run=none -bench=. -count=30 -benchtime=100ms -timeout=20m > /tmp/prev
+    - name: Checkout code
+      uses: actions/checkout@v4.1.7
+    - run: go test -run=none -bench=. -count=30 -benchtime=100ms -timeout=20m > /tmp/curr
+
+    - run: go install golang.org/x/perf/cmd/benchstat@latest
+    - run: benchstat /tmp/prev /tmp/curr
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 36efe49..9db3515 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -24,10 +24,7 @@ jobs:
         version: "v1.58"
 
   test:
-    strategy:
-      matrix:
-        platform: [ubuntu-latest, windows-latest]
-    runs-on: ${{ matrix.platform }}
+    runs-on: ubuntu-latest
     steps:
     - name: Checkout code
       uses: actions/checkout@v4.2.1
diff --git a/mimetype_test.go b/mimetype_test.go
index ad686e5..7e60297 100644
--- a/mimetype_test.go
+++ b/mimetype_test.go
@@ -206,7 +206,7 @@ var testcases = []testcase{
 	{"utf16lebom.txt", text, "text/plain; charset=utf-16le", false},
 	{"utf32bebom.txt", text, "text/plain; charset=utf-32be", false},
 	{"utf32lebom.txt", text, "text/plain; charset=utf-32le", false},
-	{"utf8.txt", text, "text/plain; charset=utf-8", false},
+	{"utf8.txt", text, "text/plain; charset=utf-8", true},
 	{"utf8ctrlchars", root, "application/octet-stream", false},
 	{"vcf.vcf", vCard, "text/vcard", true},
 	{"vcf.dos.vcf", vCard, "text/vcard", false},
@@ -505,37 +505,6 @@ func BenchmarkAll(b *testing.B) {
 	}
 }
 
-func BenchmarkCommon(b *testing.B) {
-	commonFiles := []string{
-		"xlsx.xlsx",
-		"pptx.pptx",
-		"docx.docx",
-		"tar.tar",
-		"zip.zip",
-		"pdf.pdf",
-		"jpg.jpg",
-		"png.png",
-		"gif.gif",
-		"xls.xls",
-		"webm.webm",
-		"csv.csv",
-		"mp4.mp4",
-	}
-	for _, file := range commonFiles {
-		f, err := os.ReadFile(filepath.Join(testDataDir, file))
-		if err != nil {
-			b.Fatal(err)
-		}
-		b.Run(filepath.Ext(file), func(b *testing.B) {
-			b.ReportAllocs()
-			b.ResetTimer()
-			for n := 0; n < b.N; n++ {
-				Detect(f)
-			}
-		})
-	}
-}
-
 // Check there are no panics for nil inputs.
 func TestIndexOutOfRangePanic(t *testing.T) {
 	for _, n := range root.flatten() {