Skip to content

Commit

Permalink
Switch to licenseclassifier/v2
Browse files Browse the repository at this point in the history
This greatly improves the classifier's accuracy, and removes the need
for lichen to manage its own license database, since the database is
embedded in the classifier.

The v2 classifier also returns "Copyright" matches, which would be a
behaviour change for lichen; to preserve the existing behaviour, these
matches are ignored.

Signed-off-by: Stephen Kitt <skitt@redhat.com>
  • Loading branch information
skitt committed Jan 5, 2024
1 parent 46b79df commit edece1a
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 129 deletions.
3 changes: 0 additions & 3 deletions Makefile

This file was deleted.

38 changes: 20 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,31 +62,33 @@ We can run lichen on itself:

```
$ lichen $GOPATH/bin/lichen
github.com/cpuguy83/go-md2man/v2@v2.0.0-20190314233015-f79a8a8ca69d: MIT (allowed)
github.com/google/goterm@v0.0.0-20190703233501-fc88cf888a3f: BSD-3-Clause (allowed)
github.com/google/licenseclassifier@v0.0.0-20200402202327-879cb1424de0: Apache-2.0 (allowed)
github.com/cpuguy83/go-md2man/v2@v2.0.1: MIT (allowed)
github.com/davecgh/go-spew@v1.1.1: ISC (allowed)
github.com/google/licenseclassifier/v2@v2.0.0: Apache-2.0 (allowed)
github.com/hashicorp/errwrap@v1.0.0: MPL-2.0 (allowed)
github.com/hashicorp/go-multierror@v1.1.0: MPL-2.0 (allowed)
github.com/lucasb-eyer/go-colorful@v1.0.3: MIT (allowed)
github.com/mattn/go-isatty@v0.0.12: MIT (allowed)
github.com/muesli/termenv@v0.5.2: MIT (allowed)
github.com/russross/blackfriday/v2@v2.0.1: BSD-2-Clause (allowed)
github.com/sergi/go-diff@v1.0.0: MIT (allowed)
github.com/shurcooL/sanitized_anchor_name@v1.0.0: MIT (allowed)
github.com/urfave/cli/v2@v2.2.0: MIT (allowed)
golang.org/x/sys@v0.0.0-20200116001909-b77594299b42: BSD-3-Clause (allowed)
gopkg.in/yaml.v2@v2.3.0: Apache-2.0, MIT (allowed)
github.com/hashicorp/go-multierror@v1.1.1: MPL-2.0 (allowed)
github.com/lucasb-eyer/go-colorful@v1.2.0: MIT (allowed)
github.com/mattn/go-isatty@v0.0.14: MIT (allowed)
github.com/mattn/go-runewidth@v0.0.13: MIT (allowed)
github.com/muesli/termenv@v0.11.0: MIT (allowed)
github.com/rivo/uniseg@v0.2.0: MIT (allowed)
github.com/russross/blackfriday/v2@v2.1.0: BSD-2-Clause (allowed)
github.com/sergi/go-diff@v1.1.0: MIT (allowed)
github.com/urfave/cli/v2@v2.4.0: MIT (allowed)
golang.org/x/sys@v0.0.0-20210630005230-0f9fa26af87c: BSD-3-Clause (allowed)
gopkg.in/yaml.v2@v2.4.0: Apache-2.0, MIT (allowed)
```

...and using a custom template:

```
$ lichen --template="{{range .Modules}}{{range .Module.Licenses}}{{.Name | printf \"%s\n\"}}{{end}}{{end}}" $GOPATH/bin/lichen | sort | uniq -c | sort -nr
8 MIT
2 MPL-2.0
2 BSD-3-Clause
2 Apache-2.0
1 BSD-2-Clause
9 MIT
2 MPL-2.0
2 Apache-2.0
1 ISC
1 BSD-3-Clause
1 BSD-2-Clause
```

## Config
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/uw-labs/lichen
go 1.18

require (
github.com/google/licenseclassifier v0.0.0-20201113175434-78a70215ca36
github.com/google/licenseclassifier/v2 v2.0.0
github.com/hashicorp/go-multierror v1.1.1
github.com/muesli/termenv v0.11.0
github.com/stretchr/testify v1.7.1
Expand All @@ -13,15 +13,15 @@ require (

require (
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/davecgh/go-spew v1.1.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sergi/go-diff v1.0.0 // indirect
github.com/sergi/go-diff v1.1.0 // indirect
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)
28 changes: 20 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/cpuguy83/go-md2man/v2 v2.0.1 h1:r/myEWzV9lfsM1tFLgDyu0atFtJ1fXn261LKYj/3DxU=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/licenseclassifier v0.0.0-20201113175434-78a70215ca36 h1:YGB3wNLUTvq+lbIwdNRsaMJvoX4mCKkwzHlmlT1V+ow=
github.com/google/licenseclassifier v0.0.0-20201113175434-78a70215ca36/go.mod h1:qsqn2hxC+vURpyBRygGUuinTO42MFRLcsmQ/P8v94+M=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/licenseclassifier/v2 v2.0.0 h1:1Y57HHILNf4m0ABuMVb6xk4vAJYEUO0gDxNpog0pyeA=
github.com/google/licenseclassifier/v2 v2.0.0/go.mod h1:cOjbdH0kyC9R22sdQbYsFkto4NGCAc+ZSwbeThazEtM=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
Expand All @@ -24,18 +31,23 @@ github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/urfave/cli/v2 v2.4.0 h1:m2pxjjDFgDxSPtO8WSdbndj17Wu2y8vOT86wE/tjr+I=
github.com/urfave/cli/v2 v2.4.0/go.mod h1:NX9W0zmTvedE5oDoOMs2RTC8RvdK98NTYZE5LbaEYPg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
Expand Down
5 changes: 0 additions & 5 deletions internal/license/db/archive.go

This file was deleted.

58 changes: 0 additions & 58 deletions internal/license/db/gen/gen.go

This file was deleted.

Binary file removed internal/license/db/licenses.db
Binary file not shown.
17 changes: 0 additions & 17 deletions internal/license/db/open.go

This file was deleted.

24 changes: 7 additions & 17 deletions internal/license/resolve.go
Original file line number Diff line number Diff line change
@@ -1,30 +1,20 @@
package license

import (
"fmt"
"io/ioutil"
"path/filepath"
"regexp"
"strings"

"github.com/google/licenseclassifier"
"github.com/uw-labs/lichen/internal/license/db"
classifier "github.com/google/licenseclassifier/v2"

Check failure on line 9 in internal/license/resolve.go

View workflow job for this annotation

GitHub Actions / lint

import 'github.com/google/licenseclassifier/v2' is not allowed from list 'Main' (depguard)
"github.com/google/licenseclassifier/v2/assets"

Check failure on line 10 in internal/license/resolve.go

View workflow job for this annotation

GitHub Actions / lint

import 'github.com/google/licenseclassifier/v2/assets' is not allowed from list 'Main' (depguard)
"github.com/uw-labs/lichen/internal/model"

Check failure on line 11 in internal/license/resolve.go

View workflow job for this annotation

GitHub Actions / lint

import 'github.com/uw-labs/lichen/internal/model' is not allowed from list 'Main' (depguard)
)

// Resolve inspects each module and determines what it is licensed under. The returned slice contains each
// module enriched with license information.
func Resolve(modules []model.Module, threshold float64) ([]model.Module, error) {
archiveFn := licenseclassifier.ArchiveFunc(func() ([]byte, error) {
f, err := db.Open()
if err != nil {
return nil, fmt.Errorf("failed to open license databse: %w", err)
}
defer f.Close()
return ioutil.ReadAll(f)
})

lc, err := licenseclassifier.New(threshold, archiveFn)
lc, err := assets.DefaultClassifier()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -68,17 +58,17 @@ func locateLicenses(path string) (lp []string, err error) {
}

// classify inspects each license file and classifies it
func classify(lc *licenseclassifier.License, paths []string) ([]model.License, error) {
func classify(lc *classifier.Classifier, paths []string) ([]model.License, error) {
licenses := make([]model.License, 0)
for _, p := range paths {
content, err := ioutil.ReadFile(p)
if err != nil {
return nil, err
}
hits := make(map[string]float64)
matches := lc.MultipleMatch(string(content), true)
for _, match := range matches {
if conf, found := hits[match.Name]; !found || match.Confidence > conf {
matches := lc.Match(content)
for _, match := range matches.Matches {
if conf, found := hits[match.Name]; match.MatchType != "Copyright" && (!found || match.Confidence > conf) {
hits[match.Name] = match.Confidence
}
}
Expand Down

0 comments on commit edece1a

Please sign in to comment.