Skip to content

Commit

Permalink
Use trufflesec git parser (#729)
Browse files Browse the repository at this point in the history
* Use trufflesec git parser.

* wip

* Fix line numbers and linter feedback
  • Loading branch information
bill-rich authored Aug 23, 2022
1 parent 619ab20 commit a0d44a3
Show file tree
Hide file tree
Showing 6 changed files with 421 additions and 88 deletions.
5 changes: 2 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/Azure/go-autorest/autorest/azure/auth v0.5.11
github.com/TheZeroSlave/zapsentry v1.11.0
github.com/aws/aws-sdk-go v1.44.61
github.com/bill-rich/disk-buffer-reader v0.1.2
github.com/bill-rich/disk-buffer-reader v0.1.3
github.com/bill-rich/go-syslog v0.0.0-20220413021637-49edb52a574c
github.com/bitfinexcom/bitfinex-api-go v0.0.0-20210608095005-9e0b26f200fb
github.com/bradleyfalzon/ghinstallation/v2 v2.1.0
Expand All @@ -22,7 +22,6 @@ require (
github.com/fatih/color v1.13.0
github.com/felixge/fgprof v0.9.2
github.com/getsentry/sentry-go v0.13.0
github.com/gitleaks/go-gitdiff v0.7.4
github.com/go-errors/errors v1.4.2
github.com/go-git/go-git/v5 v5.4.2
github.com/go-logr/logr v1.2.3
Expand All @@ -46,7 +45,6 @@ require (
github.com/stretchr/testify v1.8.0
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502
github.com/xanzy/go-gitlab v0.69.0
github.com/zricethezav/gitleaks/v8 v8.5.2
go.uber.org/zap v1.22.0
golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9
Expand Down Expand Up @@ -125,5 +123,6 @@ require (
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/grpc v1.47.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
9 changes: 3 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ github.com/aws/aws-sdk-go v1.44.61 h1:NcpLSS3Z0MiVQIYugx4I40vSIEEAXT0baO684ExNRc
github.com/aws/aws-sdk-go v1.44.61/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/bill-rich/disk-buffer-reader v0.1.2 h1:pN9K5JoacTcNWp2SCd3n7mPouSwMP9ouTS66Qa+5IPY=
github.com/bill-rich/disk-buffer-reader v0.1.2/go.mod h1:VVzzsK1Ac2AnpOfp/5r9JlIFaFkZ9uSf7zisZayCt0Y=
github.com/bill-rich/disk-buffer-reader v0.1.3 h1:8RUWjBrLB52kwJUjVDdrU0S/7z07DrMshgudfiy6QFs=
github.com/bill-rich/disk-buffer-reader v0.1.3/go.mod h1:VVzzsK1Ac2AnpOfp/5r9JlIFaFkZ9uSf7zisZayCt0Y=
github.com/bill-rich/go-syslog v0.0.0-20220413021637-49edb52a574c h1:tSME5FDS02qQll3JYodI6RZR/g4EKOHApGv1wMZT+Z0=
github.com/bill-rich/go-syslog v0.0.0-20220413021637-49edb52a574c/go.mod h1:+sCc6hztur+oZCLOsNk6wCCy+GLrnSNHSRmTnnL+8iQ=
github.com/bitfinexcom/bitfinex-api-go v0.0.0-20210608095005-9e0b26f200fb h1:9v7Bzlg+1EBYi2IYcUmOwHReBEfqBbYIj3ZCi9cIe1Q=
Expand Down Expand Up @@ -415,10 +415,6 @@ github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502 h1:34icjjmqJ2HP
github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502/go.mod h1:p9lPsd+cx33L3H9nNoecRRxPssFKUwwI50I3pZ0yT+8=
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom10 h1:QuGZ5bJcQpVz+3sfvKKPDkQwdUueiBg0V+2eMHzkryo=
github.com/trufflesecurity/gitleaks/v8 v8.6.1-custom10/go.mod h1:2iZpX4Epnmx7VK2atbIMEjHW9rivie5RRe0ZhPWUFvM=
github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2 h1:srCJzbE3b44+ZIPcgJSfvinHCOQlkMwVghtKf23un6o=
github.com/trufflesecurity/go-gitdiff v0.7.6-zombies2/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
github.com/trufflesecurity/overseer v1.1.7-custom5 h1:xu+Fg6fkSRifUPzUCl7N8HmobJ6WGOkIApGnM7mJS6w=
github.com/trufflesecurity/overseer v1.1.7-custom5/go.mod h1:nT9w37AiO1Nop2VhVhNfzAFaPjthvxgpDV3XKsxYkcI=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
Expand Down Expand Up @@ -932,6 +928,7 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down
284 changes: 284 additions & 0 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
package gitparse

import (
"bufio"
"bytes"
"fmt"
"os/exec"
"path/filepath"
"strconv"
"strings"
"time"

log "github.com/sirupsen/logrus"
)

// DateFormat is the standard date format for git.
// It is the Go reference-time layout used to parse "Date:" header lines, and it
// corresponds to the `--date=format:%a %b %d %H:%M:%S %Y %z` argument that
// RepoPath passes to `git log`.
const DateFormat = "Mon Jan 02 15:04:05 2006 -0700"

// Commit contains commit header info and diffs.
//
// NOTE(review): RepoPath sends Commit values by copy over its channel, and
// Message is a strings.Builder. Reading it with String on a received copy is
// fine, but the strings package documents that a non-zero Builder must not be
// copied and then written to — confirm no consumer writes to Message.
type Commit struct {
// Hash is taken from bytes 7..47 of the "commit <hash>" header line.
Hash string
// Author is the text that follows "Author: " on the author header line.
Author string
// Date is the "Date:" header value parsed with DateFormat.
Date time.Time
// Message accumulates the commit message lines with their 4-byte indent removed.
Message strings.Builder
// Diffs holds the added content collected for this commit.
Diffs []Diff
}

// Diff contains the info about a file diff in a commit.
// RepoPath starts a new Diff at every "diff" line and at every "@@" hunk
// header, so one changed file may be represented by several Diff values.
type Diff struct {
// PathB is the path from the "+++ b/<path>" line, or from a
// "Binary files ... differ" line for binary files.
PathB string
// LineStart is the number parsed from the third space-separated field of the
// "@@" hunk header (for example 298 for "@@ -298 +298 @@").
LineStart int
// Content holds the added lines of the hunk with the leading "+" removed.
Content bytes.Buffer
// IsBinary is set when a "Binary files ... differ" line was seen for this diff.
IsBinary bool
}

// RepoPath runs `git log -p -U0` for the repository at source and parses its
// output into Commit values, which are delivered on the returned channel.
//
// When head is non-empty it is passed to `git log` as the revision argument;
// otherwise `--all` is used. Only added content ("+" lines) is collected; a new
// Diff is produced for every hunk that has content.
//
// The channel is unbuffered, so parsing blocks until each commit is received.
// It is closed once git's output has been fully consumed and the command has
// been waited for. A non-nil error is returned when the output pipes cannot be
// created or git cannot be started; nothing is ever sent on the channel then.
func RepoPath(source string, head string) (chan Commit, error) {
	commitChan := make(chan Commit)

	args := []string{"-C", source, "log", "-p", "-U0", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z"}
	if head != "" {
		args = append(args, head)
	} else {
		args = append(args, "--all")
	}

	cmd := exec.Command("git", args...)

	// If the absolute path cannot be resolved, GIT_DIR is simply not set and
	// git relies on the -C argument alone.
	// NOTE(review): cmd.Env is nil before this append, so the child process
	// receives only GIT_DIR rather than the parent environment (os/exec only
	// inherits the environment when Env is nil) — confirm this is intended.
	absPath, err := filepath.Abs(source)
	if err == nil {
		cmd.Env = append(cmd.Env, fmt.Sprintf("GIT_DIR=%s", filepath.Join(absPath, ".git")))
	}

	stdOut, err := cmd.StdoutPipe()
	if err != nil {
		return commitChan, err
	}
	stdErr, err := cmd.StderrPipe()
	if err != nil {
		return commitChan, err
	}

	err = cmd.Start()
	if err != nil {
		return commitChan, err
	}

	// Forward git's stderr to the debug log. stderrDone is closed when the
	// stream is exhausted so that cmd.Wait is not called while it is still
	// being read.
	stderrDone := make(chan struct{})
	go func() {
		defer close(stderrDone)
		scanner := bufio.NewScanner(stdErr)
		for scanner.Scan() {
			log.Debug(scanner.Text())
		}
		if err := scanner.Err(); err != nil {
			log.WithError(err).Debug("Error reading stderr of git command.")
		}
	}()

	go func() {
		defer close(commitChan)

		outReader := bufio.NewReader(stdOut)
		var (
			currentCommit *Commit
			currentDiff   *Diff
			// inHunk is true between an "@@" header and the next "diff" or
			// "commit" line. Inside a hunk every "+"/"-" line is file content,
			// even when the content itself makes the line start with "+++" or "---".
			inHunk bool
		)

		// flushDiff appends the diff being built to the current commit if it
		// collected any content.
		// NOTE(review): a diff marked IsBinary has no Content, so it is never
		// appended — confirm whether binary diffs are meant to reach consumers.
		flushDiff := func() {
			if currentCommit != nil && currentDiff != nil && currentDiff.Content.Len() > 0 {
				currentCommit.Diffs = append(currentCommit.Diffs, *currentDiff)
			}
		}

		for {
			line, err := outReader.ReadBytes('\n')
			// A final line without a trailing newline is still processed; the
			// loop ends on the following read, which returns no data.
			if err != nil && len(line) == 0 {
				break
			}
			switch {
			case inHunk && isPlusDiffLine(line):
				currentDiff.Content.Write(line[1:])
			case inHunk && isMinusDiffLine(line):
				// NoOp. We only care about additions.
			case isCommitLine(line):
				// Finish the previous commit (if any) and send it to the channel.
				flushDiff()
				if currentCommit != nil {
					commitChan <- *currentCommit
				}
				// Create a new currentDiff and currentCommit.
				currentDiff = &Diff{}
				currentCommit = &Commit{}
				inHunk = false
				// Check that the commit line contains a hash and set it.
				if len(line) >= 47 {
					currentCommit.Hash = string(line[7:47])
				}
			case isAuthorLine(line):
				if currentCommit != nil {
					currentCommit.Author = strings.TrimRight(string(line[8:]), "\n")
				}
			case isDateLine(line):
				date, err := time.Parse(DateFormat, strings.TrimSpace(string(line[6:])))
				if err != nil {
					log.WithError(err).Debug("Could not parse date from git stream.")
				}
				if currentCommit != nil {
					currentCommit.Date = date
				}
			case isDiffLine(line):
				flushDiff()
				currentDiff = &Diff{}
				inHunk = false
			case isModeLine(line):
				// NoOp
			case isIndexLine(line):
				// NoOp
			case isPlusFileLine(line):
				// "+++ b/<path>": the path starts after the 6-byte "+++ b/" prefix.
				if currentDiff != nil && len(line) >= 6 {
					currentDiff.PathB = strings.TrimRight(string(line[6:]), "\n")
				}
			case isMinusFileLine(line):
				// NoOp
			case isPlusDiffLine(line):
				// A "+" line outside a hunk is unexpected; keep its content if
				// there is a diff to attach it to.
				if currentDiff != nil {
					currentDiff.Content.Write(line[1:])
				}
			case isMinusDiffLine(line):
				// NoOp. We only care about additions.
			case isMessageLine(line):
				if currentCommit != nil {
					currentCommit.Message.Write(line[4:])
				}
			case isBinaryLine(line):
				if currentDiff != nil {
					currentDiff.IsBinary = true
					currentDiff.PathB = pathFromBinaryLine(line)
				}
			case isLineNumberDiffLine(line):
				// Every hunk becomes its own Diff that inherits the file path.
				flushDiff()
				newDiff := &Diff{}
				if currentDiff != nil {
					newDiff.PathB = currentDiff.PathB
				}
				currentDiff = newDiff
				inHunk = true

				// "@@ -a,b +c,d @@": the third field carries the new-file start line.
				words := bytes.Split(line, []byte(" "))
				if len(words) >= 3 {
					startSlice := bytes.Split(words[2], []byte(","))
					lineStart, err := strconv.Atoi(string(startSlice[0]))
					if err == nil {
						currentDiff.LineStart = lineStart
					}
				}
			}
		}

		flushDiff()
		if currentCommit != nil {
			commitChan <- *currentCommit
		}
		// os/exec requires all reads from the pipes to finish before Wait.
		<-stderrDone
		if err := cmd.Wait(); err != nil {
			log.WithError(err).Debugf("Error waiting for git command to complete.")
		}
	}()
	return commitChan, nil
}

// isDateLine reports whether line is a commit date header, e.g.
//
//	Date: Tue Aug 10 15:20:40 2021 +0100
//
// The line must be longer than 7 bytes and begin with "Date:".
func isDateLine(line []byte) bool {
	return len(line) > 7 && bytes.HasPrefix(line, []byte("Date:"))
}

// isAuthorLine reports whether line is a commit author header, e.g.
//
//	Author: Bill Rich <bill.rich@trufflesec.com>
//
// The line must be longer than 8 bytes and begin with "Author:".
func isAuthorLine(line []byte) bool {
	return len(line) > 8 && bytes.HasPrefix(line, []byte("Author:"))
}

// isCommitLine reports whether line starts a new commit, e.g.
//
//	commit 7a95bbf0199e280a0e42dbb1d1a3f56cdd0f6e05
//
// The line must be longer than 7 bytes and begin with "commit".
func isCommitLine(line []byte) bool {
	return len(line) > 7 && bytes.HasPrefix(line, []byte("commit"))
}

// isDiffLine reports whether line starts a file diff, e.g.
//
//	diff --git a/internal/addrs/move_endpoint_module.go b/internal/addrs/move_endpoint_module.go
//
// The line must be longer than 5 bytes and begin with "diff".
func isDiffLine(line []byte) bool {
	return len(line) > 5 && bytes.HasPrefix(line, []byte("diff"))
}

// isIndexLine reports whether line is a diff index header, e.g.
//
//	index 1ed6fbee1..aea1e643a 100644
//
// The line must be longer than 6 bytes and begin with "index".
func isIndexLine(line []byte) bool {
	return len(line) > 6 && bytes.HasPrefix(line, []byte("index"))
}

// isModeLine reports whether line is a new-file mode header, e.g.
//
//	new file mode 100644
//
// The line must be longer than 13 bytes and begin with "new file mode".
func isModeLine(line []byte) bool {
	return len(line) > 13 && bytes.HasPrefix(line, []byte("new file mode"))
}

// isMinusFileLine reports whether line is the old-file header of a diff, e.g.
//
//	--- a/internal/addrs/move_endpoint_module.go
//
// The line must be longer than 3 bytes and begin with "---".
func isMinusFileLine(line []byte) bool {
	return len(line) > 3 && bytes.HasPrefix(line, []byte("---"))
}

// isPlusFileLine reports whether line is the new-file header of a diff, e.g.
//
//	+++ b/internal/addrs/move_endpoint_module.go
//
// The line must be longer than 3 bytes and begin with "+++".
func isPlusFileLine(line []byte) bool {
	return len(line) > 3 && bytes.HasPrefix(line, []byte("+++"))
}

// isPlusDiffLine reports whether line is an added line of a diff, e.g.
//
//	+fmt.Println("ok")
//
// Any non-empty line beginning with "+" matches.
func isPlusDiffLine(line []byte) bool {
	return bytes.HasPrefix(line, []byte("+"))
}

// isMinusDiffLine reports whether line is a removed line of a diff, e.g.
//
//	-fmt.Println("ok")
//
// Any non-empty line beginning with "-" matches.
func isMinusDiffLine(line []byte) bool {
	return bytes.HasPrefix(line, []byte("-"))
}

// isMessageLine reports whether line belongs to a commit message. `git log`
// indents message lines with 4 spaces, so the line must be longer than 4 bytes
// and begin with exactly that indent.
func isMessageLine(line []byte) bool {
	// The prefix literal must be 4 bytes long to be comparable with the
	// 4-space indent; a shorter literal can never match.
	return len(line) > 4 && bytes.HasPrefix(line, []byte("    "))
}

// isBinaryLine reports whether line announces a binary file change, e.g.
//
//	Binary files /dev/null and b/plugin.sig differ
//
// The line must be longer than 7 bytes and begin with "Binary".
func isBinaryLine(line []byte) bool {
	return len(line) > 7 && bytes.HasPrefix(line, []byte("Binary"))
}

// isLineNumberDiffLine reports whether line is a hunk header, e.g.
//
//	@@ -298 +298 @@ func maxRetryErrorHandler(resp *http.Response, err error, numTries int)
//
// The line must be at least 8 bytes long and begin with "@@".
func isLineNumberDiffLine(line []byte) bool {
	return len(line) >= 8 && bytes.HasPrefix(line, []byte("@@"))
}

// pathFromBinaryLine returns the "b" side path of a binary diff line such as
//
//	Binary files /dev/null and b/plugin.sig differ
//
// It returns "" (and logs at debug level) when the line does not split into
// exactly two parts around " and ".
func pathFromBinaryLine(line []byte) string {
	sbytes := bytes.Split(line, []byte(" and "))
	if len(sbytes) != 2 {
		log.Debugf("Expected binary line to be in 'Binary files a/filaA and b/fileB differ' format. Got: %s", line)
		return ""
	}
	// Lines read from git keep their line terminator, so strip it before
	// removing the fixed " differ" suffix. Trimming by prefix/suffix rather
	// than by fixed offsets also avoids slicing out of range on short input.
	bRaw := bytes.TrimRight(sbytes[1], "\r\n")
	bRaw = bytes.TrimSuffix(bRaw, []byte(" differ"))
	bRaw = bytes.TrimPrefix(bRaw, []byte("b/"))
	return string(bRaw)
}
Loading

0 comments on commit a0d44a3

Please sign in to comment.