Skip to content

Commit

Permalink
Add Base64URLSafe decoder (trufflesecurity#1292)
Browse files Browse the repository at this point in the history
* Add Base64URLSafe decoder

* Add decoder that can decode base64 strings with '-' and '_' instead of
  '+' and '/'.

* Combine url-safe b64 decoder into b64 decoder
  • Loading branch information
nyanshak authored May 18, 2023
1 parent f831b62 commit 195f9f0
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 13 deletions.
34 changes: 22 additions & 12 deletions pkg/decoders/base64.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

type Base64 struct{}
type (
Base64 struct{}
)

var (
b64Charset = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")
b64EndChars = "+/="
b64Charset = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/-_=")
b64EndChars = "+/-_="
// Every character in the charset is ASCII, so a fixed 128-entry array indexed by byte value serves as the lookup table.
b64CharsetMapping [128]bool
)
Expand All @@ -24,11 +26,19 @@ func init() {
}

func (d *Base64) FromChunk(chunk *sources.Chunk) *sources.Chunk {
encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20)
encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, 20, b64CharsetMapping, b64EndChars)
decodedSubstrings := make(map[string][]byte)

for _, str := range encodedSubstrings {
dec, err := base64.StdEncoding.DecodeString(str)
if err == nil {
if len(dec) > 0 {
decodedSubstrings[str] = dec
}
continue
}

dec, err = base64.RawURLEncoding.DecodeString(str)
if err == nil && len(dec) > 0 {
decodedSubstrings[str] = dec
}
Expand Down Expand Up @@ -57,7 +67,7 @@ func (d *Base64) FromChunk(chunk *sources.Chunk) *sources.Chunk {
return nil
}

func getSubstringsOfCharacterSet(data []byte, threshold int) []string {
func getSubstringsOfCharacterSet(data []byte, threshold int, charsetMapping [128]bool, endChars string) []string {
if len(data) == 0 {
return nil
}
Expand All @@ -68,7 +78,7 @@ func getSubstringsOfCharacterSet(data []byte, threshold int) []string {
// Determine the number of substrings that will be returned.
// Pre-allocate the slice to avoid reallocations.
for _, char := range data {
if char < 128 && b64CharsetMapping[char] {
if char < 128 && charsetMapping[char] {
count++
} else {
if count > threshold {
Expand All @@ -86,29 +96,29 @@ func getSubstringsOfCharacterSet(data []byte, threshold int) []string {
substrings := make([]string, 0, substringsCount)

for i, char := range data {
if char < 128 && b64CharsetMapping[char] {
if char < 128 && charsetMapping[char] {
if count == 0 {
start = i
}
count++
} else {
if count > threshold {
substrings = appendB64Substring(data, start, count, substrings)
substrings = appendB64Substring(data, start, count, substrings, endChars)
}
count = 0
}
}

if count > threshold {
substrings = appendB64Substring(data, start, count, substrings)
substrings = appendB64Substring(data, start, count, substrings, endChars)
}

return substrings
}

func appendB64Substring(data []byte, start, count int, substrings []string) []string {
substring := bytes.TrimLeft(data[start:start+count], b64EndChars)
if idx := bytes.IndexByte(bytes.TrimRight(substring, b64EndChars), '='); idx != -1 {
func appendB64Substring(data []byte, start, count int, substrings []string, endChars string) []string {
substring := bytes.TrimLeft(data[start:start+count], endChars)
if idx := bytes.IndexByte(bytes.TrimRight(substring, endChars), '='); idx != -1 {
substrings = append(substrings, string(substring[idx+1:]))
} else {
substrings = append(substrings, string(substring))
Expand Down
56 changes: 55 additions & 1 deletion pkg/decoders/base64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import (

func TestBase64_FromChunk(t *testing.T) {
tests := []struct {
name string
chunk *sources.Chunk
want *sources.Chunk
name string
}{
{
name: "only b64 chunk",
Expand Down Expand Up @@ -69,6 +69,60 @@ func TestBase64_FromChunk(t *testing.T) {
but only this encapsulated secret should be decoded.`),
},
},
{
name: "b64-url-safe: only b64 chunk",
chunk: &sources.Chunk{
Data: []byte(`bG9uZ2VyLWVuY29kZWQtc2VjcmV0LXRlc3Q`),
},
want: &sources.Chunk{
Data: []byte(`longer-encoded-secret-test`),
},
},
{
name: "b64-url-safe: mixed content",
chunk: &sources.Chunk{
Data: []byte(`token: bG9uZ2VyLWVuY29kZWQtc2VjcmV0LXRlc3Q`),
},
want: &sources.Chunk{
Data: []byte(`token: longer-encoded-secret-test`),
},
},
{
name: "b64-url-safe: env var (looks like all b64 decodable but has `=` in the middle)",
chunk: &sources.Chunk{
Data: []byte(`some-encoded-secret=dGVzdHNlY3JldA`),
},
want: &sources.Chunk{
Data: []byte(`some-encoded-secret=testsecret`),
},
},
{
name: "b64-url-safe: has longer b64 inside",
chunk: &sources.Chunk{
Data: []byte(`some-encoded-secret="bG9uZ2VyLWVuY29kZWQtc2VjcmV0LXRlc3Q"`),
},
want: &sources.Chunk{
Data: []byte(`some-encoded-secret="longer-encoded-secret-test"`),
},
},
{
name: "b64-url-safe: hyphen url b64",
chunk: &sources.Chunk{
Data: []byte(`dHJ1ZmZsZWhvZz4-ZmluZHMtc2VjcmV0cw`),
},
want: &sources.Chunk{
Data: []byte(`trufflehog>>finds-secrets`),
},
},
{
name: "b64-url-safe: underscore url b64",
chunk: &sources.Chunk{
Data: []byte(`YjY0dXJsc2FmZS10ZXN0LXNlY3JldC11bmRlcnNjb3Jlcz8_`),
},
want: &sources.Chunk{
Data: []byte(`b64urlsafe-test-secret-underscores??`),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit 195f9f0

Please sign in to comment.