Commit 10902f8
Add max object size flag for s3 bucket scanning (trufflesecurity#1294)
Co-authored-by: Dustin Decker <dustin@trufflesec.com>
nyanshak and dustin-decker authored Apr 26, 2023
1 parent 2a3f894 commit 10902f8
Showing 7 changed files with 222 additions and 187 deletions.
24 changes: 13 additions & 11 deletions main.go
@@ -98,12 +98,13 @@ var (
filesystemScanIncludePaths = filesystemScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
filesystemScanExcludePaths = filesystemScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()

-	s3Scan             = cli.Command("s3", "Find credentials in S3 buckets.")
-	s3ScanKey          = s3Scan.Flag("key", "S3 key used to authenticate. Can be provided with environment variable AWS_ACCESS_KEY_ID.").Envar("AWS_ACCESS_KEY_ID").String()
-	s3ScanSecret       = s3Scan.Flag("secret", "S3 secret used to authenticate. Can be provided with environment variable AWS_SECRET_ACCESS_KEY.").Envar("AWS_SECRET_ACCESS_KEY").String()
-	s3ScanSessionToken = s3Scan.Flag("session-token", "S3 session token used to authenticate temporary credentials. Can be provided with environment variable AWS_SESSION_TOKEN.").Envar("AWS_SESSION_TOKEN").String()
-	s3ScanCloudEnv     = s3Scan.Flag("cloud-environment", "Use IAM credentials in cloud environment.").Bool()
-	s3ScanBuckets      = s3Scan.Flag("bucket", "Name of S3 bucket to scan. You can repeat this flag.").Strings()
+	s3Scan              = cli.Command("s3", "Find credentials in S3 buckets.")
+	s3ScanKey           = s3Scan.Flag("key", "S3 key used to authenticate. Can be provided with environment variable AWS_ACCESS_KEY_ID.").Envar("AWS_ACCESS_KEY_ID").String()
+	s3ScanSecret        = s3Scan.Flag("secret", "S3 secret used to authenticate. Can be provided with environment variable AWS_SECRET_ACCESS_KEY.").Envar("AWS_SECRET_ACCESS_KEY").String()
+	s3ScanSessionToken  = s3Scan.Flag("session-token", "S3 session token used to authenticate temporary credentials. Can be provided with environment variable AWS_SESSION_TOKEN.").Envar("AWS_SESSION_TOKEN").String()
+	s3ScanCloudEnv      = s3Scan.Flag("cloud-environment", "Use IAM credentials in cloud environment.").Bool()
+	s3ScanBuckets       = s3Scan.Flag("bucket", "Name of S3 bucket to scan. You can repeat this flag.").Strings()
+	s3ScanMaxObjectSize = s3Scan.Flag("max-object-size", "Maximum size of objects to scan. Objects larger than this will be skipped. (Byte units eg. 512B, 2KB, 4MB)").Default("250MB").Bytes()

gcsScan = cli.Command("gcs", "Find credentials in GCS buckets.")
gcsProjectID = gcsScan.Flag("project-id", "GCS project ID used to authenticate. Can NOT be used with unauth scan. Can be provided with environment variable GOOGLE_CLOUD_PROJECT.").Envar("GOOGLE_CLOUD_PROJECT").String()
@@ -396,11 +397,12 @@ func run(state overseer.State) {
}
case s3Scan.FullCommand():
cfg := sources.S3Config{
-		Key:          *s3ScanKey,
-		Secret:       *s3ScanSecret,
-		SessionToken: *s3ScanSessionToken,
-		Buckets:      *s3ScanBuckets,
-		CloudCred:    *s3ScanCloudEnv,
+		Key:           *s3ScanKey,
+		Secret:        *s3ScanSecret,
+		SessionToken:  *s3ScanSessionToken,
+		Buckets:       *s3ScanBuckets,
+		CloudCred:     *s3ScanCloudEnv,
+		MaxObjectSize: int64(*s3ScanMaxObjectSize),
}
if err := e.ScanS3(ctx, cfg); err != nil {
logFatal(err, "Failed to scan S3.")
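With this change the size cutoff is tunable from the CLI. A usage sketch (the bucket name and the 100MB threshold are only examples; objects above the threshold are skipped, and the default remains 250MB as set above):

    # Skip any S3 object larger than 100MB while scanning the bucket.
    trufflehog s3 --bucket my-example-bucket --max-object-size 100MB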
345 changes: 178 additions & 167 deletions pkg/pb/sourcespb/sources.pb.go

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pkg/pb/sourcespb/sources.pb.validate.go

Some generated files are not rendered by default.

6 changes: 2 additions & 4 deletions pkg/sources/gcs/gcs_manager.go
Original file line number Diff line number Diff line change
@@ -29,9 +29,7 @@ const (
maxObjectSizeLimit = 50 * 1024 * 1024 // 50MB
)

-var (
-	defaultConcurrency = runtime.NumCPU()
-)
+var defaultConcurrency = runtime.NumCPU()

type objectManager interface {
listObjects(context.Context) (chan io.Reader, error)
@@ -296,7 +294,7 @@ func withConcurrency(concurrency int) gcsManagerOption {

// withMaxObjectSize sets the maximum size of objects that will be scanned.
// If not set, set to a negative number, or set larger than 1GB,
-// the default value of 500MB will be used.
+// the default value of 50MB will be used.
func withMaxObjectSize(maxObjectSize int64) gcsManagerOption {
return func(m *gcsManager) error {
if maxObjectSize <= 0 || maxObjectSize > maxObjectSizeLimit {
29 changes: 24 additions & 5 deletions pkg/sources/s3/s3.go
@@ -29,6 +29,11 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)

+const (
+	defaultMaxObjectSize = 250 * 1024 * 1024 // 250 MiB
+	maxObjectSizeLimit   = 250 * 1024 * 1024 // 250 MiB
+)

type Source struct {
name string
sourceId int64
@@ -37,9 +42,10 @@ type Source struct {
concurrency int
log logr.Logger
sources.Progress
-	errorCount *sync.Map
-	conn       *sourcespb.S3
-	jobPool    *errgroup.Group
+	errorCount    *sync.Map
+	conn          *sourcespb.S3
+	jobPool       *errgroup.Group
+	maxObjectSize int64
}

// Ensure the Source satisfies the interface at compile time
@@ -79,9 +85,22 @@ func (s *Source) Init(aCtx context.Context, name string, jobId, sourceId int64,
}
s.conn = &conn

+	s.setMaxObjectSize(conn.GetMaxObjectSize())

return nil
}

+// setMaxObjectSize sets the maximum size of objects that will be scanned. If
+// not set, set to a negative number, or set larger than the
+// maxObjectSizeLimit, the defaultMaxObjectSize will be used.
+func (s *Source) setMaxObjectSize(maxObjectSize int64) {
+	if maxObjectSize <= 0 || maxObjectSize > maxObjectSizeLimit {
+		s.maxObjectSize = defaultMaxObjectSize
+	} else {
+		s.maxObjectSize = maxObjectSize
+	}
+}

func (s *Source) newClient(region string) (*s3.S3, error) {
cfg := aws.NewConfig()
cfg.CredentialsChainVerboseErrors = aws.Bool(true)
@@ -203,8 +222,8 @@ func (s *Source) pageChunker(ctx context.Context, client *s3.S3, chunksChan chan
}

// ignore large files
-	if *obj.Size > int64(250*common.MB) {
-		s.log.V(3).Info("Skipping %d byte file (over 250MB limit)", "object", *obj.Key)
+	if *obj.Size > s.maxObjectSize {
+		s.log.V(3).Info("Skipping file: object size exceeds max object size limit", "object", *obj.Key, "size", *obj.Size)
return
}

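For clarity, a minimal sketch of the clamping behavior introduced by setMaxObjectSize above. The test is illustrative only (it is not part of this commit) and assumes it lives in the same s3 package so it can reach the unexported identifiers:

    package s3

    import "testing"

    // Zero, negative, and over-limit values fall back to the 250 MiB default;
    // values in range are used as-is.
    func TestSetMaxObjectSize(t *testing.T) {
        cases := []struct {
            name string
            in   int64
            want int64
        }{
            {"unset uses default", 0, defaultMaxObjectSize},
            {"negative uses default", -1, defaultMaxObjectSize},
            {"over limit uses default", maxObjectSizeLimit + 1, defaultMaxObjectSize},
            {"in range is kept", 10 * 1024 * 1024, 10 * 1024 * 1024},
        }
        for _, tc := range cases {
            s := &Source{}
            s.setMaxObjectSize(tc.in)
            if s.maxObjectSize != tc.want {
                t.Errorf("%s: got %d, want %d", tc.name, s.maxObjectSize, tc.want)
            }
        }
    }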
2 changes: 2 additions & 0 deletions pkg/sources/sources.go
@@ -145,6 +145,8 @@ type S3Config struct {
SessionToken string
// Buckets is the list of buckets to scan.
Buckets []string
+	// MaxObjectSize is the maximum object size to scan.
+	MaxObjectSize int64
}

// SyslogConfig defines the optional configuration for a syslog source.
1 change: 1 addition & 0 deletions proto/sources.proto
@@ -218,6 +218,7 @@ message S3 {
credentials.AWSSessionTokenSecret session_token = 5;
}
repeated string buckets = 3;
+  int64 max_object_size = 6;
}

message Slack {
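The flag value reaches the object-size check in pkg/sources/s3/s3.go through this generated proto message: the engine (its wiring is not part of this diff) is assumed to copy sources.S3Config into sourcespb.S3, which Source.Init then reads back via conn.GetMaxObjectSize(). A rough sketch under that assumption; the package and helper name are hypothetical:

    package enginewiring // illustrative only

    import (
        "github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
        "github.com/trufflesecurity/trufflehog/v3/pkg/sources"
    )

    // s3ConnectionFromConfig shows how the new MaxObjectSize field could flow
    // from the CLI config into the proto connection consumed by the s3 source.
    func s3ConnectionFromConfig(c sources.S3Config) *sourcespb.S3 {
        return &sourcespb.S3{
            Buckets:       c.Buckets,
            MaxObjectSize: c.MaxObjectSize, // generated from `int64 max_object_size = 6;`
        }
    }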
