Skip to content

Commit

Permalink
Implement suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
romain-gaillard committed Sep 17, 2024
1 parent 500891f commit d8fa6e5
Showing 1 changed file with 138 additions and 124 deletions.
262 changes: 138 additions & 124 deletions internal/component/loki/secretfilter/secretfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,141 @@ func New(o component.Options, args Arguments) (*Component, error) {
receiver: loki.NewLogsReceiver(),
}

// Call to Update() once at the start.
if err := c.Update(args); err != nil {
return nil, err
}

// Immediately export the receiver which remains the same for the component
// lifetime.
o.OnStateChange(Exports{Receiver: c.receiver})

return c, nil
}

// Run implements component.Component.
func (c *Component) Run(ctx context.Context) error {
for {
select {
case <-ctx.Done():
return nil
case entry := <-c.receiver.Chan():
// Start processing the log entry to redact secrets
entry = c.processEntry(entry)

for _, f := range c.fanout {
select {
case <-ctx.Done():
return nil
case f.Chan() <- entry:
}
}
}
}
}

func (c *Component) processEntry(entry loki.Entry) loki.Entry {
for _, r := range c.Rules {
// To find the secret within the text captured by the regex (and avoid being too greedy), we can use the 'secretGroup' field in the gitleaks.toml file.
// But it's rare for regexes to have this field set, so we can use a simple heuristic in other cases.
//
// There seems to be two kinds of regexes in the gitleaks.toml file
// 1. Regexes that only match the secret (with no submatches). E.g. (?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}
// 2. Regexes that match the secret and some context (or delimiters) and have one submatch (the secret itself). E.g. (?i)\b(AIza[0-9A-Za-z\\-_]{35})(?:['|\"|\n|\r|\s|\x60|;]|$)
//
// For the first case, we can replace the entire match with the redaction string.
// For the second case, we can replace the first submatch with the redaction string (to avoid redacting something else than the secret such as delimiters).
for _, occ := range r.regex.FindAllStringSubmatch(entry.Line, -1) {
// By default, the secret is the full match group
secret := occ[0]

// If a secretGroup is provided, use that instead
if r.secretGroup > 0 && len(occ) > r.secretGroup {
secret = occ[r.secretGroup]
} else {
// If not and there are two submatches, the first one is the secret
if len(occ) == 2 {
secret = occ[1]
}
}

// Check if the secret is in the allowlist
var allowRule *AllowRule = nil
// First check the rule-specific allowlist
for _, a := range r.allowlist {
if a.Regex.MatchString(secret) {
allowRule = &a
break
}
}
// Then check the global allowlist
if allowRule == nil {
for _, a := range c.AllowList {
if a.Regex.MatchString(secret) {
allowRule = &a
break
}
}
}
// If allowed, skip redaction
if allowRule != nil {
level.Info(c.opts.Logger).Log("msg", "secret in allowlist", "rule", r.name, "source", allowRule.Source)
continue
}

// Redact the secret
entry.Line = c.redactLine(entry.Line, secret, r.name)
}
}

return entry
}

func (c *Component) redactLine(line string, secret string, ruleName string) string {
var redactWith = "<REDACTED-SECRET:" + ruleName + ">"
if c.args.RedactWith != "" {
redactWith = c.args.RedactWith
redactWith = strings.ReplaceAll(redactWith, "$SECRET_NAME", ruleName)
redactWith = strings.ReplaceAll(redactWith, "$SECRET_HASH", hashSecret(secret))
}
if c.args.PartialMask > 0 {
redactWith = secret[:c.args.PartialMask] + redactWith
}

line = strings.ReplaceAll(line, secret, redactWith)

return line
}

func hashSecret(secret string) string {
hasher := sha1.New()
hasher.Write([]byte(secret))
return fmt.Sprintf("%x", hasher.Sum(nil))
}

// Update implements component.Component.
func (c *Component) Update(args component.Arguments) error {
newArgs := args.(Arguments)

c.mut.Lock()
defer c.mut.Unlock()
c.args = newArgs

c.fanout = newArgs.ForwardTo

// Parse GitLeaks configuration
var gitleaksCfg GitLeaksConfig
if args.GitleaksConfig == "" {
if c.args.GitleaksConfig == "" {
// If no config file is explicitely provided, use the embedded one
_, err := toml.DecodeFS(embedFs, "gitleaks.toml", &gitleaksCfg)
if err != nil {
return nil, err
return err
}
} else {
// If a config file is provided, use that
_, err := toml.DecodeFile(args.GitleaksConfig, &gitleaksCfg)
_, err := toml.DecodeFile(c.args.GitleaksConfig, &gitleaksCfg)
if err != nil {
return nil, err
return err
}
}

Expand All @@ -135,9 +257,9 @@ func New(o component.Options, args Arguments) (*Component, error) {
// Compile regexes
for _, rule := range gitleaksCfg.Rules {
// If specific secret types are provided, only include rules that match the types
if args.Types != nil && len(args.Types) > 0 {
if c.args.Types != nil && len(c.args.Types) > 0 {
var found bool
for _, t := range args.Types {
for _, t := range c.args.Types {
if strings.HasPrefix(strings.ToLower(rule.ID), strings.ToLower(t)) {
found = true
continue
Expand All @@ -150,17 +272,17 @@ func New(o component.Options, args Arguments) (*Component, error) {
}
re, err := regexp.Compile(rule.Regex)
if err != nil {
level.Error(o.Logger).Log("msg", "error compiling regex", "error", err)
return nil, err
level.Error(c.opts.Logger).Log("msg", "error compiling regex", "error", err)
return err
}

// Compile rule-specific allowlist regexes
var allowlist []AllowRule
for _, r := range rule.Allowlist.Regexes {
re, err := regexp.Compile(r)
if err != nil {
level.Error(o.Logger).Log("msg", "error compiling allowlist regex", "error", err)
return nil, err
level.Error(c.opts.Logger).Log("msg", "error compiling allowlist regex", "error", err)
return err
}
allowlist = append(allowlist, AllowRule{Regex: re, Source: fmt.Sprintf("rule %s", rule.ID)})
}
Expand All @@ -187,135 +309,27 @@ func New(o component.Options, args Arguments) (*Component, error) {
for _, r := range gitleaksCfg.AllowList.Regexes {
re, err := regexp.Compile(r)
if err != nil {
level.Error(o.Logger).Log("msg", "error compiling allowlist regex", "error", err)
return nil, err
level.Error(c.opts.Logger).Log("msg", "error compiling allowlist regex", "error", err)
return err
}
c.AllowList = append(c.AllowList, AllowRule{Regex: re, Source: "gitleaks config"})
}
// From the arguments
for _, r := range args.AllowList {
for _, r := range c.args.AllowList {
re, err := regexp.Compile(r)
if err != nil {
level.Error(o.Logger).Log("msg", "error compiling allowlist regex", "error", err)
return nil, err
level.Error(c.opts.Logger).Log("msg", "error compiling allowlist regex", "error", err)
return err
}
c.AllowList = append(c.AllowList, AllowRule{Regex: re, Source: "alloy config"})
}

// Add the generic API key rule last if needed
if ruleGenericApiKey != nil && !args.ExcludeGeneric {
if ruleGenericApiKey != nil && !c.args.ExcludeGeneric {
c.Rules = append(c.Rules, *ruleGenericApiKey)
}

level.Info(c.opts.Logger).Log("Compiled regexes for secret detection", len(c.Rules))

// Call to Update() once at the start.
if err := c.Update(args); err != nil {
return nil, err
}

// Immediately export the receiver which remains the same for the component
// lifetime.
o.OnStateChange(Exports{Receiver: c.receiver})

return c, nil
}

// Run implements component.Component.
func (c *Component) Run(ctx context.Context) error {
for {
select {
case <-ctx.Done():
return nil
case entry := <-c.receiver.Chan():
for _, r := range c.Rules {
// To find the secret within the text captured by the regex (and avoid being too greedy), we can use the 'secretGroup' field in the gitleaks.toml file.
// But it's rare for regexes to have this field set, so we can use a simple heuristic in other cases.
//
// There seems to be two kinds of regexes in the gitleaks.toml file
// 1. Regexes that only match the secret (with no submatches). E.g. (?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}
// 2. Regexes that match the secret and some context (or delimiters) and have one submatch (the secret itself). E.g. (?i)\b(AIza[0-9A-Za-z\\-_]{35})(?:['|\"|\n|\r|\s|\x60|;]|$)
//
// For the first case, we can replace the entire match with the redaction string.
// For the second case, we can replace the first submatch with the redaction string (to avoid redacting something else than the secret such as delimiters).
for _, occ := range r.regex.FindAllStringSubmatch(entry.Line, -1) {
// By default, the secret is the full match group
secret := occ[0]

// If a secretGroup is provided, use that instead
if r.secretGroup > 0 && len(occ) > r.secretGroup {
secret = occ[r.secretGroup]
} else {
// If not and there are two submatches, the first one is the secret
if len(occ) == 2 {
secret = occ[1]
}
}

// Check if the secret is in the allowlist
var allowRule *AllowRule = nil
// First check the rule-specific allowlist
for _, a := range r.allowlist {
if a.Regex.MatchString(secret) {
allowRule = &a
break
}
}
// Then check the global allowlist
if allowRule == nil {
for _, a := range c.AllowList {
if a.Regex.MatchString(secret) {
allowRule = &a
break
}
}
}
// If allowed, skip redaction
if allowRule != nil {
level.Info(c.opts.Logger).Log("msg", "secret in allowlist", "rule", r.name, "source", allowRule.Source)
continue
}

var redactWith = "<REDACTED-SECRET:" + r.name + ">"
if c.args.RedactWith != "" {
redactWith = c.args.RedactWith
redactWith = strings.ReplaceAll(redactWith, "$SECRET_NAME", r.name)
redactWith = strings.ReplaceAll(redactWith, "$SECRET_HASH", hashSecret(secret))
}
if c.args.PartialMask > 0 {
redactWith = secret[:c.args.PartialMask] + redactWith
}

entry.Line = strings.ReplaceAll(entry.Line, secret, redactWith)
}
}

for _, f := range c.fanout {
select {
case <-ctx.Done():
return nil
case f.Chan() <- entry:
}
}
}
}
}

func hashSecret(secret string) string {
hasher := sha1.New()
hasher.Write([]byte(secret))
return fmt.Sprintf("%x", hasher.Sum(nil))
}

// Update implements component.Component.
func (c *Component) Update(args component.Arguments) error {
newArgs := args.(Arguments)

c.mut.Lock()
defer c.mut.Unlock()
c.args = newArgs

c.fanout = newArgs.ForwardTo

return nil
}

0 comments on commit d8fa6e5

Please sign in to comment.