From 892fb6e57d0cf5dac66432c6b89b294647182794 Mon Sep 17 00:00:00 2001 From: Brendan Shaklovitz Date: Thu, 18 May 2023 14:28:07 -0500 Subject: [PATCH 1/2] Make OpenAI regex more specific * OpenAI keys are in the format 'sk-' + 20 alphanumeric chars + the magic string 'T3BlbkFJ' ('OpenAI' base64-encoded) + 20 alphanumeric chars. --- pkg/detectors/openai/openai.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/detectors/openai/openai.go b/pkg/detectors/openai/openai.go index 0d0f29dc9c11..6d58ef23e6b0 100644 --- a/pkg/detectors/openai/openai.go +++ b/pkg/detectors/openai/openai.go @@ -20,7 +20,8 @@ type Scanner struct{} var _ detectors.Detector = (*Scanner)(nil) var ( - keyPat = regexp.MustCompile(`\b((?:sk)-[a-zA-Z0-9]{48})\b`) + // The magic string T3BlbkFJ is the base64-encoded string: OpenAI + keyPat = regexp.MustCompile(`\b((?:sk)-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z-09]{20})\b`) ) // TODO: Add secret context?? Information about access, ownership etc From 30dd1aef282f356744a6d516245dff54bc82dc1c Mon Sep 17 00:00:00 2001 From: Brendan Shaklovitz Date: Mon, 22 May 2023 09:11:20 -0500 Subject: [PATCH 2/2] Fix regex typo and use magic string as keyword * Use the more-specific magic string as the keyword so that the check runs less often. * Fix the character-class typo in the regex by using named character classes. --- pkg/detectors/openai/openai.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pkg/detectors/openai/openai.go b/pkg/detectors/openai/openai.go index 6d58ef23e6b0..c658f6f84a10 100644 --- a/pkg/detectors/openai/openai.go +++ b/pkg/detectors/openai/openai.go @@ -19,10 +19,8 @@ type Scanner struct{} // Ensure the Scanner satisfies the interface at compile time. 
var _ detectors.Detector = (*Scanner)(nil) -var ( - // The magic string T3BlbkFJ is the base64-encoded string: OpenAI - keyPat = regexp.MustCompile(`\b((?:sk)-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z-09]{20})\b`) -) +// The magic string T3BlbkFJ is the base64-encoded string: OpenAI +var keyPat = regexp.MustCompile(`\b(sk-[[:alnum:]]{20}T3BlbkFJ[[:alnum:]]{20})\b`) // TODO: Add secret context?? Information about access, ownership etc type orgResponse struct { @@ -30,7 +28,7 @@ type orgResponse struct { } type organization struct { - Id string `json:"id"` + ID string `json:"id"` Title string `json:"title"` User string `json:"name"` Description string `json:"description"` @@ -42,7 +40,7 @@ type organization struct { // Keywords are used for efficiently pre-filtering chunks. // Use identifiers in the secret preferably, or the provider name. func (s Scanner) Keywords() []string { - return []string{"sk-"} + return []string{"T3BlbkFJ"} } // FromData will find and optionally verify OpenAI secrets in a given set of bytes. @@ -85,7 +83,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result s1.Verified = true org := orgs.Data[0] s1.ExtraData = map[string]string{ - "id": org.Id, + "id": org.ID, "title": org.Title, "user": org.User, "description": org.Description,