Skip to content
Merged
51 changes: 49 additions & 2 deletions easybuild/tools/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"""
import copy
import os
import re
import sys
from datetime import datetime
from time import gmtime, strftime
Expand All @@ -58,6 +59,48 @@

_log = fancylogger.getLogger('testing', fname=False)

DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES = [
'KEY',
'SECRET',
'TOKEN',
'PASSWORD',
'API',
'AUTH',
'CREDENTIAL',
'PRIVATE',
'LICENSE',
'LICENCE',
]
DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX = [
# From PR comments https://github.com/easybuilders/easybuild-framework/pull/4877
r'AKIA[0-9A-Z]{16}', # AWS access key
r'[A-Za-z0-9/+=]{40}', # AWS secret key
r'eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+', # JWT token
r'gh[pousr]_[A-Za-z0-9_]{36,}', # GitHub token
r'xox[baprs]-[A-Za-z0-9-]+', # Slack token

# https://github.com/odomojuli/regextokens
# This is too aggressive and can end up excluding any alphanumeric string with length multiple of 4
# r'^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)?$', # Base64
r'[1-9][0-9]+-[0-9a-zA-Z]{40}', # Twitter token
r'EAACEdEose0cBA[0-9A-Za-z]+', # Facebook token
r'[0-9a-fA-F]{7}.[0-9a-fA-F]{32}', # Instagram token
r'AIza[0-9A-Za-z-_]{35}', # Google API key
r'4/[0-9A-Za-z-_]+', # Google OAuth 2.0 Auth code
r'ya29.[0-9A-Za-z-_]+', # Google OAuth 2.0 access token
r'[rs]k_live_[0-9a-z]{32}', # Picatic/Stripe API key
r'sqOatp-[0-9A-Za-z-_]{22}', # Square Access token
r'access_token,production$[0-9a-z]{161[0-9a,]{32}', # PayPal token
r'55[0-9a-fA-F]{32}', # Twilio token
r'key-[0-9a-zA-Z]{32}', # Mailgun API key
r'[0-9a-f]{32}-us[0-9]{1,2}', # Mailchimp API key
r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', # Google Cloud Oauth 2.0 token
r'[A-Za-z0-9_]{21}--[A-Za-z0-9_]{8}', # Google Cloud API key
r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', # Heroku token
r'sk-(.*-)?[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}', # OpenAI API key
r'waka_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', # WakaTime API key
]


def regtest(easyconfig_paths, modtool, build_specs=None):
"""
Expand Down Expand Up @@ -265,8 +308,12 @@ def create_test_report(msg, ecs_with_res, init_session_state, pr_nrs=None, gist_
for key in sorted(environ_dump.keys()):
if env_filter is not None and env_filter.search(key):
continue
else:
environment += ["%s = %s" % (key, environ_dump[key])]
if any(x in key.upper() for x in DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES):
continue
value = environ_dump[key]
if any(re.match(rgx, value) for rgx in DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX):
continue
environment += ["%s = %s" % (key, value)]

test_report.extend(["#### Environment", "```"] + environment + ["```"])

Expand Down
63 changes: 60 additions & 3 deletions test/framework/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,26 +1317,83 @@ def test_github_create_test_report(self):
'log_file': logfile,
}),
]
environ = {
'USER': 'test',
}
JWT_HDR = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
JWT_PLD = 'eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNzA4MzQ1MTIzLCJleHAiOjE3MDgzNTUxMjN9'
JWT_SIG = 'SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c'
secret_environ = {
# Test default removal based on variable value
'TOTALLYPUBLICVAR1': 'AKIAIOSFODNN7EXAMPLE', # AWS_ACCESS_KEY
'TOTALLYPUBLICVAR2': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', # AWS_SECRET_KEY
'TOTALLYPUBLICVAR3': '.'.join([JWT_HDR, JWT_PLD, JWT_SIG]), # JWT
'TOTALLYPUBLICVAR4': 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz', # GH_TOKEN
'TOTALLYPUBLICVAR5': 'xoxb-1234567890-1234567890123-ABCDEFabcdef', # SLACK_TOKEN

# Test default removal based on variable name
'API_SOMETHING': '1234567890',
'MY_PASSWORD': '1234567890',
'ABC_TOKEN': '1234567890',
'AUTH_XXX': '1234567890',
'LICENSE': '1234567890',
'WORLD_KEY': '1234567890',
'PRIVATE_INFO': '1234567890',
'SECRET_SECRET': '1234567890',
'INFO_CREDENTIALS': '1234567890',
}
init_session_state = {
'easybuild_configuration': ['EASYBUILD_DEBUG=1'],
'environment': {'USER': 'test'},
'environment': {**environ, **secret_environ},
'module_list': [{'mod_name': 'test'}],
'system_info': {'name': 'test'},
'time': gmtime(0),
}

res = create_test_report("just a test", ecs_with_res, init_session_state)
patterns = [
"**SUCCESS** _test.eb_",
"**FAIL (build issue)** _fail.eb_",
"01 Jan 1970 00:00:00",
"EASYBUILD_DEBUG=1",
"USER = test",
]
for pattern in patterns:
self.assertIn(pattern, res['full'])

for pattern in patterns[:2]:
# Test that known token regexes for ENV vars are excluded by default
exclude_patterns = [
'TOTALLYPUBLICVAR1',
'TOTALLYPUBLICVAR2',
'TOTALLYPUBLICVAR3',
'TOTALLYPUBLICVAR4',
'TOTALLYPUBLICVAR5',

'API_SOMETHING',
'MY_PASSWORD',
'ABC_TOKEN',
'AUTH_XXX',
'LICENSE',
'WORLD_KEY',
'PRIVATE_INFO',
'SECRET_SECRET',
'INFO_CREDENTIALS',
]
for pattern in exclude_patterns:
# .lower() test that variable name is not case sensitive for excluding
self.assertNotIn(pattern.lower(), res['full'])

res = create_test_report("just a test", ecs_with_res, init_session_state)
for pattern in patterns:
self.assertIn(pattern, res['full'])

for pattern in patterns[:2]:
self.assertIn(pattern, res['overview'])

for pattern in exclude_patterns:
# .lower() test that variable name is not case sensitive for excluding
self.assertNotIn(pattern.lower(), res['full'])

# mock create_gist function, we don't want to actually create a gist every time we run this test...
def fake_create_gist(*args, **kwargs):
return 'https://gist.github.com/%s/test' % GITHUB_TEST_ACCOUNT
Expand All @@ -1353,7 +1410,7 @@ def fake_create_gist(*args, **kwargs):
self.assertIn(pattern, res['full'])

for pattern in patterns[:3]:
self.assertIn(pattern, res['full'])
self.assertIn(pattern, res['overview'])

self.assertIn("**SUCCESS** _test.eb_", res['overview'])

Expand Down
36 changes: 28 additions & 8 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,26 +3390,46 @@ def toy(extra_args=None):
return test_report_txt

# define environment variables that should (not) show up in the test report
test_var_secret = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret] = 'thisshouldremainsecretonrequest'
test_var_secret_regex = re.compile(test_var_secret)
# The name contains an auto-excluded pattern `SECRET`
test_var_secret_always = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_always] = 'thisshouldremainsecretonrequest'
test_var_secret_always_regex = re.compile(test_var_secret_always)
# The name contains an autoexcluded value as a recognized GH token
test_var_secret_always2 = 'THIS_IS_JUST_A_TOTALLY_PUBLIC_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_always2] = 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz'
test_var_secret_always_regex2 = re.compile(test_var_secret_always2)
# This should be in general present and excluded on demand
test_var_secret_ondemand = 'THIS_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_ondemand] = 'thisshouldbehiddenondemand'
test_var_secret_ondemand_regex = re.compile(test_var_secret_ondemand)
test_var_public = 'THIS_IS_JUST_A_PUBLIC_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_public] = 'thisshouldalwaysbeincluded'
test_var_public_regex = re.compile(test_var_public)

# default: no filtering
test_report_txt = toy()
self.assertTrue(test_var_secret_regex.search(test_report_txt))
self.assertTrue(test_var_secret_ondemand_regex.search(test_report_txt))
self.assertTrue(test_var_public_regex.search(test_report_txt))
for rgx in [
test_var_secret_always_regex,
test_var_secret_always_regex2,
]:
res = rgx.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (rgx.pattern, test_report_txt))

# filter out env vars that match specified regex pattern
filter_arg = "--test-report-env-filter=.*_SECRET_ENV_VAR_FOR_EASYBUILD"
filter_arg = "--test-report-env-filter=.*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD"
test_report_txt = toy(extra_args=[filter_arg])
res = test_var_secret_regex.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (test_var_secret_regex.pattern, test_report_txt))
for rgx in [
test_var_secret_ondemand_regex,
test_var_secret_always_regex,
test_var_secret_always_regex2,
]:
res = rgx.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (rgx.pattern, test_report_txt))
self.assertTrue(test_var_public_regex.search(test_report_txt))
# make sure that used filter is reported correctly in test report
filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_SECRET_ENV_VAR_FOR_EASYBUILD'")
filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'")
tup = (filter_arg_regex.pattern, test_report_txt)
self.assertTrue(filter_arg_regex.search(test_report_txt), "%s in %s" % tup)

Expand Down