From f14491cc463ebabd621d1a74fa4765b4c143f1e7 Mon Sep 17 00:00:00 2001
From: clavedeluna
Date: Fri, 25 Aug 2023 12:01:01 -0300
Subject: [PATCH] support ruamel codemod

---
 codemodder/codemods/__init__.py               |  2 +
 codemodder/codemods/harden_ruamel.py          | 68 +++++++++++++++++++
 .../codemods/semgrep/harden-ruamel.yaml       | 17 +++++
 integration_tests/test_harden_ruamel.py       | 21 ++++++
 integration_tests/test_multiple.py            | 21 +++---
 tests/codemods/test_harden_ruamel.py          | 65 +++++++++++++++++++
 tests/samples/unsafe_ruamel.py                |  4 ++
 7 files changed, 190 insertions(+), 8 deletions(-)
 create mode 100644 codemodder/codemods/harden_ruamel.py
 create mode 100644 codemodder/codemods/semgrep/harden-ruamel.yaml
 create mode 100644 integration_tests/test_harden_ruamel.py
 create mode 100644 tests/codemods/test_harden_ruamel.py
 create mode 100644 tests/samples/unsafe_ruamel.py

diff --git a/codemodder/codemods/__init__.py b/codemodder/codemods/__init__.py
index 9be7d4cb..5c4237cf 100644
--- a/codemodder/codemods/__init__.py
+++ b/codemodder/codemods/__init__.py
@@ -4,6 +4,7 @@
     DjangoSessionCookieSecureOff,
 )
 from codemodder.codemods.harden_pyyaml import HardenPyyaml
+from codemodder.codemods.harden_ruamel import HardenRuamel
 from codemodder.codemods.limit_readline import LimitReadline
 from codemodder.codemods.secure_random import SecureRandom
 from codemodder.codemods.upgrade_sslcontext_tls import UpgradeSSLContextTLS
@@ -15,6 +16,7 @@
     DjangoDebugFlagOn,
     DjangoSessionCookieSecureOff,
     HardenPyyaml,
+    HardenRuamel,
     LimitReadline,
     ProcessSandbox,
     RemoveUnnecessaryFStr,
diff --git a/codemodder/codemods/harden_ruamel.py b/codemodder/codemods/harden_ruamel.py
new file mode 100644
index 00000000..eeb5b9ec
--- /dev/null
+++ b/codemodder/codemods/harden_ruamel.py
@@ -0,0 +1,68 @@
+import libcst as cst
+from libcst.codemod import CodemodContext
+from libcst import matchers
+from codemodder.codemods.base_codemod import (
+    SemgrepCodemod,
+    CodemodMetadata,
+    ReviewGuidance,
+)
+from codemodder.codemods.base_visitor import BaseTransformer
+from codemodder.codemods.change import Change
+from codemodder.file_context import FileContext
+
+
+class HardenRuamel(SemgrepCodemod, BaseTransformer):
+    """Rewrite flagged `ruamel.yaml.YAML(...)` calls to use `typ="safe"`."""
+
+    METADATA = CodemodMetadata(
+        DESCRIPTION=("Ensures all unsafe calls to ruamel.yaml.YAML use `typ='safe'`."),
+        NAME="harden-ruamel",
+        REVIEW_GUIDANCE=ReviewGuidance.MERGE_WITHOUT_REVIEW,
+    )
+    CHANGE_DESCRIPTION = METADATA.DESCRIPTION
+    YAML_FILES = [
+        "harden-ruamel.yaml",
+    ]
+
+    def __init__(self, codemod_context: CodemodContext, file_context: FileContext):
+        SemgrepCodemod.__init__(self, file_context)
+        BaseTransformer.__init__(
+            self,
+            codemod_context,
+            self._results,
+            file_context.line_exclude,
+            file_context.line_include,
+        )
+
+    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
+        """Force `typ="safe"` on calls that pass the result and path filters."""
+        pos_to_match = self.node_position(original_node)
+        if self.filter_by_result(
+            pos_to_match
+        ) and self.filter_by_path_includes_or_excludes(pos_to_match):
+            line_number = pos_to_match.start.line
+            self.CHANGES_IN_FILE.append(
+                Change(str(line_number), self.CHANGE_DESCRIPTION).to_json()
+            )
+            # Build from updated_node so edits already made to nested nodes
+            # are not discarded.
+            new_args = update_arg_target(updated_node.args, target_arg="typ")
+            return updated_node.with_changes(args=new_args)
+        return updated_node
+
+
+def update_arg_target(original_args, target_arg):
+    """Return the args with the `target_arg` keyword's value set to `"safe"`.
+
+    Only the value is swapped, so the keyword, `=`, comma and whitespace of the
+    matched argument are kept; all other arguments are returned unchanged.
+    """
+    new_args = []
+    for arg in original_args:
+        # Positional args have no keyword (None) and are never the target.
+        if arg.keyword is not None and matchers.matches(
+            arg.keyword, matchers.Name(target_arg)
+        ):
+            arg = arg.with_changes(value=cst.SimpleString('"safe"'))
+        new_args.append(arg)
+    return new_args
diff --git a/codemodder/codemods/semgrep/harden-ruamel.yaml b/codemodder/codemods/semgrep/harden-ruamel.yaml
new file mode 100644
index 00000000..86be3b3a
--- /dev/null
+++ b/codemodder/codemods/semgrep/harden-ruamel.yaml
@@ -0,0 +1,17 @@
+rules:
+  - id: harden-ruamel
+    message: Unsafe call to ruamel.yaml.YAML
+    severity: WARNING
+    languages:
+      - python
+    pattern-either:
+      - patterns:
+        - pattern: ruamel.yaml.YAML(typ="unsafe", ...)
+        - pattern-inside: |
+            import ruamel
+            ...
+      - patterns:
+        - pattern: ruamel.yaml.YAML(typ="base", ...)
+        - pattern-inside: |
+            import ruamel
+            ...
diff --git a/integration_tests/test_harden_ruamel.py b/integration_tests/test_harden_ruamel.py
new file mode 100644
index 00000000..98a1c651
--- /dev/null
+++ b/integration_tests/test_harden_ruamel.py
@@ -0,0 +1,21 @@
+from codemodder.codemods.harden_ruamel import HardenRuamel
+from integration_tests.base_test import (
+    BaseIntegrationTest,
+    original_and_expected_from_code_path,
+)
+
+
+class TestHardenRuamel(BaseIntegrationTest):
+    codemod = HardenRuamel
+    code_path = "tests/samples/unsafe_ruamel.py"
+    original_code, expected_new_code = original_and_expected_from_code_path(
+        code_path,
+        [
+            (2, 'serializer = YAML(typ="safe")\n'),
+            (3, 'serializer = YAML(typ="safe")\n'),
+        ],
+    )
+    expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n from ruamel.yaml import YAML\n \n-serializer = YAML(typ="unsafe")\n-serializer = YAML(typ="base")\n+serializer = YAML(typ="safe")\n+serializer = YAML(typ="safe")\n'
+    expected_line_change = "3"
+    num_changes = 2
+    change_description = HardenRuamel.CHANGE_DESCRIPTION
diff --git a/integration_tests/test_multiple.py b/integration_tests/test_multiple.py
index 44f4b344..c555f5f4 100644
--- a/integration_tests/test_multiple.py
+++ b/integration_tests/test_multiple.py
@@ -32,7 +32,7 @@ def _assert_codetf_output(self):
         assert sorted(codetf.keys()) == ["results", "run"]
 
         results = codetf["results"]
-        assert len(results) == 10
+        assert len(results) == 11
 
         sorted_results = sorted(results, key=lambda x: x["codemod"])
         django_debug = sorted_results[0]
@@ -56,7 +56,12 @@ def _assert_codetf_output(self):
         assert harden_pyyaml["changeset"][0]["path"] == "tests/samples/unsafe_yaml.py"
         assert len(harden_pyyaml["changeset"][0]["changes"]) == 1
 
-        limit_readline = sorted_results[3]
+        harden_ruamel = sorted_results[3]
+        assert len(harden_ruamel["changeset"]) == 1
+        assert harden_ruamel["changeset"][0]["path"] == "tests/samples/unsafe_ruamel.py"
+        assert len(harden_ruamel["changeset"][0]["changes"]) == 2
+
+        limit_readline = sorted_results[4]
         assert len(limit_readline["changeset"]) == 1
         assert (
             limit_readline["changeset"][0]["path"]
@@ -64,14 +69,14 @@ def _assert_codetf_output(self):
         )
         assert len(limit_readline["changeset"][0]["changes"]) == 1
 
-        process_sandbox = sorted_results[4]
+        process_sandbox = sorted_results[5]
         assert len(process_sandbox["changeset"]) == 1
         assert (
             process_sandbox["changeset"][0]["path"] == "tests/samples/make_process.py"
         )
         assert len(process_sandbox["changeset"][0]["changes"]) == 4
 
-        unnecessary_f_str = sorted_results[5]
+        unnecessary_f_str = sorted_results[6]
         assert len(unnecessary_f_str["changeset"]) == 1
         assert (
             unnecessary_f_str["changeset"][0]["path"]
@@ -79,14 +84,14 @@ def _assert_codetf_output(self):
         )
         assert len(unnecessary_f_str["changeset"][0]["changes"]) == 1
 
-        secure_random = sorted_results[6]
+        secure_random = sorted_results[7]
         assert len(secure_random["changeset"]) == 1
         assert (
             secure_random["changeset"][0]["path"] == "tests/samples/insecure_random.py"
         )
         assert len(secure_random["changeset"][0]["changes"]) == 1
 
-        remove_unused_imports = sorted_results[7]
+        remove_unused_imports = sorted_results[8]
         assert len(remove_unused_imports["changeset"]) == 1
         assert (
             remove_unused_imports["changeset"][0]["path"]
@@ -94,12 +99,12 @@ def _assert_codetf_output(self):
         )
         assert len(remove_unused_imports["changeset"][0]["changes"]) == 1
 
-        upgrade_weak_tls = sorted_results[8]
+        upgrade_weak_tls = sorted_results[9]
         assert len(upgrade_weak_tls["changeset"]) == 1
         assert upgrade_weak_tls["changeset"][0]["path"] == "tests/samples/weak_tls.py"
         assert len(upgrade_weak_tls["changeset"][0]["changes"]) == 1
 
-        url_sandbox = sorted_results[9]
+        url_sandbox = sorted_results[10]
         assert len(url_sandbox["changeset"]) == 1
         assert url_sandbox["changeset"][0]["path"] == "tests/samples/make_request.py"
         assert len(url_sandbox["changeset"][0]["changes"]) == 1
diff --git a/tests/codemods/test_harden_ruamel.py b/tests/codemods/test_harden_ruamel.py
new file mode 100644
index 00000000..e34446ea
--- /dev/null
+++ b/tests/codemods/test_harden_ruamel.py
@@ -0,0 +1,65 @@
+import pytest
+from codemodder.codemods.harden_ruamel import HardenRuamel
+from tests.codemods.base_codemod_test import BaseSemgrepCodemodTest
+
+
+class TestHardenRuamel(BaseSemgrepCodemodTest):
+    codemod = HardenRuamel
+
+    def test_rule_ids(self):
+        assert self.codemod.RULE_IDS == ["harden-ruamel"]
+
+    @pytest.mark.parametrize("loader", ["YAML()", "YAML(typ='rt')", "YAML(typ='safe')"])
+    def test_safe(self, tmpdir, loader):
+        input_code = f"""from ruamel.yaml import YAML
+serializer = {loader}
+"""
+        self.run_and_assert(tmpdir, input_code, input_code)
+
+    @pytest.mark.parametrize("loader", ["YAML(typ='base')", "YAML(typ='unsafe')"])
+    def test_unsafe(self, tmpdir, loader):
+        input_code = f"""from ruamel.yaml import YAML
+serializer = {loader}
+"""
+
+        expected = """from ruamel.yaml import YAML
+serializer = YAML(typ="safe")
+"""
+        self.run_and_assert(tmpdir, input_code, expected)
+
+    @pytest.mark.parametrize(
+        "loader", ["YAML(typ='base', pure=True)", "YAML(typ='unsafe', pure=True)"]
+    )
+    def test_unsafe_more_args(self, tmpdir, loader):
+        input_code = f"""from ruamel.yaml import YAML
+serializer = {loader}
+"""
+
+        expected = """from ruamel.yaml import YAML
+serializer = YAML(typ="safe", pure=True)
+"""
+        self.run_and_assert(tmpdir, input_code, expected)
+
+    @pytest.mark.parametrize("loader", ["YAML(typ='base')", "YAML(typ='unsafe')"])
+    def test_unsafe_import(self, tmpdir, loader):
+        input_code = f"""import ruamel
+serializer = ruamel.yaml.{loader}
+"""
+
+        expected = """import ruamel
+serializer = ruamel.yaml.YAML(typ="safe")
+"""
+        self.run_and_assert(tmpdir, input_code, expected)
+
+    @pytest.mark.skip()
+    @pytest.mark.parametrize("loader", ["YAML(typ='base')", "YAML(typ='unsafe')"])
+    def test_import_alias(self, tmpdir, loader):
+        input_code = f"""from ruamel import yaml as yam
+serializer = yam.{loader}
+"""
+
+        expected = """from ruamel import yaml as yam
+serializer = yam.YAML(typ="safe")
+"""
+
+        self.run_and_assert(tmpdir, input_code, expected)
diff --git a/tests/samples/unsafe_ruamel.py b/tests/samples/unsafe_ruamel.py
new file mode 100644
index 00000000..b91d9d3b
--- /dev/null
+++ b/tests/samples/unsafe_ruamel.py
@@ -0,0 +1,4 @@
+from ruamel.yaml import YAML
+
+serializer = YAML(typ="unsafe")
+serializer = YAML(typ="base")