From fe7a8edd1f70ea00bc007952de39584bea91c5c1 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 00:24:51 +0300 Subject: [PATCH 01/32] Parse the arguments at the shim and create a temp json file --- shim/shim.py | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 2 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 78dd557f..9ed0c04a 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -755,6 +755,154 @@ def _get_command_options( return options +def _get_command_options_json( + arguments: argparse.Namespace, apk_directory: str, dex_directory: str +) -> str: + + options_json = {} + options_json["system-jar-paths"] = arguments.system_jar_configuration_path + options_json["apk-directory"] = apk_directory + options_json["dex-directory"] = dex_directory + options_json["rules-paths"] = arguments.rules_paths + options_json["repository-root-directory"] = arguments.repository_root_directory + options_json["source-root-directory"] = arguments.source_root_directory + options_json["apk-path"] = arguments.apk_path + options_json["output-directory"] = arguments.output_directory + options_json["maximum-source-sink-distance"] = arguments.maximum_source_sink_distance + options_json["model-generator-configuration-paths"] = arguments.model_generator_configuration_paths + + if arguments.grepo_metadata_path: + options_json["grepo-metadata-path"] = arguments.grepo_metadata_path + + if arguments.model_generator_search_paths: + options_json["model-generator-search-paths"] = arguments.model_generator_search_paths + + if arguments.models_paths: + options_json["models-paths"] = arguments.models_paths + + if arguments.field_models_paths: + options_json["field-models-paths"] = arguments.field_models_paths + + if arguments.literal_models_paths: + options_json["literal-models-paths"] = arguments.literal_models_paths + + if arguments.proguard_configuration_paths: + options_json["proguard-configuration-paths"] = 
arguments.proguard_configuration_paths + + if arguments.lifecycles_paths: + options_json["lifecycles-paths"] = arguments.lifecycles_paths + + if arguments.shims_paths: + options_json["shims-paths"] = arguments.shims_paths + + if arguments.graphql_metadata_paths: + options_json["graphql-metadata-paths"] = arguments.graphql_metadata_paths + + if arguments.source_exclude_directories: + options_json["source-exclude-directories"] = arguments.source_exclude_directories + + if arguments.generated_models_directory: + options_json["generated-models-directory"] = arguments.generated_models_directory + + if arguments.sharded_models_directory: + options_json["sharded-models-directory"] = arguments.sharded_models_directory + + if arguments.emit_all_via_cast_features: + options_json["emit-all-via-cast-features"] = arguments.emit_all_via_cast_features + + if arguments.propagate_across_arguments: + options_json["propagate-across-arguments"] = arguments.propagate_across_arguments + + if arguments.allow_via_cast_feature: + options_json["allow-via-cast-feature"] = [] + for feature in arguments.allow_via_cast_feature: + options_json["allow-via-cast-feature"].append(feature.strip()) + + if arguments.heuristics: + options_json["heuristics"] = arguments.heuristics + + if arguments.sequential: + options_json["sequential"] = arguments.sequential + + if arguments.skip_source_indexing: + options_json["skip-source-indexing"] = arguments.skip_source_indexing + + if arguments.skip_analysis: + options_json["skip-analysis"] = arguments.skip_analysis + + if arguments.disable_parameter_type_overrides: + options_json["disable-parameter-type-overrides"] = arguments.disable_parameter_type_overrides + + if arguments.disable_global_type_analysis: + options_json["disable-global-type-analysis"] = arguments.disable_global_type_analysis + + if arguments.verify_expected_output: + options_json["verify-expected-output"] = arguments.verify_expected_output + + if arguments.remove_unreachable_code: + 
options_json["remove-unreachable-code"] = arguments.remove_unreachable_code + + if arguments.maximum_method_analysis_time is not None: + options_json["maximum-method-analysis-time"] = arguments.maximum_method_analysis_time + + if arguments.enable_cross_component_analysis: + options_json["enable-cross-component-analysis"] = arguments.enable_cross_component_analysis + + if arguments.extra_analysis_arguments: + for arg in shlex.split(arguments.extra_analysis_arguments): + options_json[arg] = arg + + if arguments.job_id: + options_json["job-id"] = arguments.job_id + + if arguments.metarun_id: + options_json["metarun-id"] = arguments.metarun_id + + trace_settings = [f"MARIANA_TRENCH:{arguments.verbosity}"] + if "TRACE" in os.environ: + trace_settings.insert(0, os.environ["TRACE"]) + os.environ["TRACE"] = ",".join(trace_settings) + + if arguments.log_method: + options_json["log-method"] = [] + for method in arguments.log_method: + options_json["log-method"].append(method.strip()) + + if arguments.log_method_types: + options_json["log-method-types"] = [] + for method in arguments.log_method_types: + options_json["log-method-types"].append(method.strip()) + + if arguments.dump_class_hierarchies: + options_json["dump-class-hierarchies"] = arguments.dump_class_hierarchies + + if arguments.dump_overrides: + options_json["dump-overrides"] = arguments.dump_overrides + + if arguments.dump_call_graph: + options_json["dump-call-graph"] = arguments.dump_call_graph + + if arguments.dump_dependencies: + options_json["dump-dependencies"] = arguments.dump_dependencies + + if arguments.dump_methods: + options_json["dump-methods"] = arguments.dump_methods + + if arguments.dump_coverage_info: + options_json["dump-coverage-info"] = arguments.dump_coverage_info + + if arguments.always_export_origins: + options_json["always-export-origins"] = arguments.always_export_origins + + # Dump the options to a file and return the file path + options_file = tempfile.NamedTemporaryFile(suffix=".json") 
+ options_file.write(json.dumps(options_json).encode()) + # Get the options file path + options_file_path = options_file.name + + return options_file_path + + def main() -> None: logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") build_directory = Path(tempfile.mkdtemp()) @@ -855,8 +1003,9 @@ def main() -> None: binary, arguments, apk_directory, dex_directory ) else: - options = _get_command_options(arguments, apk_directory, dex_directory) - command = [os.fspath(binary.resolve())] + options + options_file_path = _get_command_options_json(arguments, apk_directory, dex_directory) + + command = [os.fspath(binary.resolve()), options_file_path] if arguments.gdb: command = ["gdb", "--args"] + command elif arguments.lldb: From ba65ddc57b6f3c9386c6e7c80d585f11025801c0 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 00:27:23 +0300 Subject: [PATCH 02/32] Keep the created file --- shim/shim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shim/shim.py b/shim/shim.py index 9ed0c04a..37bdf990 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -895,7 +895,7 @@ def _get_command_options_json( options_json["always-export-origins"] = arguments.always_export_origins # Dump the options to a file and return the file path - options_file = tempfile.NamedTemporaryFile(suffix=".json") + options_file = tempfile.NamedTemporaryFile(suffix=".json",delete=False) options_file.write(json.dumps(options_json).encode()) # Get the options file path options_file_path = options_file.name From 46df9325d23d9a5d3e3a5ed816b6f126760b678b Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 01:49:56 +0300 Subject: [PATCH 03/32] Parse the arguments from the JSON file at cpp --- source/Main.cpp | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/source/Main.cpp b/source/Main.cpp index ef391a7e..6b44006a 100644 --- a/source/Main.cpp +++ b/source/Main.cpp @@ -18,6 +18,8 @@ #include 
#include +#include +#include #include #include #include @@ -40,11 +42,43 @@ int main(int argc, char* argv[]) { tool.add_options(options); try { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " \n"; + return ExitCode::invalid_argument_error("No JSON configuration file provided."); + } + + std::string json_file_path = argv[1]; + + // Use JsonReader to parse the JSON file + Json::Value json = marianatrench::JsonReader::parse_json_file(json_file_path); + // Validate the JSON object + marianatrench::JsonValidation::validate_object(json); + // Use variables_map to store the JSON data program_options::variables_map variables; - program_options::store( - program_options::command_line_parser(argc, argv).options(options).run(), - variables); + // Populate variables_map from JSON data + for (const auto& key : json.getMemberNames()) { + const auto& value = json[key]; + if (value.isString()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asString(), false))); + } else if (value.isBool()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asBool(), false))); + } else if (value.isInt()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asInt(), false))); + } else if (value.isUInt()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asUInt(), false))); + } else if (value.isDouble()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asDouble(), false))); + } else if (value.isArray()) { + std::vector array_values; + for (const auto& element : value) { + if (element.isString()) { + array_values.push_back(element.asString()); + } + } + variables.insert(std::make_pair(key, program_options::variable_value(array_values, false))); + } + } if (variables.count("help")) { std::cerr << options; return 0; From 9c47a3ffe613e58e6c58278b8035a6f93bed60b9 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 16:00:04 +0300 Subject: 
[PATCH 04/32] Remove a new line in the beginning of function --- shim/shim.py | 1 - 1 file changed, 1 deletion(-) diff --git a/shim/shim.py b/shim/shim.py index 37bdf990..701f630a 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -758,7 +758,6 @@ def _get_command_options( def _get_command_options_json( arguments: argparse.Namespace, apk_directory: str, dex_directory: str ) -> str: - options_json = {} options_json["system-jar-paths"] = arguments.system_jar_configuration_path options_json["apk-directory"] = apk_directory From 9eca26d960aba7b5feda7598bf1d157aa1d419d3 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 16:01:58 +0300 Subject: [PATCH 05/32] Set environment variables in a separate function --- shim/shim.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 701f630a..9ad01d4c 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -754,6 +754,11 @@ def _get_command_options( options.append("--always-export-origins") return options +def _set_environment_variables(arguments): + trace_settings = [f"MARIANA_TRENCH:{arguments.verbosity}"] + if "TRACE" in os.environ: + trace_settings.insert(0, os.environ["TRACE"]) + os.environ["TRACE"] = ",".join(trace_settings) def _get_command_options_json( arguments: argparse.Namespace, apk_directory: str, dex_directory: str @@ -857,10 +862,7 @@ def _get_command_options_json( if arguments.metarun_id: options_json["metarun-id"] = arguments.metarun_id - trace_settings = [f"MARIANA_TRENCH:{arguments.verbosity}"] - if "TRACE" in os.environ: - trace_settings.insert(0, os.environ["TRACE"]) - os.environ["TRACE"] = ",".join(trace_settings) + _set_environment_variables(arguments) if arguments.log_method: options_json["log-method"] = [] From ca221e98e40a9268fd70f59b3e040b58d6513d21 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 16:04:15 +0300 Subject: [PATCH 06/32] Remove the old options function --- shim/shim.py | 142 
--------------------------------------------------- 1 file changed, 142 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 9ad01d4c..79a8dcd8 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -612,148 +612,6 @@ def _add_debug_arguments(parser: argparse.ArgumentParser) -> None: ) -def _get_command_options( - arguments: argparse.Namespace, apk_directory: str, dex_directory: str -) -> List[str]: - options = [ - "--system-jar-paths", - arguments.system_jar_configuration_path, - "--apk-directory", - apk_directory, - "--dex-directory", - dex_directory, - "--rules-paths", - arguments.rules_paths, - "--repository-root-directory", - arguments.repository_root_directory, - "--source-root-directory", - arguments.source_root_directory, - "--apk-path", - arguments.apk_path, - "--output-directory", - arguments.output_directory, - "--maximum-source-sink-distance", - str(arguments.maximum_source_sink_distance), - "--model-generator-configuration-paths", - arguments.model_generator_configuration_paths, - ] - - if arguments.grepo_metadata_path: - options.append("--grepo-metadata-path") - options.append(arguments.grepo_metadata_path) - - if arguments.model_generator_search_paths: - options.append("--model-generator-search-paths") - options.append(arguments.model_generator_search_paths) - - if arguments.models_paths: - options.append("--models-paths") - options.append(arguments.models_paths) - - if arguments.field_models_paths: - options.append("--field-models-paths") - options.append(arguments.field_models_paths) - - if arguments.literal_models_paths: - options.append("--literal-models-paths") - options.append(arguments.literal_models_paths) - - if arguments.proguard_configuration_paths: - options.append("--proguard-configuration-paths") - options.append(arguments.proguard_configuration_paths) - - if arguments.lifecycles_paths: - options.append("--lifecycles-paths") - options.append(arguments.lifecycles_paths) - - if arguments.shims_paths: - options.append("--shims-paths") - 
options.append(arguments.shims_paths) - - if arguments.graphql_metadata_paths: - options.append("--graphql-metadata-paths") - options.append(arguments.graphql_metadata_paths) - - if arguments.source_exclude_directories: - options.append("--source-exclude-directories") - options.append(arguments.source_exclude_directories) - - if arguments.generated_models_directory: - options.append("--generated-models-directory") - options.append(arguments.generated_models_directory) - - if arguments.sharded_models_directory: - options.append("--sharded-models-directory") - options.append(arguments.sharded_models_directory) - - if arguments.emit_all_via_cast_features: - options.append("--emit-all-via-cast-features") - if arguments.propagate_across_arguments: - options.append("--propagate-across-arguments") - if arguments.allow_via_cast_feature: - for feature in arguments.allow_via_cast_feature: - options.append("--allow-via-cast-feature=%s" % feature.strip()) - - if arguments.heuristics: - options.append("--heuristics") - options.append(arguments.heuristics) - - if arguments.sequential: - options.append("--sequential") - if arguments.skip_source_indexing: - options.append("--skip-source-indexing") - if arguments.skip_analysis: - options.append("--skip-analysis") - if arguments.disable_parameter_type_overrides: - options.append("--disable-parameter-type-overrides") - if arguments.disable_global_type_analysis: - options.append("--disable-global-type-analysis") - if arguments.verify_expected_output: - options.append("--verify-expected-output") - if arguments.remove_unreachable_code: - options.append("--remove-unreachable-code") - if arguments.maximum_method_analysis_time is not None: - options.append("--maximum-method-analysis-time") - options.append(str(arguments.maximum_method_analysis_time)) - if arguments.enable_cross_component_analysis: - options.append("--enable-cross-component-analysis") - if arguments.extra_analysis_arguments: - 
options.extend(shlex.split(arguments.extra_analysis_arguments)) - - if arguments.job_id: - options.append("--job-id") - options.append(arguments.job_id) - if arguments.metarun_id: - options.append("--metarun-id") - options.append(arguments.metarun_id) - - trace_settings = [f"MARIANA_TRENCH:{arguments.verbosity}"] - if "TRACE" in os.environ: - trace_settings.insert(0, os.environ["TRACE"]) - os.environ["TRACE"] = ",".join(trace_settings) - - if arguments.log_method: - for method in arguments.log_method: - options.append("--log-method=%s" % method.strip()) - if arguments.log_method_types: - for method in arguments.log_method_types: - options.append("--log-method-types=%s" % method.strip()) - if arguments.dump_class_hierarchies: - options.append("--dump-class-hierarchies") - if arguments.dump_overrides: - options.append("--dump-overrides") - if arguments.dump_call_graph: - options.append("--dump-call-graph") - if arguments.dump_dependencies: - options.append("--dump-dependencies") - if arguments.dump_methods: - options.append("--dump-methods") - if arguments.dump_coverage_info: - options.append("--dump-coverage-info") - if arguments.always_export_origins: - options.append("--always-export-origins") - return options - def _set_environment_variables(arguments): trace_settings = [f"MARIANA_TRENCH:{arguments.verbosity}"] if "TRACE" in os.environ: From d91949f84189a997d26233faafcc47141801e9ce Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 16:32:27 +0300 Subject: [PATCH 07/32] Return the filename without intermediate variable --- shim/shim.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 79a8dcd8..5d060099 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -756,10 +756,8 @@ def _get_command_options_json( # Dump the options to a file and return the file path options_file = tempfile.NamedTemporaryFile(suffix=".json",delete=False) options_file.write(json.dumps(options_json).encode()) - # Get the options 
file path - options_file_path = options_file.name - return options_file_path + return options_file.name def main() -> None: From c72ddf690f234439aef28f336cf808fc782790a2 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Fri, 19 Jul 2024 17:15:49 +0300 Subject: [PATCH 08/32] Return option json and write the file at the main --- shim/shim.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 5d060099..642455c9 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -620,7 +620,7 @@ def _set_environment_variables(arguments): def _get_command_options_json( arguments: argparse.Namespace, apk_directory: str, dex_directory: str -) -> str: +) -> dict: options_json = {} options_json["system-jar-paths"] = arguments.system_jar_configuration_path options_json["apk-directory"] = apk_directory @@ -753,11 +753,7 @@ def _get_command_options_json( if arguments.always_export_origins: options_json["always-export-origins"] = arguments.always_export_origins - # Dump the options to a file and return the file path - options_file = tempfile.NamedTemporaryFile(suffix=".json",delete=False) - options_file.write(json.dumps(options_json).encode()) - - return options_file.name + return options_json def main() -> None: @@ -860,15 +856,19 @@ def main() -> None: binary, arguments, apk_directory, dex_directory ) else: - options_file_path = _get_command_options_json(arguments, apk_directory, dex_directory) - - command = [os.fspath(binary.resolve()), options_file_path] - if arguments.gdb: - command = ["gdb", "--args"] + command - elif arguments.lldb: - command = ["lldb", "--"] + command - LOG.info(f"Running Mariana Trench: {' '.join(command)}") - output = subprocess.run(command) + with tempfile.NamedTemporaryFile(suffix=".json",mode="w") as options_file: + options_json = _get_command_options_json(arguments, apk_directory, dex_directory) + print(f"options_json type: {type(options_json)}") + print(f"options_file mode: 
{options_file.mode}") + json.dump(options_json, options_file, indent=4) + options_file.flush() + command = [os.fspath(binary.resolve()), options_file.name] + if arguments.gdb: + command = ["gdb", "--args"] + command + elif arguments.lldb: + command = ["lldb", "--"] + command + LOG.info(f"Running Mariana Trench: {' '.join(command)}") + output = subprocess.run(command) if output.returncode != 0: LOG.fatal(f"Analysis binary exited with exit code {output.returncode}.") sys.exit(output.returncode) From 9175f551ea0826388b0214749263a98f732e6852 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Mon, 22 Jul 2024 21:53:47 +0300 Subject: [PATCH 09/32] Set the environemnt variables outside of the options method --- shim/shim.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 642455c9..fe25b550 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -720,8 +720,6 @@ def _get_command_options_json( if arguments.metarun_id: options_json["metarun-id"] = arguments.metarun_id - _set_environment_variables(arguments) - if arguments.log_method: options_json["log-method"] = [] for method in arguments.log_method: @@ -857,6 +855,7 @@ def main() -> None: ) else: with tempfile.NamedTemporaryFile(suffix=".json",mode="w") as options_file: + _set_environment_variables(arguments) options_json = _get_command_options_json(arguments, apk_directory, dex_directory) print(f"options_json type: {type(options_json)}") print(f"options_file mode: {options_file.mode}") From 0a7460430c846ccbb9c995365a043766d8ab99cb Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Mon, 22 Jul 2024 21:54:09 +0300 Subject: [PATCH 10/32] Remove debugging logs --- shim/shim.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index fe25b550..fc1a10a3 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -857,8 +857,6 @@ def main() -> None: with tempfile.NamedTemporaryFile(suffix=".json",mode="w") as options_file: _set_environment_variables(arguments) options_json 
= _get_command_options_json(arguments, apk_directory, dex_directory) - print(f"options_json type: {type(options_json)}") - print(f"options_file mode: {options_file.mode}") json.dump(options_json, options_file, indent=4) options_file.flush() command = [os.fspath(binary.resolve()), options_file.name] From 313e36a87864012b6b6d1f5798970ff17aa3c9dc Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Mon, 22 Jul 2024 21:58:55 +0300 Subject: [PATCH 11/32] Edit some arguments type --- shim/shim.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index fc1a10a3..f5d20fc2 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -670,10 +670,10 @@ def _get_command_options_json( options_json["sharded-models-directory"] = arguments.sharded_models_directory if arguments.emit_all_via_cast_features: - options_json["emit-all-via-cast-features"] = arguments.emit_all_via_cast_features + options_json["emit-all-via-cast-features"] = True if arguments.propagate_across_arguments: - options_json["propagate-across-arguments"] = arguments.propagate_across_arguments + options_json["propagate-across-arguments"] = True if arguments.allow_via_cast_feature: options_json["allow-via-cast-feature"] = [] @@ -684,31 +684,31 @@ def _get_command_options_json( options_json["heuristics"] = arguments.heuristics if arguments.sequential: - options_json["sequential"] = arguments.sequential + options_json["sequential"] = True if arguments.skip_source_indexing: - options_json["skip-source-indexing"] = arguments.skip_source_indexing + options_json["skip-source-indexing"] = True if arguments.skip_analysis: - options_json["skip-analysis"] = arguments.skip_analysis + options_json["skip-analysis"] = True if arguments.disable_parameter_type_overrides: - options_json["disable-parameter-type-overrides"] = arguments.disable_parameter_type_overrides + options_json["disable-parameter-type-overrides"] = True if arguments.disable_global_type_analysis: - 
options_json["disable-global-type-analysis"] = arguments.disable_global_type_analysis + options_json["disable-global-type-analysis"] = True if arguments.verify_expected_output: - options_json["verify-expected-output"] = arguments.verify_expected_output + options_json["verify-expected-output"] = True if arguments.remove_unreachable_code: - options_json["remove-unreachable-code"] = arguments.remove_unreachable_code + options_json["remove-unreachable-code"] = True if arguments.maximum_method_analysis_time is not None: options_json["maximum-method-analysis-time"] = arguments.maximum_method_analysis_time if arguments.enable_cross_component_analysis: - options_json["enable-cross-component-analysis"] = arguments.enable_cross_component_analysis + options_json["enable-cross-component-analysis"] = True if arguments.extra_analysis_arguments: for arg in shlex.split(arguments.extra_analysis_arguments): @@ -731,25 +731,25 @@ def _get_command_options_json( options_json["log-method-types"].append(method.strip()) if arguments.dump_class_hierarchies: - options_json["dump-class-hierarchies"] = arguments.dump_class_hierarchies + options_json["dump-class-hierarchies"] = True if arguments.dump_overrides: - options_json["dump-overrides"] = arguments.dump_overrides + options_json["dump-overrides"] = True if arguments.dump_call_graph: - options_json["dump-call-graph"] = arguments.dump_call_graph + options_json["dump-call-graph"] = True if arguments.dump_dependencies: - options_json["dump-dependencies"] = arguments.dump_dependencies + options_json["dump-dependencies"] = True if arguments.dump_methods: - options_json["dump-methods"] = arguments.dump_methods + options_json["dump-methods"] = True if arguments.dump_coverage_info: - options_json["dump-coverage-info"] = arguments.dump_coverage_info + options_json["dump-coverage-info"] = True if arguments.always_export_origins: - options_json["always-export-origins"] = arguments.always_export_origins + options_json["always-export-origins"] = True 
return options_json From ef2359ba25c68600cd41a7325c3a7ca63818a3a4 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Mon, 22 Jul 2024 22:01:40 +0300 Subject: [PATCH 12/32] Handle extra_analysis_arguments in a different way --- shim/shim.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index f5d20fc2..874e5d9b 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -711,8 +711,14 @@ def _get_command_options_json( options_json["enable-cross-component-analysis"] = True if arguments.extra_analysis_arguments: - for arg in shlex.split(arguments.extra_analysis_arguments): - options_json[arg] = arg + extra_arguments = json.loads(arguments.extra_analysis_arguments) + for key, value in extra_arguments.items(): + if key in options_json and isinstance(options_json[key],list) and isinstance(value, list): + # Append the values to the existing list + options_json[key].extend(value) + else: + # Overwrite the value + options_json[key] = value if arguments.job_id: options_json["job-id"] = arguments.job_id From ea810d79f1765553fe3f1bbea71a5660be172161 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Mon, 22 Jul 2024 23:27:45 +0300 Subject: [PATCH 13/32] use boost program options to get the config file path --- shim/shim.py | 2 +- source/Main.cpp | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/shim/shim.py b/shim/shim.py index 874e5d9b..739a0054 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -865,7 +865,7 @@ def main() -> None: options_json = _get_command_options_json(arguments, apk_directory, dex_directory) json.dump(options_json, options_file, indent=4) options_file.flush() - command = [os.fspath(binary.resolve()), options_file.name] + command = [os.fspath(binary.resolve()),"--config" ,options_file.name] if arguments.gdb: command = ["gdb", "--args"] + command elif arguments.lldb: diff --git a/source/Main.cpp b/source/Main.cpp index 6b44006a..54766260 100644 --- a/source/Main.cpp +++ 
b/source/Main.cpp @@ -36,26 +36,33 @@ int main(int argc, char* argv[]) { namespace program_options = boost::program_options; program_options::options_description options; - options.add_options()("help,h", "Show help dialog."); + options.add_options() + ("help,h", "Show help dialog.") + ("config,c", program_options::value()->required(), "Path to the JSON configuration file."); auto tool = marianatrench::MarianaTrench(); tool.add_options(options); try { - if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " \n"; + program_options::variables_map variables; + po::store(po::parse_command_line(argc, argv, options), variables); + if (variables.count("help")) { + std::cerr << options; + return 0; + } + if (!variables.count("config")) { + std::cerr << "No JSON configuration file provided.\n"; + std::cerr << "Usage: " << argv[0] << " --config \n"; return ExitCode::invalid_argument_error("No JSON configuration file provided."); } - std::string json_file_path = argv[1]; + std::string json_file_path = variables["config"].as(); // Use JsonReader to parse the JSON file Json::Value json = marianatrench::JsonReader::parse_json_file(json_file_path); // Validate the JSON object marianatrench::JsonValidation::validate_object(json); - // Use variables_map to store the JSON data - program_options::variables_map variables; - // Populate variables_map from JSON data + for (const auto& key : json.getMemberNames()) { const auto& value = json[key]; @@ -79,10 +86,6 @@ int main(int argc, char* argv[]) { variables.insert(std::make_pair(key, program_options::variable_value(array_values, false))); } } - if (variables.count("help")) { - std::cerr << options; - return 0; - } program_options::notify(variables); From 6cff675b7af51f817769520718fce8831b2c421a Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 03:16:05 +0300 Subject: [PATCH 14/32] Remove parsing the json from the main --- source/Main.cpp | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 
deletions(-) diff --git a/source/Main.cpp b/source/Main.cpp index 54766260..7b5f83d5 100644 --- a/source/Main.cpp +++ b/source/Main.cpp @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) { try { program_options::variables_map variables; - po::store(po::parse_command_line(argc, argv, options), variables); + program_options::store(program_options::parse_command_line(argc, argv, options), variables); if (variables.count("help")) { std::cerr << options; return 0; @@ -56,38 +56,6 @@ int main(int argc, char* argv[]) { return ExitCode::invalid_argument_error("No JSON configuration file provided."); } - std::string json_file_path = variables["config"].as(); - - // Use JsonReader to parse the JSON file - Json::Value json = marianatrench::JsonReader::parse_json_file(json_file_path); - // Validate the JSON object - marianatrench::JsonValidation::validate_object(json); - - for (const auto& key : json.getMemberNames()) { - const auto& value = json[key]; - - if (value.isString()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asString(), false))); - } else if (value.isBool()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asBool(), false))); - } else if (value.isInt()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asInt(), false))); - } else if (value.isUInt()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asUInt(), false))); - } else if (value.isDouble()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asDouble(), false))); - } else if (value.isArray()) { - std::vector array_values; - for (const auto& element : value) { - if (element.isString()) { - array_values.push_back(element.asString()); - } - } - variables.insert(std::make_pair(key, program_options::variable_value(array_values, false))); - } - } - - program_options::notify(variables); marianatrench::GlobalRedexContext redex_context( /* allow_class_duplicates */ 
true); From f5168b69acb7881fa7d72c2a8d264e9d350e44e9 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 03:16:20 +0300 Subject: [PATCH 15/32] Create static function to parse the arguments --- source/Options.cpp | 40 +++++++++++++++++++++++++++++++++++++++- source/Options.h | 2 ++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/source/Options.cpp b/source/Options.cpp index 9b7c4561..5a63b419 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -6,7 +6,7 @@ */ #include - +#include #include #include @@ -303,6 +303,44 @@ Options::Options(const boost::program_options::variables_map& variables) { } } +Options* Options::options_from_json_file(const std::string& options_json_path){ + + std::cout<<"======================\n"; + std::cout << "options from json file options_json_path: " << options_json_path << std::endl; + std::cout<<"======================\n"; + program_options::variables_map variables; + // Use JsonReader to parse the JSON file + Json::Value json = marianatrench::JsonReader::parse_json_file(options_json_path); + // Validate the JSON object + marianatrench::JsonValidation::validate_object(json); + + for (const auto& key : json.getMemberNames()) { + const auto& value = json[key]; + + if (value.isString()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asString(), false))); + } else if (value.isBool()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asBool(), false))); + } else if (value.isInt()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asInt(), false))); + } else if (value.isUInt()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asUInt(), false))); + } else if (value.isDouble()) { + variables.insert(std::make_pair(key, program_options::variable_value(value.asDouble(), false))); + } else if (value.isArray()) { + std::vector array_values; + for (const auto& element : value) { + if 
(element.isString()) { + array_values.push_back(element.asString()); + } + } + variables.insert(std::make_pair(key, program_options::variable_value(array_values, false))); + } + } + + Options * options = new Options(variables); + return options; +} void Options::add_options( boost::program_options::options_description& options) { options.add_options()( diff --git a/source/Options.h b/source/Options.h index 4ea89b1b..79eb085f 100644 --- a/source/Options.h +++ b/source/Options.h @@ -52,6 +52,8 @@ class Options final { static void add_options(boost::program_options::options_description& options); + static Options* options_from_json_file(const std::string& options_json_path); + const std::vector& models_paths() const; const std::vector& field_models_paths() const; const std::vector& literal_models_paths() const; From e514945943533ff38b523c6d6f6367f205229faf Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 03:16:47 +0300 Subject: [PATCH 16/32] Remove couts --- source/Options.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/Options.cpp b/source/Options.cpp index 5a63b419..1a85c21c 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -304,10 +304,6 @@ Options::Options(const boost::program_options::variables_map& variables) { } Options* Options::options_from_json_file(const std::string& options_json_path){ - - std::cout<<"======================\n"; - std::cout << "options from json file options_json_path: " << options_json_path << std::endl; - std::cout<<"======================\n"; program_options::variables_map variables; // Use JsonReader to parse the JSON file Json::Value json = marianatrench::JsonReader::parse_json_file(options_json_path); From f46ce38e7371e10b25e589dcd7fe2823994428b1 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 03:17:18 +0300 Subject: [PATCH 17/32] parse the options from the json in the run method --- source/MarianaTrench.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff 
--git a/source/MarianaTrench.cpp b/source/MarianaTrench.cpp index 1fd0c119..93a32873 100644 --- a/source/MarianaTrench.cpp +++ b/source/MarianaTrench.cpp @@ -423,8 +423,9 @@ std::vector filter_existing_jars( void MarianaTrench::run(const program_options::variables_map& variables) { Context context; - - context.options = std::make_unique(variables); + std::string json_file_path = variables["config"].as(); + Options* options_from_file = Options::options_from_json_file(json_file_path); + context.options = std::unique_ptr(options_from_file); const auto& options = *context.options; if (auto heuristics_path = options.heuristics_path()) { From 28d1e3621803d83c27c3b3f00739083f1cbdc52f Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 05:09:25 +0300 Subject: [PATCH 18/32] return unique_ptr instead of option ptr --- source/Options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/Options.h b/source/Options.h index 79eb085f..4495eaea 100644 --- a/source/Options.h +++ b/source/Options.h @@ -52,7 +52,7 @@ class Options final { static void add_options(boost::program_options::options_description& options); - static Options* options_from_json_file(const std::string& options_json_path); + static std::unique_ptr options_from_json_file(const std::string& options_json_path); const std::vector& models_paths() const; const std::vector& field_models_paths() const; From 7275b9a62023584eaccadfefa7a1fa386db652c1 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 05:10:04 +0300 Subject: [PATCH 19/32] Edit return type implementation --- source/Options.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/Options.cpp b/source/Options.cpp index 1a85c21c..9d41bde1 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -303,7 +303,7 @@ Options::Options(const boost::program_options::variables_map& variables) { } } -Options* Options::options_from_json_file(const std::string& options_json_path){ 
+std::unique_ptr Options::options_from_json_file(const std::string& options_json_path){ program_options::variables_map variables; // Use JsonReader to parse the JSON file Json::Value json = marianatrench::JsonReader::parse_json_file(options_json_path); @@ -333,9 +333,7 @@ Options* Options::options_from_json_file(const std::string& options_json_path){ variables.insert(std::make_pair(key, program_options::variable_value(array_values, false))); } } - - Options * options = new Options(variables); - return options; + return std::make_unique(variables); } void Options::add_options( boost::program_options::options_description& options) { From ef66403adc103ae235f502b419c02a84823bffee Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 05:10:15 +0300 Subject: [PATCH 20/32] Handle unique_ptr --- source/MarianaTrench.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/MarianaTrench.cpp b/source/MarianaTrench.cpp index 93a32873..006d2d39 100644 --- a/source/MarianaTrench.cpp +++ b/source/MarianaTrench.cpp @@ -424,8 +424,8 @@ std::vector filter_existing_jars( void MarianaTrench::run(const program_options::variables_map& variables) { Context context; std::string json_file_path = variables["config"].as(); - Options* options_from_file = Options::options_from_json_file(json_file_path); - context.options = std::unique_ptr(options_from_file); + std::unique_ptr options_from_file = Options::options_from_json_file(json_file_path); + context.options = std::move(options_from_file); const auto& options = *context.options; if (auto heuristics_path = options.heuristics_path()) { From e8184ef617568c4a83791ce3f1a920154f6ad43e Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 15:59:12 +0300 Subject: [PATCH 21/32] Remove the intermediate variable --- source/MarianaTrench.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/MarianaTrench.cpp b/source/MarianaTrench.cpp index 006d2d39..8f1e42a1 100644 --- 
a/source/MarianaTrench.cpp +++ b/source/MarianaTrench.cpp @@ -424,8 +424,8 @@ std::vector filter_existing_jars( void MarianaTrench::run(const program_options::variables_map& variables) { Context context; std::string json_file_path = variables["config"].as(); - std::unique_ptr options_from_file = Options::options_from_json_file(json_file_path); - context.options = std::move(options_from_file); + + context.options = Options::options_from_json_file(json_file_path); const auto& options = *context.options; if (auto heuristics_path = options.heuristics_path()) { From dd526869aea576d69c49a22999192030239f9772 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 20:31:26 +0300 Subject: [PATCH 22/32] Parse the json file in the new constructor --- source/Options.cpp | 488 ++++++++++++++------------------------------- source/Options.h | 4 +- 2 files changed, 149 insertions(+), 343 deletions(-) diff --git a/source/Options.cpp b/source/Options.cpp index 9d41bde1..401143f5 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -147,368 +147,176 @@ Options::Options( export_origins_mode_(export_origins_mode), propagate_across_arguments_(propagate_across_arguments) {} -Options::Options(const boost::program_options::variables_map& variables) { - system_jar_paths_ = parse_paths_list( - variables["system-jar-paths"].as(), - std::nullopt, - /* check exist */ false); - - apk_directory_ = - check_directory_exists(variables["apk-directory"].as()); - dex_directory_ = - check_directory_exists(variables["dex-directory"].as()); - - if (!variables["models-paths"].empty()) { - models_paths_ = parse_paths_list( - variables["models-paths"].as(), /* extension */ ".json"); - } - if (!variables["field-models-paths"].empty()) { - field_models_paths_ = parse_paths_list( - variables["field-models-paths"].as(), - /* extension */ ".json"); - } - if (!variables["literal-models-paths"].empty()) { - literal_models_paths_ = parse_paths_list( - variables["literal-models-paths"].as(), - /* 
extension */ ".json"); - } - rules_paths_ = parse_paths_list( - variables["rules-paths"].as(), /* extension */ ".json"); +Options::Options(const Json::Value &json){ + system_jar_paths_ = parse_paths_list( + json["system-jar-paths"].asString(), + std::nullopt, + /* check exist */ false); + + apk_directory_ = + check_directory_exists(json["apk-directory"].asString()); + dex_directory_ = + check_directory_exists(json["dex-directory"].asString()); + + if (!json["models-paths"].isNull()) { + models_paths_ = parse_paths_list( + json["models-paths"].asString(), /* extension */ ".json"); + } - if (!variables["lifecycles-paths"].empty()) { - lifecycles_paths_ = parse_paths_list( - variables["lifecycles-paths"].as(), - /* extension */ ".json"); - } + if (!json["field-models-paths"].isNull()) { + field_models_paths_ = parse_paths_list( + json["field-models-paths"].asString(), + /* extension */ ".json"); + } + + if (!json["literal-models-paths"].isNull()) { + literal_models_paths_ = parse_paths_list( + json["literal-models-paths"].asString(), + /* extension */ ".json"); + } + + rules_paths_ = parse_paths_list( + json["rules-paths"].asString(), /* extension */ ".json"); - if (!variables["shims-paths"].empty()) { - shims_paths_ = parse_paths_list( - variables["shims-paths"].as(), + if (!json["lifecycles-paths"].isNull()) { + lifecycles_paths_ = parse_paths_list( + json["lifecycles-paths"].asString(), + /* extension */ ".json"); + } + + if (!json["shims-paths"].isNull()) { + shims_paths_ = parse_paths_list( + json["shims-paths"].asString(), + /* extension */ ".json"); + } + + if (!json["graphql-metadata-paths"].isNull()) { + graphql_metadata_paths_ = check_path_exists( + json["graphql-metadata-paths"].asString()); + } else { + graphql_metadata_paths_ = ""; + } + + if (!json["proguard-configuration-paths"].isNull()) { + proguard_configuration_paths_ = parse_paths_list( + json["proguard-configuration-paths"].asString(), + /* extension */ ".pro"); + } + + if 
(!json["generated-models-directory"].isNull()) { + generated_models_directory_ = check_path_exists( + json["generated-models-directory"].asString()); + } + + generator_configuration_paths_ = parse_paths_list( + json["model-generator-configuration-paths"].asString(), /* extension */ ".json"); - } + model_generators_configuration_ = + parse_json_configuration_files(generator_configuration_paths_); - if (!variables["graphql-metadata-paths"].empty()) { - graphql_metadata_paths_ = check_path_exists( - variables["graphql-metadata-paths"].as()); - } else { - graphql_metadata_paths_ = ""; - } + if (!json["model-generator-search-paths"].isNull()) { + model_generator_search_paths_ = parse_search_paths( + json["model-generator-search-paths"].asString()); + } - if (!variables["proguard-configuration-paths"].empty()) { - proguard_configuration_paths_ = parse_paths_list( - variables["proguard-configuration-paths"].as(), - /* extension */ ".pro"); - } + repository_root_directory_ = check_directory_exists( + json["repository-root-directory"].asString()); + source_root_directory_ = check_directory_exists( + json["source-root-directory"].asString()); - if (!variables["generated-models-directory"].empty()) { - generated_models_directory_ = check_path_exists( - variables["generated-models-directory"].as()); - } + if (!json["source-exclude-directories"].isNull()) { + source_exclude_directories_ = parse_paths_list( + json["source-exclude-directories"].asString(), + /* extension */ std::nullopt, + /* check_exist */ false); + } - generator_configuration_paths_ = parse_paths_list( - variables["model-generator-configuration-paths"].as(), - /* extension */ ".json"); - model_generators_configuration_ = - parse_json_configuration_files(generator_configuration_paths_); + if (!json["grepo-metadata-path"].isNull()) { + grepo_metadata_path_ = + check_path_exists(json["grepo-metadata-path"].asString()); + } - if (!variables["model-generator-search-paths"].empty()) { - model_generator_search_paths_ 
= parse_search_paths( - variables["model-generator-search-paths"].as()); - } + apk_path_ = check_path_exists(json["apk-path"].asString()); + output_directory_ = std::filesystem::path( + check_directory_exists(json["output-directory"].asString())); - repository_root_directory_ = check_directory_exists( - variables["repository-root-directory"].as()); - source_root_directory_ = check_directory_exists( - variables["source-root-directory"].as()); + if (!json["sharded-models-directory"].isNull()) { + sharded_models_directory_ = std::filesystem::path(check_directory_exists( + json["sharded-models-directory"].asString())); + } - if (!variables["source-exclude-directories"].empty()) { - source_exclude_directories_ = parse_paths_list( - variables["source-exclude-directories"].as(), - /* extension */ std::nullopt, - /* check_exist */ false); - } + sequential_ = json["sequential"].asBool(); + skip_source_indexing_ = json["skip-source-indexing"].asBool(); + skip_analysis_ = json["skip-analysis"].asBool(); + remove_unreachable_code_ = json["remove-unreachable-code"].asBool(); + disable_parameter_type_overrides_ = + json["disable-parameter-type-overrides"].asBool(); + disable_global_type_analysis_ = json["disable-global-type-analysis"].asBool(); + verify_expected_output_ = json["verify-expected-output"].asBool(); + maximum_method_analysis_time_ = + json["maximum-method-analysis-time"] ? 
+ std::nullopt + : std::make_optional(json["maximum-method-analysis-time"].asInt()); + maximum_source_sink_distance_ = + json["maximum-source-sink-distance"].asInt(); + emit_all_via_cast_features_ = + json["emit-all-via-cast-features"].asBool(); + + if (!json["allow-via-cast-feature"].isNull()) { + for (const auto& value : json["allow-via-cast-feature"]) { + allow_via_cast_features_.push_back(value.asString()); + } + } - if (!variables["grepo-metadata-path"].empty()) { - grepo_metadata_path_ = - check_path_exists(variables["grepo-metadata-path"].as()); - } + if (!json["log-method"].isNull()) { + for (const auto& value : json["log-method"]) { + log_methods_.push_back(value.asString()); + } + } - apk_path_ = check_path_exists(variables["apk-path"].as()); - output_directory_ = std::filesystem::path( - check_directory_exists(variables["output-directory"].as())); + if (!json["log-method-types"].isNull()) { + for (const auto& value : json["log-method-types"]) { + log_method_types_.push_back(value.asString()); + } + } - if (!variables["sharded-models-directory"].empty()) { - sharded_models_directory_ = std::filesystem::path(check_directory_exists( - variables["sharded-models-directory"].as())); - } + dump_class_hierarchies_ = json["dump-class-hierarchies"].asBool(); + dump_class_intervals_ = json["dump-class-intervals"].asBool(); + dump_overrides_ = json["dump-overrides"].asBool(); + dump_call_graph_ = json["dump-call-graph"].asBool(); + dump_dependencies_ = json["dump-dependencies"].asBool(); + dump_methods_ = json["dump-methods"].asBool(); + dump_coverage_info_ = json["dump-coverage-info"].asBool(); - sequential_ = variables.count("sequential") > 0; - skip_source_indexing_ = variables.count("skip-source-indexing") > 0; - skip_analysis_ = variables.count("skip-analysis") > 0; - remove_unreachable_code_ = variables.count("remove-unreachable-code") > 0; - disable_parameter_type_overrides_ = - variables.count("disable-parameter-type-overrides") > 0; - 
disable_global_type_analysis_ = - variables.count("disable-global-type-analysis") > 0; - verify_expected_output_ = variables.count("verify-expected-output") > 0; - - maximum_method_analysis_time_ = - variables.count("maximum-method-analysis-time") == 0 - ? std::nullopt - : std::make_optional( - variables["maximum-method-analysis-time"].as()); - maximum_source_sink_distance_ = - variables["maximum-source-sink-distance"].as(); - emit_all_via_cast_features_ = - variables.count("emit-all-via-cast-features") > 0; - if (!variables["allow-via-cast-feature"].empty()) { - allow_via_cast_features_ = - variables["allow-via-cast-feature"].as>(); - } + if (!json["job-id"].isNull()) { + job_id_ = std::make_optional(json["job-id"].asString()); + } - if (!variables["log-method"].empty()) { - log_methods_ = variables["log-method"].as>(); - } - if (!variables["log-method-types"].empty()) { - log_method_types_ = - variables["log-method-types"].as>(); - } - dump_class_hierarchies_ = variables.count("dump-class-hierarchies") > 0; - dump_class_intervals_ = variables.count("dump-class-intervals") > 0; - dump_overrides_ = variables.count("dump-overrides") > 0; - dump_call_graph_ = variables.count("dump-call-graph") > 0; - dump_dependencies_ = variables.count("dump-dependencies") > 0; - dump_methods_ = variables.count("dump-methods") > 0; - dump_coverage_info_ = variables.count("dump-coverage-info") > 0; - - job_id_ = variables.count("job-id") == 0 - ? std::nullopt - : std::make_optional(variables["job-id"].as()); - metarun_id_ = variables.count("metarun-id") == 0 - ? std::nullopt - : std::make_optional( - variables["metarun-id"].as()); - - enable_cross_component_analysis_ = - variables.count("enable-cross-component-analysis") > 0; - export_origins_mode_ = variables.count("always-export-origins") - ? 
ExportOriginsMode::Always - : ExportOriginsMode::OnlyOnOrigins; - propagate_across_arguments_ = - variables.count("propagate-across-arguments") > 0; - - if (!variables["heuristics"].empty()) { - heuristics_path_ = std::filesystem::path( - check_path_exists(variables["heuristics"].as())); - } + if (!json["metarun-id"].isNull()) { + metarun_id_ = std::make_optional(json["metarun-id"].asString()); + } + + enable_cross_component_analysis_ = json["enable-cross-component-analysis"].asBool(); + + export_origins_mode_ = json["always-export-origins"] ? + ExportOriginsMode::Always + : ExportOriginsMode::OnlyOnOrigins; + + propagate_across_arguments_ = json["propagate-across-arguments"].asBool(); + + if (!json["heuristics"].isNull()) { + heuristics_path_ = std::filesystem::path( + check_path_exists(json["heuristics"].asString())); + } } + std::unique_ptr Options::options_from_json_file(const std::string& options_json_path){ - program_options::variables_map variables; // Use JsonReader to parse the JSON file Json::Value json = marianatrench::JsonReader::parse_json_file(options_json_path); // Validate the JSON object marianatrench::JsonValidation::validate_object(json); - - for (const auto& key : json.getMemberNames()) { - const auto& value = json[key]; - - if (value.isString()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asString(), false))); - } else if (value.isBool()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asBool(), false))); - } else if (value.isInt()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asInt(), false))); - } else if (value.isUInt()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asUInt(), false))); - } else if (value.isDouble()) { - variables.insert(std::make_pair(key, program_options::variable_value(value.asDouble(), false))); - } else if (value.isArray()) { - std::vector array_values; - for (const auto& element : value) { 
- if (element.isString()) { - array_values.push_back(element.asString()); - } - } - variables.insert(std::make_pair(key, program_options::variable_value(array_values, false))); - } - } - return std::make_unique(variables); -} -void Options::add_options( - boost::program_options::options_description& options) { - options.add_options()( - "system-jar-paths", - program_options::value()->required(), - "A JSON configuration file with a list of paths to the system jars."); - options.add_options()( - "apk-directory", - program_options::value()->required(), - "The extraced APK obtained by `redex -u`."); - options.add_options()( - "dex-directory", - program_options::value()->required(), - "The extraced DEX obtained by `redex -u`."); - - options.add_options()( - "models-paths", - program_options::value(), - "A `;` separated list of models files and directories containing models files."); - options.add_options()( - "field-models-paths", - program_options::value(), - "A `;` separated list of field models files and directories containing field models files."); - options.add_options()( - "literal-models-paths", - program_options::value(), - "A `;` separated list of literal models files and directories containing literal models files."); - options.add_options()( - "rules-paths", - program_options::value()->required(), - "A `;` separated list of rules files and directories containing rules files."); - options.add_options()( - "proguard-configuration-paths", - program_options::value(), - "A `;` separated list of ProGuard configuration files or directories containing ProGuard configuration files."); - options.add_options()( - "lifecycles-paths", - program_options::value(), - "A `;` separated list of files and directories containing life-cycles files."); - options.add_options()( - "shims-paths", - program_options::value(), - "A `;` separated list of files and directories containing shims files."); - options.add_options()( - "graphql-metadata-paths", - program_options::value(), - "A 
graphql metadata JSON mapping file that will be used to generate asset_xid's for the GraphQL sink."); - options.add_options()( - "generated-models-directory", - program_options::value(), - "Directory where generated models will be stored."); - options.add_options()( - "model-generator-configuration-paths", - program_options::value()->required(), - "A `;` separated list of JSON configuration files each specifying a list of absolute paths to JSON model generators or names of CPP model generators."); - options.add_options()( - "model-generator-search-paths", - program_options::value(), - "A `;` separated list of paths where we look for JSON model generators."); - - options.add_options()( - "repository-root-directory", - program_options::value()->required(), - "The root of the repository."); - options.add_options()( - "source-root-directory", - program_options::value()->required(), - "The root where source files for the APK can be found."); - options.add_options()( - "source-exclude-directories", - program_options::value(), - "A `;`-separated list of directories that should be excluded from indexed source files."); - options.add_options()( - "grepo-metadata-path", - program_options::value(), - "A json file containing grepo metadata for source file indexing."); - - options.add_options()( - "apk-path", - program_options::value()->required(), - "The APK to analyze."); - options.add_options()( - "output-directory", - program_options::value()->required(), - "Directory to write results in."); - options.add_options()( - "sharded-models-directory", - program_options::value(), - "Directory where sharded output models from a previous analysis is stored."); - - options.add_options()( - "sequential", "Run the global fixpoint without parallelization."); - options.add_options()( - "skip-source-indexing", "Skip indexing java source files."); - options.add_options()("skip-analysis", "Skip taint analysis."); - options.add_options()( - "remove-unreachable-code", - "Prune unreachable 
code based on entry points specified in proguard configuration."); - options.add_options()( - "disable-parameter-type-overrides", - "Disable analyzing methods with specific parameter type information."); - options.add_options()( - "disable-global-type-analysis", - "Disable running Redex's global type analysis to infer types."); - options.add_options()( - "verify-expected-output", - "Verify any @Expected* annotations. Only valid for custom APKs containing said annotations. Results are written to `verification.json`."); - options.add_options()( - "maximum-method-analysis-time", - program_options::value(), - "Specify number of seconds as a bound. If the analysis of a method takes longer than this then make the method obscure (default taint-in-taint-out)."); - - options.add_options()( - "maximum-source-sink-distance", - program_options::value(), - "Limits the distance of sources and sinks from a trace entry point."); - options.add_options()( - "emit-all-via-cast-features", - "Compute and emit all via-cast features. There can be many such features which slows down the analysis so it is disabled by default. Use this to enable it."); - options.add_options()( - "allow-via-cast-feature", - program_options::value>(), - "Compute only these via-cast features. Specified as the full type name, e.g. Ljava/lang/Object;. Multiple inputs allowed. Use --emit-all-via-cast-features to allow everything."); - - options.add_options()( - "log-method", - program_options::value>()->multitoken(), - "Enable logging for the given methods."); - options.add_options()( - "log-method-types", - program_options::value>()->multitoken(), - "Enable logging of types for the given methods."); - options.add_options()( - "dump-class-hierarchies", - "Dump the class hierarchies in `class_hierarchies.json`."); - options.add_options()( - "dump-class-intervals", - "Dump the class intervals in `class_intervals.json`. 
For test/debug only."); - options.add_options()( - "dump-overrides", "Dump the override graph in `overrides.json`."); - options.add_options()( - "dump-call-graph", "Dump the call graph in `call_graph.json`."); - options.add_options()( - "dump-dependencies", "Dump the dependency graph in `dependencies.json`."); - options.add_options()( - "dump-methods", "Dump the list of method signatures in `methods.json`."); - options.add_options()( - "dump-coverage-info", - "Dump the file coverage info into `file_coverage.txt` and rule coverage info into `rule_coverage.json`."); - - options.add_options()( - "job-id", - program_options::value(), - "Identifier for the current analysis run."); - options.add_options()( - "metarun-id", - program_options::value(), - "Identifier for a group of analysis runs."); - options.add_options()( - "enable-cross-component-analysis", - "Compute taint flows across Android components."); - options.add_options()( - "enable-class-intervals", - "Compute and apply class intervals for improved precision."); - options.add_options()( - "always-export-origins", - "Export origin information for all frames instead of only leaves. Used for debugging."); - options.add_options()( - "propagate-across-arguments", - "Enable taint propagation across object type arguments. By default, taint propagation is only tracked for return values and the `this` argument. This enables taint propagation across method invocations for all other object type arguments as well."); - options.add_options()( - "heuristics", - program_options::value(), - "Path to JSON configuration file which specifies heuristics parameters to use during the analysis. 
See the documentation for available heuristics parameters."); + return std::make_unique(json); } const std::vector& Options::models_paths() const { diff --git a/source/Options.h b/source/Options.h index 4495eaea..92fbdd65 100644 --- a/source/Options.h +++ b/source/Options.h @@ -46,12 +46,10 @@ class Options final { ExportOriginsMode export_origins_mode = ExportOriginsMode::Always, bool propagate_across_arguments = false); - explicit Options(const boost::program_options::variables_map& variables); + explicit Options(const Json::Value &json); DELETE_COPY_CONSTRUCTORS_AND_ASSIGNMENTS(Options) - static void add_options(boost::program_options::options_description& options); - static std::unique_ptr options_from_json_file(const std::string& options_json_path); const std::vector& models_paths() const; From 9942cd9a097d2fe08573c4c42631ef65e8ab0901 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Tue, 23 Jul 2024 20:31:35 +0300 Subject: [PATCH 23/32] Remove old unused methods --- source/MarianaTrench.cpp | 4 ---- source/MarianaTrench.h | 2 -- 2 files changed, 6 deletions(-) diff --git a/source/MarianaTrench.cpp b/source/MarianaTrench.cpp index 8f1e42a1..b9bf70d4 100644 --- a/source/MarianaTrench.cpp +++ b/source/MarianaTrench.cpp @@ -60,10 +60,6 @@ MarianaTrench::MarianaTrench() namespace program_options = boost::program_options; -void MarianaTrench::add_options( - program_options::options_description& options) const { - Options::add_options(options); -} Registry MarianaTrench::analyze(Context& context) { context.artificial_methods = std::make_unique( diff --git a/source/MarianaTrench.h b/source/MarianaTrench.h index 7a0f717a..1e1f8910 100644 --- a/source/MarianaTrench.h +++ b/source/MarianaTrench.h @@ -22,8 +22,6 @@ class MarianaTrench : public Tool { public: MarianaTrench(); - void add_options( - boost::program_options::options_description& options) const override; void run(const boost::program_options::variables_map& variables) override; private: From 
c00ac54a5e0fd38f6a7d468ea41a608c96689e03 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 00:23:39 +0300 Subject: [PATCH 24/32] Add more validation methods --- source/JsonValidation.cpp | 31 +++++++++++++++++++++++++++++++ source/JsonValidation.h | 2 ++ 2 files changed, 33 insertions(+) diff --git a/source/JsonValidation.cpp b/source/JsonValidation.cpp index b3db0b1f..4df08b1e 100644 --- a/source/JsonValidation.cpp +++ b/source/JsonValidation.cpp @@ -84,6 +84,21 @@ std::string JsonValidation::string( return string.asString(); } +std::optional JsonValidation::optional_string( + const Json::Value& value, + const std::string& field) { + validate_object( + value, fmt::format("non-null object with string field `{}`", field)); + const auto& string = value[field]; + if (string.isNull()) { + return std::nullopt; + } + if (!string.isString()) { + throw JsonValidationError(value, field, /* expected */ "string"); + } + return string.asString(); +} + int JsonValidation::integer(const Json::Value& value) { if (value.isNull() || !value.isInt()) { throw JsonValidationError( @@ -162,6 +177,22 @@ bool JsonValidation::boolean( return boolean.asBool(); } +bool JsonValidation::optional_boolean( + const Json::Value& value, + const std::string& field, + bool default_value) { + validate_object( + value, fmt::format("non-null object with boolean field `{}`", field)); + const auto& boolean = value[field]; + if (boolean.isNull()) { + return default_value; + } + if (!boolean.isBool()) { + throw JsonValidationError(value, field, /* expected */ "boolean"); + } + return boolean.asBool(); +} + const Json::Value& JsonValidation::null_or_array(const Json::Value& value) { if (!value.isNull() && !value.isArray()) { throw JsonValidationError( diff --git a/source/JsonValidation.h b/source/JsonValidation.h index 45c59ebb..d5823c83 100644 --- a/source/JsonValidation.h +++ b/source/JsonValidation.h @@ -40,6 +40,7 @@ class JsonValidation final { static std::string string(const 
Json::Value& value); static std::string string(const Json::Value& value, const std::string& field); + static std::optional optional_string(const Json::Value& value, const std::string& field); static int integer(const Json::Value& value); static int integer(const Json::Value& value, const std::string& field); @@ -54,6 +55,7 @@ class JsonValidation final { static bool boolean(const Json::Value& value); static bool boolean(const Json::Value& value, const std::string& field); + static bool optional_boolean(const Json::Value& value, const std::string& field,bool default_value); static const Json::Value& null_or_array(const Json::Value& value); static const Json::Value& null_or_array( From 3401b4818bcc14c2ea8f3d74c89271540b539860 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 00:23:59 +0300 Subject: [PATCH 25/32] Validate the json values using JsonValidation --- source/Options.cpp | 145 +++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 77 deletions(-) diff --git a/source/Options.cpp b/source/Options.cpp index 401143f5..5961d9d0 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -149,164 +149,155 @@ Options::Options( Options::Options(const Json::Value &json){ system_jar_paths_ = parse_paths_list( - json["system-jar-paths"].asString(), + JsonValidation::string(json,"system-jar-paths"), std::nullopt, /* check exist */ false); apk_directory_ = - check_directory_exists(json["apk-directory"].asString()); + check_directory_exists(JsonValidation::string(json,"apk-directory")); dex_directory_ = - check_directory_exists(json["dex-directory"].asString()); + check_directory_exists(JsonValidation::string(json,"dex-directory")); - if (!json["models-paths"].isNull()) { + if (json.isMember("models-paths")) { models_paths_ = parse_paths_list( - json["models-paths"].asString(), /* extension */ ".json"); + JsonValidation::string(json,"models-paths"), /* extension */ ".json"); } - if (!json["field-models-paths"].isNull()) { + if 
(json.isMember("field-models-paths")) { field_models_paths_ = parse_paths_list( - json["field-models-paths"].asString(), + JsonValidation::string(json,"field-models-paths"), /* extension */ ".json"); } - if (!json["literal-models-paths"].isNull()) { + if (json.isMember("literal-models-paths")) { literal_models_paths_ = parse_paths_list( - json["literal-models-paths"].asString(), + JsonValidation::string(json,"literal-models-paths"), /* extension */ ".json"); } rules_paths_ = parse_paths_list( - json["rules-paths"].asString(), /* extension */ ".json"); + JsonValidation::string(json,"rules-paths"), /* extension */ ".json"); - if (!json["lifecycles-paths"].isNull()) { + if (json.isMember("lifecycles-paths")) { lifecycles_paths_ = parse_paths_list( - json["lifecycles-paths"].asString(), + JsonValidation::string(json,"lifecycles-paths"), /* extension */ ".json"); } - if (!json["shims-paths"].isNull()) { + if (json.isMember("shims-paths")) { shims_paths_ = parse_paths_list( - json["shims-paths"].asString(), + JsonValidation::string(json,"shims-paths"), /* extension */ ".json"); } - if (!json["graphql-metadata-paths"].isNull()) { + if (json.isMember("graphql-metadata-paths")) { graphql_metadata_paths_ = check_path_exists( - json["graphql-metadata-paths"].asString()); + JsonValidation::string(json,"graphql-metadata-paths")); } else { graphql_metadata_paths_ = ""; } - if (!json["proguard-configuration-paths"].isNull()) { + if (json.isMember("proguard-configuration-paths")) { proguard_configuration_paths_ = parse_paths_list( - json["proguard-configuration-paths"].asString(), + JsonValidation::string(json,"proguard-configuration-paths"), /* extension */ ".pro"); } - if (!json["generated-models-directory"].isNull()) { + if (json.isMember("generated-models-directory")) { generated_models_directory_ = check_path_exists( - json["generated-models-directory"].asString()); + JsonValidation::string(json,"generated-models-directory")); } generator_configuration_paths_ = 
parse_paths_list( - json["model-generator-configuration-paths"].asString(), + JsonValidation::string(json,"model-generator-configuration-paths"), /* extension */ ".json"); model_generators_configuration_ = parse_json_configuration_files(generator_configuration_paths_); - if (!json["model-generator-search-paths"].isNull()) { + if (json.isMember("model-generator-search-paths")) { model_generator_search_paths_ = parse_search_paths( - json["model-generator-search-paths"].asString()); + JsonValidation::string(json,"model-generator-search-paths")); } repository_root_directory_ = check_directory_exists( - json["repository-root-directory"].asString()); + JsonValidation::string(json,"repository-root-directory")); source_root_directory_ = check_directory_exists( - json["source-root-directory"].asString()); + JsonValidation::string(json,"source-root-directory")); - if (!json["source-exclude-directories"].isNull()) { + if (json.isMember("source-exclude-directories")) { source_exclude_directories_ = parse_paths_list( - json["source-exclude-directories"].asString(), + JsonValidation::string(json,"source-exclude-directories"), /* extension */ std::nullopt, /* check_exist */ false); } - if (!json["grepo-metadata-path"].isNull()) { + if (json.isMember("grepo-metadata-path")) { grepo_metadata_path_ = - check_path_exists(json["grepo-metadata-path"].asString()); + check_path_exists(JsonValidation::string(json,"grepo-metadata-path")); } - apk_path_ = check_path_exists(json["apk-path"].asString()); + apk_path_ = check_path_exists(JsonValidation::string(json,"apk-path")); output_directory_ = std::filesystem::path( - check_directory_exists(json["output-directory"].asString())); + check_directory_exists(JsonValidation::string(json,"output-directory"))); - if (!json["sharded-models-directory"].isNull()) { + if (json.isMember("sharded-models-directory")) { sharded_models_directory_ = std::filesystem::path(check_directory_exists( - json["sharded-models-directory"].asString())); + 
JsonValidation::string(json,"sharded-models-directory"))); } - sequential_ = json["sequential"].asBool(); - skip_source_indexing_ = json["skip-source-indexing"].asBool(); - skip_analysis_ = json["skip-analysis"].asBool(); - remove_unreachable_code_ = json["remove-unreachable-code"].asBool(); - disable_parameter_type_overrides_ = - json["disable-parameter-type-overrides"].asBool(); - disable_global_type_analysis_ = json["disable-global-type-analysis"].asBool(); - verify_expected_output_ = json["verify-expected-output"].asBool(); - maximum_method_analysis_time_ = - json["maximum-method-analysis-time"] ? - std::nullopt - : std::make_optional(json["maximum-method-analysis-time"].asInt()); - maximum_source_sink_distance_ = - json["maximum-source-sink-distance"].asInt(); - emit_all_via_cast_features_ = - json["emit-all-via-cast-features"].asBool(); - - if (!json["allow-via-cast-feature"].isNull()) { - for (const auto& value : json["allow-via-cast-feature"]) { - allow_via_cast_features_.push_back(value.asString()); + sequential_ = JsonValidation::optional_boolean(json,"sequential",false); + skip_source_indexing_ = JsonValidation::optional_boolean(json,"skip-source-indexing",false); + skip_analysis_ = JsonValidation::optional_boolean(json,"skip-analysis",false); + remove_unreachable_code_ = JsonValidation::optional_boolean(json,"remove-unreachable-code",false); + disable_parameter_type_overrides_ = JsonValidation::optional_boolean(json,"disable-parameter-type-overrides",false); + disable_global_type_analysis_ = JsonValidation::optional_boolean(json,"disable-global-type-analysis",false); + verify_expected_output_ = JsonValidation::optional_boolean(json,"verify-expected-output",false); + maximum_method_analysis_time_ = JsonValidation::optional_integer(json,"maximum-method-analysis-time"); + + maximum_source_sink_distance_ = JsonValidation::integer(json,"maximum-source-sink-distance"); + emit_all_via_cast_features_ = 
JsonValidation::optional_boolean(json,"emit-all-via-cast-features",false); + + if (json.isMember("allow-via-cast-feature")) { + for (const auto& value : JsonValidation::nonempty_array(json,"allow-via-cast-feature")) { + allow_via_cast_features_.push_back(JsonValidation::string(value)); } } - if (!json["log-method"].isNull()) { - for (const auto& value : json["log-method"]) { - log_methods_.push_back(value.asString()); + if (json.isMember("log-method")) { + for (const auto& value : JsonValidation::nonempty_array(json,"log-method")) { + log_methods_.push_back(JsonValidation::string(value)); } } - if (!json["log-method-types"].isNull()) { - for (const auto& value : json["log-method-types"]) { - log_method_types_.push_back(value.asString()); + if (json.isMember("log-method-types")) { + for (const auto& value :JsonValidation::nonempty_array(json,"log-method-types")) { + log_method_types_.push_back(JsonValidation::string(value)); } } - dump_class_hierarchies_ = json["dump-class-hierarchies"].asBool(); - dump_class_intervals_ = json["dump-class-intervals"].asBool(); - dump_overrides_ = json["dump-overrides"].asBool(); - dump_call_graph_ = json["dump-call-graph"].asBool(); - dump_dependencies_ = json["dump-dependencies"].asBool(); - dump_methods_ = json["dump-methods"].asBool(); - dump_coverage_info_ = json["dump-coverage-info"].asBool(); + dump_class_hierarchies_ = JsonValidation::optional_boolean(json,"dump-class-hierarchies",false); + dump_class_intervals_ = JsonValidation::optional_boolean(json,"dump-class-intervals",false); + dump_overrides_ = JsonValidation::optional_boolean(json,"dump-overrides",false); + dump_call_graph_ = JsonValidation::optional_boolean(json,"dump-call-graph",false); + dump_dependencies_ = JsonValidation::optional_boolean(json,"dump-dependencies",false); + dump_methods_ = JsonValidation::optional_boolean(json,"dump-methods",false); + dump_coverage_info_ = JsonValidation::optional_boolean(json,"dump-coverage-info",false); - if 
(!json["job-id"].isNull()) { - job_id_ = std::make_optional(json["job-id"].asString()); - } + job_id_ = JsonValidation::optional_string(json,"job-id"); + metarun_id_ = JsonValidation::optional_string(json,"metarun-id"); - if (!json["metarun-id"].isNull()) { - metarun_id_ = std::make_optional(json["metarun-id"].asString()); - } + enable_cross_component_analysis_ = JsonValidation::optional_boolean(json,"enable-cross-component-analysis",false); - enable_cross_component_analysis_ = json["enable-cross-component-analysis"].asBool(); - export_origins_mode_ = json["always-export-origins"] ? + export_origins_mode_ = JsonValidation::optional_boolean(json,"always-export-origins",false) ? ExportOriginsMode::Always : ExportOriginsMode::OnlyOnOrigins; - propagate_across_arguments_ = json["propagate-across-arguments"].asBool(); + propagate_across_arguments_ = JsonValidation::optional_boolean(json,"propagate-across-arguments",false); - if (!json["heuristics"].isNull()) { + if (json.isMember("heuristics")) { heuristics_path_ = std::filesystem::path( - check_path_exists(json["heuristics"].asString())); + check_path_exists(JsonValidation::string(json,"heuristics"))); } } From 0d87ac132662349143e7e29f340b4f857701e330 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 02:11:25 +0300 Subject: [PATCH 26/32] pass the jar paths as array of strings --- shim/shim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shim/shim.py b/shim/shim.py index 739a0054..923e4a3e 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -622,7 +622,7 @@ def _get_command_options_json( arguments: argparse.Namespace, apk_directory: str, dex_directory: str ) -> dict: options_json = {} - options_json["system-jar-paths"] = arguments.system_jar_configuration_path + options_json["system-jar-paths"] = arguments.system_jar_configuration_path.split(";") options_json["apk-directory"] = apk_directory options_json["dex-directory"] = dex_directory options_json["rules-paths"] = arguments.rules_paths 
From 8ab7b89e0adc0dc48dbd65ca3f51448f0f6fd27c Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 02:11:42 +0300 Subject: [PATCH 27/32] string_list json validation --- source/JsonValidation.cpp | 18 ++++++++++++++++++ source/JsonValidation.h | 1 + 2 files changed, 19 insertions(+) diff --git a/source/JsonValidation.cpp b/source/JsonValidation.cpp index 4df08b1e..6c495607 100644 --- a/source/JsonValidation.cpp +++ b/source/JsonValidation.cpp @@ -99,6 +99,24 @@ std::optional JsonValidation::optional_string( return string.asString(); } +std::vector JsonValidation::string_list(const Json::Value& value, const std::string& field){ + validate_object( + value, fmt::format("non-null object with array field `{}`", field)); + const auto& string_list = value[field]; + if (string_list.isNull() || !string_list.isArray() || string_list.empty()) { + throw JsonValidationError(value, field, /* expected */ "array"); + } + + std::vector result; + for (const auto& element : string_list) { + if (!element.isString()) { + throw JsonValidationError(value, field, /* expected */ "array of strings"); + } + result.push_back(element.asString()); + } + return result; +} + int JsonValidation::integer(const Json::Value& value) { if (value.isNull() || !value.isInt()) { throw JsonValidationError( diff --git a/source/JsonValidation.h b/source/JsonValidation.h index d5823c83..f18e0570 100644 --- a/source/JsonValidation.h +++ b/source/JsonValidation.h @@ -41,6 +41,7 @@ class JsonValidation final { static std::string string(const Json::Value& value); static std::string string(const Json::Value& value, const std::string& field); static std::optional optional_string(const Json::Value& value, const std::string& field); + static std::vector string_list(const Json::Value& value, const std::string& field); static int integer(const Json::Value& value); static int integer(const Json::Value& value, const std::string& field); From 8fbcfbd1338ac508e4bef27c90d37ef8523fee09 Mon Sep 17 00:00:00 2001 
From: Zeyad Tarek Date: Wed, 24 Jul 2024 02:11:56 +0300 Subject: [PATCH 28/32] Parse jar paths as string_list --- source/Options.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/source/Options.cpp b/source/Options.cpp index 5961d9d0..0a951cc8 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -148,10 +148,7 @@ Options::Options( propagate_across_arguments_(propagate_across_arguments) {} Options::Options(const Json::Value &json){ - system_jar_paths_ = parse_paths_list( - JsonValidation::string(json,"system-jar-paths"), - std::nullopt, - /* check exist */ false); + system_jar_paths_ = JsonValidation::string_list(json,"system-jar-paths"); apk_directory_ = check_directory_exists(JsonValidation::string(json,"apk-directory")); From 5fb9c8ad69c0407644b00718d5290ba6079b55d3 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 14:34:37 +0300 Subject: [PATCH 29/32] remove string_list --- source/JsonValidation.cpp | 17 ----------------- source/JsonValidation.h | 1 - 2 files changed, 18 deletions(-) diff --git a/source/JsonValidation.cpp b/source/JsonValidation.cpp index 6c495607..6a1896f5 100644 --- a/source/JsonValidation.cpp +++ b/source/JsonValidation.cpp @@ -99,23 +99,6 @@ std::optional JsonValidation::optional_string( return string.asString(); } -std::vector JsonValidation::string_list(const Json::Value& value, const std::string& field){ - validate_object( - value, fmt::format("non-null object with array field `{}`", field)); - const auto& string_list = value[field]; - if (string_list.isNull() || !string_list.isArray() || string_list.empty()) { - throw JsonValidationError(value, field, /* expected */ "array"); - } - - std::vector result; - for (const auto& element : string_list) { - if (!element.isString()) { - throw JsonValidationError(value, field, /* expected */ "array of strings"); - } - result.push_back(element.asString()); - } - return result; -} int JsonValidation::integer(const Json::Value& value) { if (value.isNull() 
|| !value.isInt()) { diff --git a/source/JsonValidation.h b/source/JsonValidation.h index f18e0570..d5823c83 100644 --- a/source/JsonValidation.h +++ b/source/JsonValidation.h @@ -41,7 +41,6 @@ class JsonValidation final { static std::string string(const Json::Value& value); static std::string string(const Json::Value& value, const std::string& field); static std::optional optional_string(const Json::Value& value, const std::string& field); - static std::vector string_list(const Json::Value& value, const std::string& field); static int integer(const Json::Value& value); static int integer(const Json::Value& value, const std::string& field); From 14f6b0343ecdceed990e4a54836bf50f403fe390 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 14:34:51 +0300 Subject: [PATCH 30/32] return system jar files as string --- shim/shim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shim/shim.py b/shim/shim.py index 923e4a3e..739a0054 100644 --- a/shim/shim.py +++ b/shim/shim.py @@ -622,7 +622,7 @@ def _get_command_options_json( arguments: argparse.Namespace, apk_directory: str, dex_directory: str ) -> dict: options_json = {} - options_json["system-jar-paths"] = arguments.system_jar_configuration_path.split(";") + options_json["system-jar-paths"] = arguments.system_jar_configuration_path options_json["apk-directory"] = apk_directory options_json["dex-directory"] = dex_directory options_json["rules-paths"] = arguments.rules_paths From 269d189118384c33d64ae74f65967317da747b96 Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 15:04:13 +0300 Subject: [PATCH 31/32] rename from_json_file method --- source/Options.cpp | 9 +++++---- source/Options.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/source/Options.cpp b/source/Options.cpp index 0a951cc8..6eb93d59 100644 --- a/source/Options.cpp +++ b/source/Options.cpp @@ -148,7 +148,10 @@ Options::Options( propagate_across_arguments_(propagate_across_arguments) {}
Options::Options(const Json::Value &json){ - system_jar_paths_ = JsonValidation::string_list(json,"system-jar-paths"); + system_jar_paths_ = parse_paths_list( + JsonValidation::string(json,"system-jar-paths"), + std::nullopt, + /* check exist */ false); apk_directory_ = check_directory_exists(JsonValidation::string(json,"apk-directory")); @@ -299,10 +302,8 @@ Options::Options(const Json::Value &json){ } -std::unique_ptr Options::options_from_json_file(const std::string& options_json_path){ - // Use JsonReader to parse the JSON file +std::unique_ptr Options::from_json_file(const std::filesystem::path& options_json_path){ Json::Value json = marianatrench::JsonReader::parse_json_file(options_json_path); - // Validate the JSON object marianatrench::JsonValidation::validate_object(json); return std::make_unique(json); } diff --git a/source/Options.h b/source/Options.h index 92fbdd65..4166d3e8 100644 --- a/source/Options.h +++ b/source/Options.h @@ -50,7 +50,7 @@ class Options final { DELETE_COPY_CONSTRUCTORS_AND_ASSIGNMENTS(Options) - static std::unique_ptr options_from_json_file(const std::string& options_json_path); + static std::unique_ptr from_json_file(const std::filesystem::path& options_json_path); const std::vector& models_paths() const; const std::vector& field_models_paths() const; From dfe8602c9129178d56a580cba6da1438fe21593d Mon Sep 17 00:00:00 2001 From: Zeyad Tarek Date: Wed, 24 Jul 2024 15:04:23 +0300 Subject: [PATCH 32/32] accept std::filesystem::path --- source/MarianaTrench.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/MarianaTrench.cpp b/source/MarianaTrench.cpp index b9bf70d4..6cfc7011 100644 --- a/source/MarianaTrench.cpp +++ b/source/MarianaTrench.cpp @@ -419,9 +419,9 @@ std::vector filter_existing_jars( void MarianaTrench::run(const program_options::variables_map& variables) { Context context; - std::string json_file_path = variables["config"].as(); + std::filesystem::path json_file_path = 
std::filesystem::path(variables["config"].as()); - context.options = Options::options_from_json_file(json_file_path); + context.options = Options::from_json_file(json_file_path); const auto& options = *context.options; if (auto heuristics_path = options.heuristics_path()) {