From 6288fdb268ba65a71c73ad45a183c4926f44e922 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 1 Oct 2024 14:06:04 -0700 Subject: [PATCH] Generate log files for Python Profiling cli Signed-off-by: Niranjan Artal --- user_tools/src/spark_rapids_pytools/common/utilities.py | 7 ++----- user_tools/src/spark_rapids_pytools/rapids/rapids_job.py | 3 ++- user_tools/src/spark_rapids_pytools/rapids/tool_ctxt.py | 3 +++ .../src/spark_rapids_pytools/resources/profiling-conf.yaml | 2 ++ .../spark_rapids_pytools/resources/qualification-conf.yaml | 1 + user_tools/src/spark_rapids_tools/cmdli/argprocessor.py | 3 ++- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/user_tools/src/spark_rapids_pytools/common/utilities.py b/user_tools/src/spark_rapids_pytools/common/utilities.py index b867a89a0..d40ec13ab 100644 --- a/user_tools/src/spark_rapids_pytools/common/utilities.py +++ b/user_tools/src/spark_rapids_pytools/common/utilities.py @@ -231,7 +231,7 @@ def get_and_setup_logger(cls, type_label: str, debug_mode: bool = False): return logger @classmethod - def modify_log4j_properties(cls, prop_file_path: str, new_log_dir: str) -> str: + def modify_log4j_properties(cls, prop_file_path: str, new_log_file: str) -> str: """ Modifies the log file path in a log4j properties file to redirect logging output to a new location. @@ -242,15 +242,12 @@ def modify_log4j_properties(cls, prop_file_path: str, new_log_dir: str) -> str: :param prop_file_path: The file path to the original log4j.properties file. This file should contain configurations for the log4j logging utility. - :param new_log_dir: The base output directory where the new log file will be created. - The actual log file named 'rapids_4_spark_qualification_stderr.log' - will be placed in this directory. + :param new_log_file: The file path where the logging output is saved. :return str: The file path to the temporary modified log4j.properties file. This temporary file retains the modifications and can be accessed until explicitly deleted after the java process is completed. """ - new_log_file = f'{new_log_dir}/rapids_4_spark_qualification_stderr.log' with open(prop_file_path, 'r', encoding='utf-8') as file: lines = file.readlines() diff --git a/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py b/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py index 49cdb3209..f095daa3f 100644 --- a/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py +++ b/user_tools/src/spark_rapids_pytools/rapids/rapids_job.py @@ -192,8 +192,9 @@ def _build_jvm_args(self): if jvm_k.startswith('D'): if jvm_k == 'Dlog4j.configuration': rapids_output_folder = self.exec_ctxt.get_rapids_output_folder() + log4j_file_name = self.exec_ctxt.get_log4j_properties_file() jvm_arg = ToolLogging.modify_log4j_properties( - jvm_arg, f'{rapids_output_folder}') + jvm_arg, f'{rapids_output_folder}/{log4j_file_name}') self.exec_ctxt.set_local('tmp_log4j', jvm_arg) val = f'-{jvm_k}={jvm_arg}' else: diff --git a/user_tools/src/spark_rapids_pytools/rapids/tool_ctxt.py b/user_tools/src/spark_rapids_pytools/rapids/tool_ctxt.py index 320aa69c9..e0756784b 100644 --- a/user_tools/src/spark_rapids_pytools/rapids/tool_ctxt.py +++ b/user_tools/src/spark_rapids_pytools/rapids/tool_ctxt.py @@ -215,6 +215,9 @@ def get_rapids_output_folder(self) -> str: return root_dir return FSUtil.build_path(root_dir, rapids_subfolder) + def get_log4j_properties_file(self) -> str: + return self.get_value_silent('toolOutput', 'textFormat', 'log4jFileName') + def get_platform_name(self) -> str: """ This used to get the lower case of the platform of the runtime. diff --git a/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml b/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml index 877d94b42..847e33914 100644 --- a/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml +++ b/user_tools/src/spark_rapids_pytools/resources/profiling-conf.yaml @@ -1,5 +1,7 @@ toolOutput: subFolder: rapids_4_spark_profile + textFormat: + log4jFileName: rapids_4_spark_profile_stderr.log recommendations: fileName: profile.log headers: diff --git a/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml b/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml index 54496464e..694696c7d 100644 --- a/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml +++ b/user_tools/src/spark_rapids_pytools/resources/qualification-conf.yaml @@ -4,6 +4,7 @@ toolOutput: textFormat: summaryLog: fileName: rapids_4_spark_qualification_output.log + log4jFileName: rapids_4_spark_qualification_stderr.log csv: unsupportedOperatorsReport: fileName: rapids_4_spark_qualification_output_unsupportedOperators.csv diff --git a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py index bb7da1def..e578d23b6 100644 --- a/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py +++ b/user_tools/src/spark_rapids_tools/cmdli/argprocessor.py @@ -617,7 +617,8 @@ def build_tools_args(self) -> dict: 'remoteFolder': None, 'platformArgs': { 'jvmMaxHeapSize': self.p_args['toolArgs']['jvmMaxHeapSize'], - 'jvmGC': self.p_args['toolArgs']['jvmGC'] + 'jvmGC': self.p_args['toolArgs']['jvmGC'], + 'Dlog4j.configuration': self.p_args['toolArgs']['log4jPath'] }, 'jobResources': self.p_args['toolArgs']['jobResources'] },