Skip to content

Commit

Permalink
Add support for app ids in spark
Browse files Browse the repository at this point in the history
  • Loading branch information
ambud committed Feb 18, 2023
1 parent f28f20d commit 1f8e4c6
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 3 deletions.
3 changes: 1 addition & 2 deletions gprofiler/gprofiler_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class ProfileData:


class ProfilingErrorStack(StackToSampleCount):

PROFILING_ERROR_STACK_PATTERN = re.compile(r".*;\[Profiling .+: .+\]")

def __init__(self, what: str, reason: str, comm: str):
Expand All @@ -54,7 +53,7 @@ def attach_error_to_stacks(
) -> StackToSampleCount:
_, error_frame = next(iter(error_stack)).split(";", maxsplit=1)
dest_stacks: StackToSampleCount = StackToSampleCount()
for (frame, count) in source_stacks.items():
for frame, count in source_stacks.items():
comm, stack = frame.split(";", maxsplit=1)
annotated = f"{comm};{error_frame};{stack}"
dest_stacks[annotated] = count
Expand Down
8 changes: 8 additions & 0 deletions gprofiler/metadata/application_identifiers_java.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class _JavaSparkApplicationIdentifier(_ApplicationIdentifier):
_SPARK_PROPS_FILE = os.path.join("__spark_conf__", "__spark_conf__.properties")
_APP_NAME_NOT_FOUND = "app name not found"
_APP_NAME_KEY = "spark.app.name"
_APP_ID_KEY = "--app-id"

@staticmethod
def _is_java_spark_executor(process: Process) -> bool:
Expand All @@ -73,4 +74,11 @@ def get_app_id(self, process: Process) -> Optional[str]:
)
if self._APP_NAME_KEY in props:
return f"spark: {props[self._APP_NAME_KEY]}"
args = process.cmdline()
try:
for idx, x in enumerate(args):
if x == self._APP_ID_KEY:
return f"spark: {args[idx+1]}"
except Exception:
pass
return self._APP_NAME_NOT_FOUND
1 change: 0 additions & 1 deletion gprofiler/spark/spark_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,6 @@ def _yarn_get_spark_apps(self, *args: Any, **kwargs: Any) -> Dict[str, Tuple[str

if metrics_json.get("apps"):
if metrics_json["apps"].get("app") is not None:

for app_json in metrics_json["apps"]["app"]:
app_id = app_json.get("id")
tracking_url = app_json.get("trackingUrl")
Expand Down

0 comments on commit 1f8e4c6

Please sign in to comment.