Remove old csv_converter module, add new kernel_name_shortener module
Signed-off-by: colramos-amd <colramos@amd.com>
coleramos425 committed Jan 30, 2024
1 parent e1867d3 commit ad169fa
Showing 4 changed files with 18 additions and 141 deletions.
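For call sites, the change amounts to a one-line import update. A condensed before/after view, taken directly from the hunks below:

    # Before (module removed in this commit)
    from utils.csv_processor import kernel_name_shortener

    # After (renamed module)
    from utils.kernel_name_shortener import kernel_name_shortener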
2 changes: 1 addition & 1 deletion src/omniperf_analyze/analysis_cli.py
@@ -25,7 +25,7 @@
 from omniperf_analyze.analysis_base import OmniAnalyze_Base
 from utils.utils import demarcate, error
 from utils import file_io, parser, tty
-from utils.csv_processor import kernel_name_shortener
+from utils.kernel_name_shortener import kernel_name_shortener

 class cli_analysis(OmniAnalyze_Base):

2 changes: 1 addition & 1 deletion src/omniperf_profile/profiler_rocprof_v1.py
@@ -27,7 +27,7 @@

 from omniperf_profile.profiler_base import OmniProfiler_Base
 from utils.utils import demarcate, replace_timestamps
-from utils.csv_processor import kernel_name_shortener
+from utils.kernel_name_shortener import kernel_name_shortener


 class rocprof_v1_profiler(OmniProfiler_Base):
2 changes: 1 addition & 1 deletion src/omniperf_profile/profiler_rocprof_v2.py
@@ -26,7 +26,7 @@
 import logging
 from omniperf_profile.profiler_base import OmniProfiler_Base
 from utils.utils import demarcate
-from utils.csv_processor import kernel_name_shortener
+from utils.kernel_name_shortener import kernel_name_shortener

 class rocprof_v2_profiler(OmniProfiler_Base):
     def __init__(self,profiling_args,profiler_mode,soc):
153 changes: 15 additions & 138 deletions src/utils/csv_processor.py → src/utils/kernel_name_shortener.py
@@ -22,41 +22,33 @@
 # SOFTWARE.
 ##############################################################################el

-import argparse
-import collections
 import os
-import subprocess
 import sys
+import logging
+import glob
+import re
+import subprocess
 import pandas as pd
-import getpass
-from pymongo import MongoClient
-from tqdm import tqdm
-import glob
-import re
-import logging

-cache = dict()
-
-supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
-MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
+from utils.utils import error
+
+cache = dict()

 # Note: shortener is now dependent on a rocprof install with llvm
 def kernel_name_shortener(workload_dir, level):
     def shorten_file(df, level):
         global cache

-        columnName = ""
-        if "KernelName" in df:
-            columnName = "KernelName"
+        column_name = ""
+        if "Kernel_Name" in df:
+            column_name = "Kernel_Name"
         if "Name" in df:
-            columnName = "Name"
+            column_name = "Name"

-        if columnName == "KernelName" or columnName == "Name":
+        if column_name == "Kernel_Name" or column_name == "Name":
             # loop through all indices
             for index in df.index:
-                original_name = df.loc[index, columnName]
+                original_name = df.loc[index, column_name]
                 if original_name in cache:
                     continue

@@ -122,20 +114,15 @@ def shorten_file(df, level):
                 if new_name == None or new_name == "":
                     cache[original_name] = demangled_name

-            df[columnName] = df[columnName].map(cache)
+            df[column_name] = df[column_name].map(cache)

         return df

     # Only shorten if valid shortening level
     if level < 5:
         cpp_filt = os.path.join("/usr", "bin", "c++filt")
         if not os.path.isfile(cpp_filt):
-            logging.error(
-                "Error: Could not resolve c++filt in expected directory: {}".format(
-                    cpp_filt
-                )
-            )
-            sys.exit(1)
+            error("Could not resolve c++filt in expected directory: %s" % cpp_filt)

         for fpath in glob.glob(workload_dir + "/*.csv"):
             try:
@@ -147,116 +134,6 @@ def shorten_file(df, level):
                 modified_df = shorten_file(orig_df, level)
                 modified_df.to_csv(fpath, index=False)
             except pd.errors.EmptyDataError:
-                logging.debug("[profiling] Skipping shortening on empty csv " + str(fpath))
-
-    logging.info("[profiling] KernelName shortening complete!")
-
-
-# Verify target directory and setup connection
-def parse(args, profileAndExport):
-    host = args.host
-    port = str(args.port)
-    username = args.username
-
-    if profileAndExport:
-        workload = args.workload + "/" + args.target + "/"
-    else:
-        workload = args.workload
-
-    # Verify directory path is valid
-    print("Pulling data from ", workload)
-    if os.path.isdir(workload):
-        print("The directory exists")
-    else:
-        raise argparse.ArgumentTypeError("Directory does not exist")
-
-    sysInfoPath = workload + "/sysinfo.csv"
-    if os.path.isfile(sysInfoPath):
-        print("Found sysinfo file")
-        sysInfo = pd.read_csv(sysInfoPath)
-        # Extract SoC
-        arch = sysInfo["gpu_soc"][0]
-        soc = supported_arch[arch]
-        # Extract name
-        name = sysInfo["workload_name"][0]
-    else:
-        print("Unable to parse SoC or workload name from sysinfo.csv")
-        sys.exit(1)
-
-    db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
-
-    if args.password == "":
-        try:
-            password = getpass.getpass()
-        except Exception as error:
-            print("PASSWORD ERROR", error)
-        else:
-            print("Password recieved")
-    else:
-        password = args.password
-
-    if db.find(".") != -1 or db.find("-") != -1:
-        raise ValueError("'-' and '.' are not permited in workload name", db)
-
-    connectionInfo = {
-        "username": username,
-        "password": password,
-        "host": host,
-        "port": port,
-        "workload": workload,
-        "db": db,
-    }
-
-    return connectionInfo
-
-
-def convert_folder(connectionInfo):
-    # Test connection
-    connection_str = (
-        "mongodb://"
-        + connectionInfo["username"]
-        + ":"
-        + connectionInfo["password"]
-        + "@"
-        + connectionInfo["host"]
-        + ":"
-        + connectionInfo["port"]
-        + "/?authSource=admin"
-    )
-    client = MongoClient(connection_str, serverSelectionTimeoutMS=MAX_SERVER_SEL_DELAY)
-    try:
-        client.server_info()
-    except:
-        print("ERROR: Unable to connect to the server")
-        sys.exit(1)
-
-    i = 0
-    file = "blank"
-    for file in tqdm(os.listdir(connectionInfo["workload"])):
-        if file.endswith(".csv"):
-            print(connectionInfo["workload"] + "/" + file)
-            try:
-                fileName = file[0 : file.find(".")]
-                cmd = (
-                    "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
-                ).format(
-                    connectionInfo["username"],
-                    connectionInfo["password"],
-                    connectionInfo["host"],
-                    connectionInfo["port"],
-                    connectionInfo["db"],
-                    connectionInfo["workload"] + "/" + file,
-                    fileName,
-                )
-                os.system(cmd)
-                i += 1
-            except pd.errors.EmptyDataError:
-                print("Skipping empty csv " + file)
+                logging.debug("[profiling] Skipping shortening on empty csv: %s" % str(fpath))

-    mydb = client["workload_names"]
-    mycol = mydb["names"]
-    value = {"name": connectionInfo["db"]}
-    newValue = {"name": connectionInfo["db"]}
-    mycol.replace_one(value, newValue, upsert=True)
-    print("{} collections added.".format(i))
-    print("Workload name uploaded")
+    logging.info("[profiling] Kernel_Name shortening complete.")
