Skip to content

Commit

Permalink
added helper script for existing vulnerable jars
Browse files Browse the repository at this point in the history
  • Loading branch information
Cornul11 committed Mar 2, 2024
1 parent 0f1731f commit 092031c
Showing 1 changed file with 84 additions and 0 deletions.
84 changes: 84 additions & 0 deletions util/get_existing_on_goteborg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import argparse

def parse_arguments():
    """Parse the command-line options naming the input and output files.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv`` with the
        attributes ``vulnerable``, ``downloaded``, ``jar_paths`` and
        ``output``. All four options are required.
    """
    cli = argparse.ArgumentParser(
        description='Process the paths to find remaining vulnerable artifacts.'
    )
    # (flag, help text) pairs; every option is mandatory.
    required_options = (
        ('--vulnerable', 'Path to the vulnerable_artifacts.txt file'),
        ('--downloaded', 'Path to the downloaded_artifacts.txt file'),
        ('--jar_paths', 'Path to the jar_paths.txt file'),
        ('--output', 'Path to the output file for remaining vulnerable artifacts'),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)
    return cli.parse_args()


def read_downloaded_artifacts(filename, base_path):
    """Load the set of already-downloaded artifact paths, relative to *base_path*.

    Args:
        filename: Text file with one downloaded path per line.
        base_path: Directory prefix to remove from the front of each line.

    Returns:
        Set of paths with the leading ``base_path`` removed. Lines that do
        not start with ``base_path`` are kept whole instead of having their
        first ``len(base_path)`` characters cut off. Blank lines are skipped.
    """
    prefix_len = len(base_path)
    relative_paths = set()
    with open(filename, "r") as file:
        for line in file:
            entry = line.strip()
            if not entry:
                continue
            # Only strip the prefix when it is really there; blind slicing
            # would corrupt entries recorded under a different root.
            if entry.startswith(base_path):
                entry = entry[prefix_len:]
            relative_paths.add(entry)
    return relative_paths


def read_vulnerable_artifacts(filename):
    """Load the set of vulnerable artifact coordinates from a text file.

    Args:
        filename: Text file with one coordinate per line.

    Returns:
        Set of the whitespace-stripped, non-empty lines.
    """
    with open(filename, "r") as file:
        stripped_lines = (line.strip() for line in file)
        # Drop blank lines: an empty coordinate cannot be split into
        # group/artifact/version by later processing.
        return {entry for entry in stripped_lines if entry}


def gav_to_path(gav):
    """Convert a ``group:artifact:version`` coordinate to a relative path.

    Dots in the group id become ``/`` separators; the artifact id and the
    version are appended unchanged.

    Raises:
        ValueError: if *gav* does not have exactly three colon-separated
            fields.
    """
    group, artifact, ver = gav.split(":")
    group_dir = "/".join(group.split("."))
    return "/".join((group_dir, artifact, ver))


def read_jar_paths(filename, base_path):
    """Map each listed JAR's coordinate to its path below *base_path*.

    Args:
        filename: Text file with one JAR path per line.
        base_path: Repository root prefix; its length is cut from the front
            of every path.

    Returns:
        Dict from the ``group:artifact:version`` string derived by
        ``path_to_gav`` to the path with its first ``len(base_path)``
        characters removed. If several lines yield the same coordinate,
        the last one wins. Blank lines are skipped.
    """
    jar_paths = {}
    prefix_len = len(base_path)
    with open(filename, "r") as file:
        for line in file:
            path = line.strip()
            if not path:
                # A blank line has no path components, so deriving a
                # coordinate from it would fail with IndexError.
                continue
            jar_paths[path_to_gav(path, base_path)] = path[prefix_len:]
    return jar_paths


def path_to_gav(path, base_path):
    """Derive a ``group:artifact:version`` coordinate from a JAR file path.

    After the leading ``base_path`` is removed, the last component is
    ignored (treated as the file name), the one before it is the version,
    the one before that is the artifact id, and all earlier components are
    joined with dots to form the group id.

    Args:
        path: Path to a JAR file.
        base_path: Prefix to remove from the front of *path* when present.

    Returns:
        The coordinate string ``"<group>:<artifact>:<version>"``.

    Raises:
        IndexError: if fewer than three components remain after the prefix
            is removed.
    """
    # Remove only the leading prefix. str.replace() would also delete any
    # later occurrence of the same text and corrupt the coordinate.
    if base_path and path.startswith(base_path):
        path = path[len(base_path):]
    parts = path.strip("/").split("/")
    version = parts[-2]
    artifact_id = parts[-3]
    group_id = ".".join(parts[:-3])
    return f"{group_id}:{artifact_id}:{version}"


def main():
    """Report vulnerable artifacts that exist as JARs but are not yet downloaded.

    Reads the three listings named on the command line, then:

    * writes to the output file, sorted and one per line, the full
      repository path of every listed JAR whose coordinate is vulnerable
      and not in the downloaded set;
    * prints how many such JARs were found;
    * prints how many vulnerable artifacts appear in neither the downloaded
      set nor the JAR listing.
    """
    args = parse_arguments()

    base_path_downloaded = "/home/dan/jar-vulnerability-detection/util/download_path/"
    base_path_jars = "/data/.m2/repository/"
    vulnerable_artifacts = read_vulnerable_artifacts(args.vulnerable)
    downloaded_artifacts = read_downloaded_artifacts(
        args.downloaded, base_path_downloaded
    )
    jar_paths = read_jar_paths(args.jar_paths, base_path_jars)

    vulnerable_paths = {gav_to_path(gav) for gav in vulnerable_artifacts}

    # Vulnerable artifacts that still have to be fetched.
    remaining_artifacts = vulnerable_paths - downloaded_artifacts

    # Convert each listed coordinate to its directory form once.
    jar_dirs = {gav: gav_to_path(gav) for gav in jar_paths}

    remaining_in_jar_paths = {
        jar_paths[gav]
        for gav, directory in jar_dirs.items()
        if directory in remaining_artifacts
    }

    with open(args.output, "w") as file:
        for path in sorted(remaining_in_jar_paths):
            file.write(base_path_jars + path + "\n")

    print(
        f"Found {len(remaining_in_jar_paths)} remaining vulnerable artifacts in JAR paths"
    )

    # Not downloaded and not in the JAR listing. The earlier arithmetic
    # subtracted JAR-listing hits twice and ignored the downloaded set, so
    # it could overcount or go negative.
    not_found_anywhere_count = len(remaining_artifacts - set(jar_dirs.values()))

    print(
        f"{not_found_anywhere_count} vulnerable artifacts have not been found in downloaded artifacts or jar paths."
    )


# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit 092031c

Please sign in to comment.