Skip to content

Commit

Permalink
feat(#130): failed to parse
Browse files (browse the repository at this point in the history)
  • Loading branch information
h1alexbel committed Oct 8, 2024
1 parent 44866db commit 35793aa
Showing 1 changed file with 46 additions and 42 deletions.
88 changes: 46 additions & 42 deletions sr-data/src/sr_data/steps/maven.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Collect Maven information for each repo.
"""
import xml.dom.minidom
# The MIT License (MIT)
#
# Copyright (c) 2024 Aliaksei Bialiauski
Expand All @@ -24,6 +23,8 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ParseError
import xml.dom.minidom

import pandas as pd
import requests
Expand Down Expand Up @@ -90,48 +91,51 @@ def merge(build, repo):
for project in build:
path = project["path"]
logger.debug(f"Checking {repo}: {path}")
root = ET.fromstring(project["content"])
pretty = "\n".join(
[
line for line
in xml.dom.minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ").splitlines()
if line.strip()
]
)
logger.debug(f"{path}:\n{pretty}")
if len(
root.findall(
".//pom:dependency[pom:groupId='@project.groupId@']",
namespaces
)
) > 0:
logger.info(f"Skipping {path}, since it contains @project dependency")
else:
profile = {}
packaging = root.find(".//pom:packaging", namespaces)
if packaging is not None:
packgs.append(packaging.text)
try:
root = ET.fromstring(project["content"])
pretty = "\n".join(
[
line for line
in xml.dom.minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ").splitlines()
if line.strip()
]
)
logger.debug(f"{path}:\n{pretty}")
if len(
root.findall(
".//pom:dependency[pom:groupId='@project.groupId@']",
namespaces
)
) > 0:
logger.info(f"Skipping {path}, since it contains @project dependency")
else:
packgs.append("jar")
for plugin in root.findall(".//pom:plugin", namespaces):
group = plugin.find("./pom:groupId", namespaces)
artifact = plugin.find("./pom:artifactId", namespaces)
if group is not None:
plugins.append(f"{group.text}:{artifact.text}")
elif artifact is not None:
plugins.append(artifact.text)
good.append(profile)
used = len(good)
logger.info(f"Found {used} good Maven projects in {repo}")
return {
"projects": used,
"plugins": sorted(list(set(plugins))),
"packages": {
"wars": len(list(filter(lambda p: p == "war", packgs))),
"jars": len(list(filter(lambda p: p == "jar", packgs))),
"poms": len(list(filter(lambda p: p == "pom", packgs)))
}
}
profile = {}
packaging = root.find(".//pom:packaging", namespaces)
if packaging is not None:
packgs.append(packaging.text)
else:
packgs.append("jar")
for plugin in root.findall(".//pom:plugin", namespaces):
group = plugin.find("./pom:groupId", namespaces)
artifact = plugin.find("./pom:artifactId", namespaces)
if group is not None:
plugins.append(f"{group.text}:{artifact.text}")
elif artifact is not None:
plugins.append(artifact.text)
good.append(profile)
used = len(good)
logger.info(f"Found {used} good Maven projects in {repo}")
return {
"projects": used,
"plugins": sorted(list(set(plugins))),
"packages": {
"wars": len(list(filter(lambda p: p == "war", packgs))),
"jars": len(list(filter(lambda p: p == "jar", packgs))),
"poms": len(list(filter(lambda p: p == "pom", packgs)))
}
}
except ParseError:
logger.warning(f"Failed to parse {repo}: {path}. Probably XML is broken")


def request(token, repo) -> Response:
Expand Down

0 comments on commit 35793aa

Please sign in to comment.