Skip to content

Commit

Permalink
only request issues comments when github issue metadata claims to hav…
Browse files Browse the repository at this point in the history
…e them; relatedto #156
  • Loading branch information
Jorrit Poelen committed Jul 26, 2023
1 parent 8366e59 commit ef0e3eb
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.collections4.map.LRUMap;
import org.apache.commons.collections4.queue.CircularFifoQueue;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.rdf.api.IRI;
Expand All @@ -27,7 +26,6 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -55,6 +53,7 @@ public class RegistryReaderGitHubIssues extends ProcessorReadOnly {
"(/(?<issueNumber>[0-9]+)){0,1}(/comments){0,1}(\\?.*){0,1}");
private static final String MOST_RECENT_ISSUE_QUERY = "/issues?per_page=1&state=all";
private static final String API_PREFIX = "https://api.github.com/repos/";
public static final String COMMENTS_REQUEST_SUFFIX = "/comments?per_page=100";

private Map<IRI, IRI> processedIRIs = new LRUMap<>(4096);

Expand Down Expand Up @@ -82,7 +81,8 @@ private void processStatement(Quad statement, IRI versionSourceIRI) {
if (matcher.matches()) {
String org = matcher.group("org");
String repo = matcher.group("repo");
if (StringUtils.isBlank(matcher.group("issueNumber"))) {
String issueNumber = matcher.group("issueNumber");
if (StringUtils.isBlank(issueNumber)) {
if (StringUtils.endsWith(versionSource, MOST_RECENT_ISSUE_QUERY)) {
emitIssueRequestsFor(statement, org, repo, this);
} else {
Expand All @@ -95,19 +95,54 @@ private void processStatement(Quad statement, IRI versionSourceIRI) {
}
} else {
if (StringUtils.startsWith(versionSource, API_PREFIX)) {
handleIssues(statement, matcher);
deferenceDependencies(statement);
if (issueHasComments(statement, versionSource, issueNumber)) {
ActivityUtil.emitAsNewActivity(
createRequestForIssueComments(org, repo, Integer.parseInt(issueNumber)),
this,
statement.getGraphName()
);
}
} else {
Stream<Quad> requestIssueComments = createRequestForIssueComments(org, repo, Integer.parseInt(matcher.group("issueNumber")));
ActivityUtil.emitAsNewActivity(
requestIssueComments,
createRequestForIssue(org, repo, Integer.parseInt(issueNumber)),
this,
statement.getGraphName()
);
}

}
}
}

/**
 * Tells whether the already-retrieved GitHub issue JSON referenced by {@code statement}
 * reports a positive comment count.
 *
 * <p>The content is only inspected when {@code versionSource} ends with
 * {@code /issues/<issueNumber>}; any other version source yields {@code false}
 * without reading content. The content counts as "having comments" only when its
 * top-level {@code comments} field is an integral number greater than zero.
 *
 * @param statement     statement whose version is resolved via {@code getVersion} and read via {@code get}
 * @param versionSource string form of the resource the version was retrieved from
 * @param issueNumber   issue number expected at the end of {@code versionSource}
 * @return {@code true} if the issue content declares at least one comment; {@code false} when the
 * version source does not match, the content is unavailable or malformed, or reading fails
 */
private boolean issueHasComments(Quad statement, String versionSource, String issueNumber) {
    if (!StringUtils.endsWith(versionSource, "/issues/" + issueNumber)) {
        return false;
    }

    boolean issueHasComments = false;
    try {
        IRI currentPage = (IRI) getVersion(statement);
        // Close the stream explicitly instead of depending on the JSON parser to do so;
        // try-with-resources skips the close when the resource is null.
        try (InputStream is = get(currentPage)) {
            if (is != null) {
                try {
                    JsonNode jsonNode = new ObjectMapper().readTree(is);
                    if (jsonNode != null && jsonNode.has("comments")) {
                        JsonNode comments = jsonNode.get("comments");
                        issueHasComments = comments.isIntegralNumber() && comments.intValue() > 0;
                    }
                } catch (IOException ignored) {
                    // ignore malformed json: unparseable content is treated as "no comments"
                }
            }
        }
    } catch (IOException e) {
        LOG.warn("failed to handle [" + statement.toString() + "]", e);
    }
    return issueHasComments;
}

private void emitIssueRequestsFor(Quad statement, String org, String repo, StatementsEmitter emitter) {
try {
IRI currentPage = (IRI) getVersion(statement);
Expand All @@ -118,10 +153,10 @@ private void emitIssueRequestsFor(Quad statement, String org, String repo, State
if (jsonNode != null) {
if (jsonNode.isArray()) {
for (JsonNode node : jsonNode) {
emitRequestForIssueComments(org, repo, emitter, node);
emitRequestForIssuesUpToMostRecent(org, repo, emitter, node);
}
} else if (jsonNode.isObject()) {
emitRequestForIssueComments(org, repo, emitter, jsonNode);
emitRequestForIssuesUpToMostRecent(org, repo, emitter, jsonNode);
}
}
} catch (IOException ex) {
Expand All @@ -134,63 +169,73 @@ private void emitIssueRequestsFor(Quad statement, String org, String repo, State
}
}

public static void emitRequestForIssueComments(String org, String repo, StatementsEmitter emitter, JsonNode node) {
public static void emitRequestForIssuesUpToMostRecent(String org, String repo, StatementsEmitter emitter, JsonNode node) {
if (node.has("number")) {
JsonNode issueNumber = node.get("number");
if (issueNumber.isInt()) {
int mostRecentIssue = issueNumber.asInt();
emitRequestForIssueComments(emitter, org, repo, mostRecentIssue);
emitRequestForIssuesUpToMostRecent(emitter, org, repo, mostRecentIssue);
}
}
}

private static void emitRequestForIssueComments(StatementsEmitter emitter, String org, String repo, int mostRecentIssue) {
private static void emitRequestForIssuesUpToMostRecent(StatementsEmitter emitter, String org, String repo, int mostRecentIssue) {
Stream<Quad> statements = IntStream
.rangeClosed(1, mostRecentIssue)
.mapToObj(issue -> createRequestForIssueComments(org, repo, issue))
.mapToObj(issue -> createRequestForIssue(org, repo, issue))
.flatMap(Function.identity());
ActivityUtil.emitAsNewActivity(statements, emitter, Optional.empty());
}

private static Stream<Quad> createRequestForIssueComments(String org, String repo, int issue) {
String issueSuffix = org + "/" + repo + "/issues/" + issue;
String issueRequestPrefix = API_PREFIX + issueSuffix;
String issueRequestPrefix = prefixForIssue(org, repo, issue);
IRI issueRequest = toIRI(issueRequestPrefix);
IRI issueCommentsRequest = toIRI(issueRequestPrefix + "/comments?per_page=100");
IRI issueCommentsRequest = toIRI(issueRequestPrefix + COMMENTS_REQUEST_SUFFIX);
return Stream.of(
toStatement(issueRequest, HAS_TYPE, RefNodeFactory.toLiteral(ResourcesHTTP.MIMETYPE_GITHUB_JSON)),
toStatement(issueRequest, HAS_VERSION, toBlank()),
toStatement(issueRequest, SEE_ALSO, toIRI("https://github.com/" + issueSuffix)),
toStatement(issueRequest, HAD_MEMBER, issueCommentsRequest),
toStatement(issueCommentsRequest, HAS_TYPE, RefNodeFactory.toLiteral(ResourcesHTTP.MIMETYPE_GITHUB_JSON)),
toStatement(issueCommentsRequest, HAS_VERSION, toBlank())
);
}

private void handleIssues(Quad statement, Matcher matcher) {
/**
 * Builds the API request URL string for a single issue by appending
 * {@code <org>/<repo>/issues/<issue>} to {@code API_PREFIX}.
 *
 * @param org   repository owner segment
 * @param repo  repository name segment
 * @param issue issue number
 * @return the concatenated request URL string for the issue
 */
private static String prefixForIssue(String org, String repo, int issue) {
    return API_PREFIX + org + "/" + repo + "/issues/" + issue;
}

/**
 * Creates the statements that describe a request for a single issue, without its comments.
 *
 * <p>Three statements are produced, in this order, all with the issue's API IRI as subject:
 * its type (the GitHub JSON mime type literal), its version (a fresh blank node), and a
 * see-also link to {@code https://github.com/<org>/<repo>/issues/<issue>}.
 *
 * @param org   repository owner segment
 * @param repo  repository name segment
 * @param issue issue number
 * @return stream of the three statements describing the issue request
 */
private static Stream<Quad> createRequestForIssue(String org, String repo, int issue) {
    IRI apiIssue = toIRI(prefixForIssue(org, repo, issue));
    String htmlIssueUrl = "https://github.com/" + org + "/" + repo + "/issues/" + issue;

    Quad typeStatement = toStatement(
            apiIssue, HAS_TYPE, RefNodeFactory.toLiteral(ResourcesHTTP.MIMETYPE_GITHUB_JSON));
    Quad versionStatement = toStatement(apiIssue, HAS_VERSION, toBlank());
    Quad seeAlsoStatement = toStatement(apiIssue, SEE_ALSO, toIRI(htmlIssueUrl));

    return Stream.of(typeStatement, versionStatement, seeAlsoStatement);
}

private void deferenceDependencies(Quad statement) {
List<Quad> nodes = new ArrayList<>();
try {
IRI currentPage = (IRI) getVersion(statement);
InputStream is = get(currentPage);
if (is != null) {
parseIssuesIgnoreUnexpected(currentPage, new StatementsEmitterAdapter() {
emitRequestsForIssueDependenciesIfNeeded(currentPage, new StatementsEmitterAdapter() {
@Override
public void emit(Quad statement) {
nodes.add(statement);
}
}, is, getVersionSource(statement));
}, is);
}
} catch (IOException e) {
LOG.warn("failed to handle [" + statement.toString() + "]", e);
}
ActivityUtil.emitAsNewActivity(nodes.stream(), this, statement.getGraphName());
}

private static void parseIssuesIgnoreUnexpected(
private static void emitRequestsForIssueDependenciesIfNeeded(
IRI currentPage,
StatementsEmitter emitter,
InputStream in,
IRI versionSource) {
InputStream in) {
try {
JsonNode jsonNode = new ObjectMapper().readTree(in);
ArrayList<Pair<URI, URI>> uris = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public void matchingIssuesURLs() {
@Test
public void onGitHubIssueCommentsAPI() {
ArrayList<Quad> nodes = new ArrayList<>();
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("issue_904.json");
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("904_issue_comments.json");
ProcessorReadOnly reader = new RegistryReaderGitHubIssues(blob, TestUtilForProcessor.testListener(nodes));

reader.on(toStatement(
Expand All @@ -123,7 +123,7 @@ public void onGitHubIssueCommentsAPI() {
@Test
public void onProcessOnce() {
ArrayList<Quad> nodes = new ArrayList<>();
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("issue_904.json");
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("904_issue_comments.json");
ProcessorReadOnly reader = new RegistryReaderGitHubIssues(blob, TestUtilForProcessor.testListener(nodes));

reader.on(toStatement(
Expand Down Expand Up @@ -154,7 +154,7 @@ public void onProcessLatest() {
HAS_VERSION,
toIRI("http://something")));

assertThat(nodes.size(), is(325));
assertThat(nodes.size(), is(163));

nodes.clear();
reader.on(toStatement(
Expand All @@ -167,7 +167,7 @@ public void onProcessLatest() {

@Test
public void extractURLs() throws IOException {
JsonNode jsonNode = new ObjectMapper().readTree(getClass().getResourceAsStream("issue_904.json"));
JsonNode jsonNode = new ObjectMapper().readTree(getClass().getResourceAsStream("904_issue_comments.json"));
List<Pair<URI, URI>> uris = new ArrayList<>();
RegistryReaderGitHubIssues.appendURIs(jsonNode, uris);
assertThat(uris,
Expand Down Expand Up @@ -207,72 +207,81 @@ public void extractURLs() throws IOException {
public void onLatestIssue() throws IOException {
JsonNode jsonNode = new ObjectMapper().readTree(getClass().getResourceAsStream("latest_issue.json"));
List<Quad> statements = new ArrayList<>();
RegistryReaderGitHubIssues.emitRequestForIssueComments("foo", "bar", new StatementsEmitterAdapter() {
RegistryReaderGitHubIssues.emitRequestForIssuesUpToMostRecent("foo", "bar", new StatementsEmitterAdapter() {
@Override
public void emit(Quad statement) {
statements.add(statement);
}
}, jsonNode.get(0));

assertThat(statements.size(), is(325));
assertThat(statements.size(), is(163));

Quad statement = statements.get(statements.size() - 1);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54/comments?per_page=100>"));
assertThat(statement.getPredicate(), is(HAS_VERSION));
assertThat(statement.getObject().ntriplesString(), startsWith("_:"));

statement = statements.get(statements.size() - 2);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54/comments?per_page=100>"));
assertThat(statement.getPredicate(), is(HAS_TYPE));
assertThat(statement.getObject().ntriplesString(), startsWith("\"application/vnd.github+json\""));

statement = statements.get(statements.size() - 3);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(HAD_MEMBER));
assertThat(statement.getObject().ntriplesString(), startsWith("<https://api.github.com/repos/foo/bar/issues/54/comments?per_page=100>"));

statement = statements.get(statements.size() - 4);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(SEE_ALSO));
assertThat(statement.getObject().ntriplesString(), is("<https://github.com/foo/bar/issues/54>"));

statement = statements.get(statements.size() - 5);
statement = statements.get(statements.size() - 2);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(HAS_VERSION));
assertThat(statement.getObject().ntriplesString(), startsWith("_:"));

statement = statements.get(statements.size() - 6);
statement = statements.get(statements.size() - 3);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(HAS_TYPE));
assertThat(statement.getObject().ntriplesString(), startsWith("\"application/vnd.github+json\""));
assertThat(statement.getObject().ntriplesString(), is("\"application/vnd.github+json\""));
}

@Test
public void onGitHubIssueAPI() {
ArrayList<Quad> nodes = new ArrayList<>();
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("issue_904.json");
ProcessorReadOnly reader = new RegistryReaderGitHubIssues(blob, TestUtilForProcessor.testListener(nodes));
ArrayList<Quad> statements = new ArrayList<>();
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("904_issue.json");
ProcessorReadOnly reader = new RegistryReaderGitHubIssues(blob, TestUtilForProcessor.testListener(statements));

reader.on(toStatement(
toIRI("https://api.github.com/repos/globalbioticinteractions/globalbioticinteractions/issues/904"),
toIRI("https://api.github.com/repos/foo/bar/issues/904"),
HAS_VERSION,
toIRI("http://something")));

assertThat(nodes.size(), not(is(0)));
assertThat(statements.size(), is(5));


Quad statement = statements.get(statements.size() - 1);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/904/comments?per_page=100>"));
assertThat(statement.getPredicate(), is(HAS_VERSION));
assertThat(statement.getObject().ntriplesString(), startsWith("_:"));


}

@Test
public void onGitHubIssue() {
ArrayList<Quad> nodes = new ArrayList<>();
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("issue_904.json");
ProcessorReadOnly reader = new RegistryReaderGitHubIssues(blob, TestUtilForProcessor.testListener(nodes));
ArrayList<Quad> statements = new ArrayList<>();
BlobStoreReadOnly blob = key -> getClass().getResourceAsStream("904_issue.json");
ProcessorReadOnly reader = new RegistryReaderGitHubIssues(blob, TestUtilForProcessor.testListener(statements));

reader.on(toStatement(
toIRI("https://github.com/repos/globalbioticinteractions/globalbioticinteractions/issues/904"),
toIRI("https://github.com/repos/foo/bar/issues/54"),
HAS_VERSION,
toIRI("http://something")));

assertThat(nodes.size(), not(is(0)));
assertThat(statements.size(), not(is(0)));

Quad statement = statements.get(statements.size() - 1);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(SEE_ALSO));
assertThat(statement.getObject().ntriplesString(), is("<https://github.com/foo/bar/issues/54>"));

statement = statements.get(statements.size() - 2);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(HAS_VERSION));
assertThat(statement.getObject().ntriplesString(), startsWith("_:"));

statement = statements.get(statements.size() - 3);
assertThat(statement.getSubject().ntriplesString(), is("<https://api.github.com/repos/foo/bar/issues/54>"));
assertThat(statement.getPredicate(), is(HAS_TYPE));
assertThat(statement.getObject().ntriplesString(), is("\"application/vnd.github+json\""));

}


Expand Down
Loading

0 comments on commit ef0e3eb

Please sign in to comment.