Skip to content

Commit

Permalink
Set wikidata prefixes to an IRI for now.
Browse files Browse the repository at this point in the history
Don't try to extract a comment for now; just add a TODO placeholder.
  • Loading branch information
JervenBolleman committed Sep 20, 2024
1 parent 26e0c46 commit 4883322
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 12 deletions.
3 changes: 2 additions & 1 deletion src/main/java/swiss/sib/rdf/sparql/examples/Fixer.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ private Map<String, String> loadPrefixes() throws IOException {

private void fix(IRI queryIri, Value query, Path file, Model model, Map<String, String> prefixes2) {
String queryIriStr = queryIri.stringValue();
String queryStr = query.stringValue();
String queryStr = query.stringValue()
.replace("\\\"", "\"");


String fixedPrefixes = Fixer.fixMissingPrefixes(queryStr, prefixes2);
Expand Down
32 changes: 21 additions & 11 deletions src/main/java/swiss/sib/rdf/sparql/examples/Wikibase.java
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ public class Wikibase implements Callable<Integer> {
private File outputHtmlDir;
private static final ValueFactory VF = SimpleValueFactory.getInstance();

private static final IRI WIKIDATA_PREFIXES = VF.createIRI("https://example.org/to_decide/wikidata_prefixes");

private static final IRI CC_BY_4 = VF.createIRI("https://creativecommons.org/licenses/by-sa/4.0/");

public Integer call() {
Expand Down Expand Up @@ -108,7 +110,7 @@ public Integer call() {
}

private enum NamedTemplate {
SPARQL("SPARQL", ".mw-highlight-lang-sparql"),
SPARQL("SPARQL", ".mw-highlight-lang-sparql"),
// WDQUERY("Wdquery", ".mw-highlight-lang-sparql"),
// SPARQL_INLINE("SPARQL_Inline/doc", ".mw-highlight-lang-sparql"),
SPARQL2("SPARQL2", ".mw-highlight-lang-sparql"),;
Expand All @@ -129,7 +131,7 @@ private void fetchHtml() {
String url = wikidatawiki + "w/index.php?title=Special:WhatLinksHere/Template:" + nt.name;

try {
while (url != null) {
while (url != null && !url.equals(wikidatawiki)) {
Document searchResultDocument = retrieveAndCacheResult(url, outputSearchResultDir);
Elements ulWhatLinksHereList = searchResultDocument.select("#mw-whatlinkshere-list li > a");
for (Element link : ulWhatLinksHereList) {
Expand All @@ -150,7 +152,8 @@ private String makeAbsoluteUrl(Element link) {
String pageLinkingToSparqlTemplate = link.attr("href");
if (pageLinkingToSparqlTemplate.startsWith("/")) {
pageLinkingToSparqlTemplate = wikidatawiki + pageLinkingToSparqlTemplate;
}
} else if (pageLinkingToSparqlTemplate == null || pageLinkingToSparqlTemplate.isBlank())
return null;
return pageLinkingToSparqlTemplate;
}

Expand All @@ -167,27 +170,34 @@ private void extractSparqlQueriesFromPage(String pageLinkingToSparqlTemplate, Na

Elements sparqlTemplates = htmlPageDocument.select(nt.cssClass);
for (Element sparqlTemplate : sparqlTemplates) {
String query = sparqlTemplate.select("pre").eachText().stream().collect(Collectors.joining("\n"));
// Join the text of all <pre> elements into one SPARQL string and undo the \" escaping.
String query = sparqlTemplate.select("pre").eachText().stream().collect(Collectors.joining("\n"))
.replace("\\\"", "\"");
LinkedHashModel model = new LinkedHashModel();

String urlForFileName = new MD5().evaluate(VF, VF.createLiteral(query)).stringValue();
IRI iriForQuery = VF.createIRI(wikidatawiki + "#query-" + urlForFileName);
addQueryStringToModel(query, model, iriForQuery);
StringBuilder sb = makeThePreviosSiblingNodesTheLabel(sparqlTemplate);
if (!sb.isEmpty() && languageInHtml != null && !languageInHtml.isBlank()) {
model.add(iriForQuery, RDFS.COMMENT, VF.createLiteral(sb.toString(), languageInHtml));
} else if (!sb.isEmpty()) {
model.add(iriForQuery, RDFS.COMMENT, VF.createLiteral(sb.toString()));
}
model.add(iriForQuery, RDFS.COMMENT, VF.createLiteral("TODO", "en"));
// extractComment(languageInHtml, sparqlTemplate, model, iriForQuery);
model.add(iriForQuery, DCTERMS.IS_PART_OF, VF.createIRI(pageLinkingToSparqlTemplate));
model.add(iriForQuery, DCTERMS.LICENSE, CC_BY_4);
model.add(iriForQuery, SHACL.PREFIXES, VF.createBNode("wikidata_prefixes"));
model.add(iriForQuery, SHACL.PREFIXES, WIKIDATA_PREFIXES);
model.add(iriForQuery, SchemaDotOrg.TARGET, VF.createIRI(wikidatasparql));
writeModelToTurtle(model, urlForFileName);
}
}
}

/**
 * Adds an {@code RDFS.COMMENT} statement for the query to the model, using the
 * text produced by {@code makeThePreviosSiblingNodesTheLabel} as the comment.
 * Nothing is added when that text is empty.
 *
 * @param languageInHtml language tag for the literal; when {@code null} or
 *                       blank the comment is added without a language tag
 * @param sparqlTemplate element whose preceding sibling nodes supply the text
 * @param model          model that receives the statement
 * @param iriForQuery    subject of the statement
 */
private void extractComment(String languageInHtml, Element sparqlTemplate, LinkedHashModel model, IRI iriForQuery) {
    String comment = makeThePreviosSiblingNodesTheLabel(sparqlTemplate).toString();
    if (comment.isEmpty()) {
        // No preceding text was found, so there is nothing to record.
        return;
    }

    boolean languageMissing = languageInHtml == null || languageInHtml.isBlank();
    if (languageMissing) {
        model.add(iriForQuery, RDFS.COMMENT, VF.createLiteral(comment));
    } else {
        model.add(iriForQuery, RDFS.COMMENT, VF.createLiteral(comment, languageInHtml));
    }
}

private StringBuilder makeThePreviosSiblingNodesTheLabel(Element sparqlTemplate) {
Element parent = sparqlTemplate.parent();
List<Node> childNodes = parent.childNodes();
Expand Down
49 changes: 49 additions & 0 deletions src/test/java/swiss/sib/rdf/sparql/examples/FixerTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package swiss.sib.rdf.sparql.examples;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.Map;
Expand Down Expand Up @@ -37,6 +38,41 @@ public class FixerTest {
INCLUDE %get_labels
?item a rdfs:Class .
}""";

/**
 * SPARQL query fixture that declares all of its prefixes, uses SERVICE
 * clauses and contains no INCLUDE statement.
 * <p>
 * Note: inside a Java text block {@code \"} is an ordinary escape for a
 * double quote, so the resulting string contains plain quotes and no
 * backslashes.
 */
private final String blazeGraphWithoutIncludeExample = """
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
SELECT
?place
?placeLabel
?dist
(GROUP_CONCAT(DISTINCT ?instanceLabel; SEPARATOR = \", \") AS ?instanceLabels)
(GROUP_CONCAT(DISTINCT ?adminLabel; SEPARATOR = \", \") AS ?adminLabels)
WHERE
{
wd:Q116716361 wdt:P625 ?loc .
SERVICE wikibase:around {
?place wdt:P625 ?location .
bd:serviceParam wikibase:center ?loc .
bd:serviceParam wikibase:radius \"6\" .
}
OPTIONAL { ?place wdt:P31 ?instance }
OPTIONAL { ?place wdt:P131 ?admin }
SERVICE wikibase:label {
bd:serviceParam wikibase:language \"en\" .
?instance rdfs:label ?instanceLabel .
?place rdfs:label ?placeLabel .
?admin rdfs:label ?adminLabel .
}
BIND(geof:distance(?loc, ?location) as ?dist)
}
GROUP BY ?place ?placeLabel ?dist
ORDER BY ?dist""";


@Test
public void simpleIncludeWith() {
Expand All @@ -50,6 +86,19 @@ public void simpleIncludeWith() {
}
}

/**
 * Runs {@code Fixer.fixBlazeGraphIncludeWith} on a query without an INCLUDE
 * statement and expects {@code null} back, then checks that the unchanged
 * query is accepted by the SPARQL parser.
 */
@Test
public void real() {
    try {
        assertNull(Fixer.fixBlazeGraphIncludeWith(blazeGraphWithoutIncludeExample, "", null));

        // The untouched query must parse; a MalformedQueryException fails the test.
        new SPARQLParserFactory().getParser()
                .parseQuery(blazeGraphWithoutIncludeExample, "http://example.org/");
    } catch (MalformedQueryException e) {
        fail(e);
    }
}

@Test
public void simpleMissingPrefix() {
try {
Expand Down

0 comments on commit 4883322

Please sign in to comment.