diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ba754a28..081521792 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.7.2] - 2020-11-18 +### Added +- Added 'strict' mode for loading ontologies in [#788] + ### Changed - Update to whelk 1.0.4 - Run [`query`] on existing TDB dataset (instead of ontology input) in [#792] @@ -225,6 +228,7 @@ First official release of ROBOT! [#796]: https://github.com/ontodev/robot/pull/796 [#792]: https://github.com/ontodev/robot/pull/792 +[#788]: https://github.com/ontodev/robot/pull/788 [#783]: https://github.com/ontodev/robot/pull/783 [#767]: https://github.com/ontodev/robot/pull/767 [#758]: https://github.com/ontodev/robot/pull/758 diff --git a/docs/errors.md b/docs/errors.md index e62ba4afa..29dae872a 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -146,6 +146,20 @@ Instead, use this (or a `--term-file`): robot extract --input foo.owl --term foo:0000001 --term foo:0000002 ``` +### Unparsed Triples Error + +Sometimes when loading an ontology, not all triples can be parsed. This error is thrown when the `--strict` flag is included in the command. Otherwise, the unparsed triples are logged as errors and excluded from the loaded ontology. + +This is often because OWL does not support RDF reification (for more details, see [this post](https://stackoverflow.com/questions/45610092/owl-reification-vs-rdf-reification)); ROBOT is a tool for working with OWL, not RDF. Usually this can be easily resolved by replacing `rdf:Statement` with `owl:Axiom`. For example, this statement cannot be parsed by ROBOT: +``` +_:blank rdf:type rdf:Statement . +``` + +... but this statement is OK: +``` +_:blank rdf:type owl:Axiom . 
``` + ### Wildcard Error Any pattern specified with `--inputs` for [merge](/merge) and [unmerge](/unmerge) must be a wildcard pattern, including either `*` (to match any number of characters) or `?` (to match any single character). diff --git a/docs/global.md b/docs/global.md index 3fe2a560f..632b73100 100644 --- a/docs/global.md +++ b/docs/global.md @@ -7,6 +7,7 @@ 3. [XML Catalogs (`--catalog`)](#xml-catalogs) 4. [Logging (`-v`, `-vv`, `-vvv`)](#logging) 5. [XML Entities (`--xml-entities`)](#xml-entities) +6. [Strict Parsing (`--strict`)](#strict-parsing) ## Java Options @@ -116,6 +117,20 @@ The `obo` abbreviation would be substituted for any instance of `http://purl.obo xml:base="&obo;obi.owl" ``` +## Strict Parsing + +Sometimes, ROBOT is unable to parse all triples in an input file. By default, these triples are excluded from the loaded ontology, but this can cause downstream issues if the triples were expected to be in certain outputs. When the `--strict` flag is included, ROBOT will fail if any triples cannot be parsed. The `--strict` flag also turns on strict parsing in the [configuration object](http://owlcs.github.io/owlapi/apidocs_5/org/semanticweb/owlapi/model/OWLOntologyLoaderConfiguration.html) for loading the ontology with the OWLAPI `OWLOntologyManager`. + +Unparsed triples are often caused by RDF reification, which is different from OWL reification (for more details, please see [this post](https://stackoverflow.com/questions/45610092/owl-reification-vs-rdf-reification)). ROBOT is a tool for working with OWL-format ontologies, not RDF. Usually, instances of RDF reification can easily be fixed by replacing `rdf:Statement` with `owl:Axiom`. For example, this statement cannot be parsed by ROBOT: +``` +_:blank rdf:type rdf:Statement . +``` + +... but this statement is OK: +``` +_:blank rdf:type owl:Axiom . 
+``` + --- ## Error Messages diff --git a/robot-command/src/main/java/org/obolibrary/robot/CommandLineHelper.java b/robot-command/src/main/java/org/obolibrary/robot/CommandLineHelper.java index 9281e599f..eb1254ed0 100644 --- a/robot-command/src/main/java/org/obolibrary/robot/CommandLineHelper.java +++ b/robot-command/src/main/java/org/obolibrary/robot/CommandLineHelper.java @@ -436,6 +436,7 @@ public static IOHelper getIOHelper(CommandLine line) throws IOException { } ioHelper.setXMLEntityFlag(line.hasOption("xml-entities")); + ioHelper.setStrict(line.hasOption("strict")); return ioHelper; } @@ -917,6 +918,7 @@ public static Options getCommonOptions() { o.addOption(null, "add-prefix", true, "add prefix 'foo: http://bar' to the output"); o.addOption(null, "add-prefixes", true, "add JSON-LD prefixes to the output"); o.addOption("x", "xml-entities", false, "use entity substitution with ontology XML output"); + o.addOption(null, "strict", false, "use strict parsing when loading an ontology"); return o; } diff --git a/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java b/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java index 1f2160ea7..7f93f2d5b 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java +++ b/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java @@ -36,6 +36,7 @@ import org.obolibrary.oboformat.writer.OBOFormatWriter; import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.formats.*; +import org.semanticweb.owlapi.io.*; import org.semanticweb.owlapi.io.XMLUtils; import org.semanticweb.owlapi.model.*; import org.semanticweb.owlapi.rdf.rdfxml.renderer.IllegalElementNameException; @@ -117,6 +118,10 @@ public class IOHelper { static final String undefinedPrefixError = NS + "UNDEFINED PREFIX ERROR \"%s\" has unknown prefix; make sure prefix \"%s\" is defined"; + /** Error message when loader contains unparsed triples. 
*/ + private static final String unparsedTriplesError = + NS + "UNPARSED TRIPLES ERROR input ontology contains %d triple(s) that could not be parsed:"; + /** Optional base namespaces. */ private Set baseNamespaces = new HashSet<>(); @@ -126,6 +131,9 @@ public class IOHelper { /** Store the current JSON-LD context. */ private Context context = new Context(); + /** Strict parsing; fail on unparsed triples. */ + private Boolean strict = false; + /** Store xml entities flag. */ private Boolean useXMLEntities = false; @@ -184,6 +192,16 @@ public IOHelper(File file) throws IOException { setContext(jsonString); } + /** + * Set "strict" value. If true, any loadOntology method will fail on unparsed triples or OWLAPI + * "strict" parsing issues. + * + * @param strict boolean value + */ + public void setStrict(Boolean strict) { + this.strict = strict; + } + /** * Given an ontology, a file, and a list of prefixes, save the ontology to the file and include * the prefixes in the header. @@ -394,8 +412,9 @@ public OWLOntology loadOntology(File ontologyFile, File catalogFile) throws IOEx return loadCompressedOntology(ontologyFile, catalogFile.getAbsolutePath()); } } + // Otherwise load from file using default method - return manager.loadOntologyFromOntologyDocument(ontologyFile); + return loadOntology(manager, new FileDocumentSource(ontologyFile)); } catch (JsonLdError | OWLOntologyCreationException e) { throw new IOException(String.format(invalidOntologyFileError, ontologyFile.getName()), e); } @@ -436,7 +455,7 @@ public OWLOntology loadOntology(InputStream ontologyStream, String catalogPath) if (catalogFile != null) { manager.setIRIMappers(Sets.newHashSet(new CatalogXmlIRIMapper(catalogFile))); } - ontology = manager.loadOntologyFromOntologyDocument(ontologyStream); + ontology = loadOntology(manager, new StreamDocumentSource(ontologyStream)); } catch (OWLOntologyCreationException e) { throw new IOException(invalidOntologyStreamError, e); } @@ -483,7 +502,7 @@ public OWLOntology 
loadOntology(IRI ontologyIRI, String catalogPath) throws IOEx ontology = loadCompressedOntology(new URL(ontologyIRI.toString()), catalogPath); } else { // Otherwise load ontology as normal - ontology = manager.loadOntologyFromOntologyDocument(ontologyIRI); + ontology = loadOntology(manager, new IRIDocumentSource(ontologyIRI)); } } catch (OWLOntologyCreationException e) { throw new IOException(e); @@ -491,6 +510,84 @@ public OWLOntology loadOntology(IRI ontologyIRI, String catalogPath) throws IOEx return ontology; } + /** + * Given an ontology manager and a document source, load the ontology from the source using the + * manager. Log unparsed triples, or throw exception if strict=true. + * + * @param manager OWLOntologyManager with IRI mappers to use + * @param source OWLOntologyDocumentSource to load from + * @return a new ontology object, with a new OWLManager + * @throws IOException on problem with unparsed triples if strict=true + * @throws OWLOntologyCreationException on problem loading ontology document + */ + public OWLOntology loadOntology(OWLOntologyManager manager, OWLOntologyDocumentSource source) + throws IOException, OWLOntologyCreationException { + OWLOntologyLoaderConfiguration config = new OWLOntologyLoaderConfiguration(); + if (strict) { + // Set strict OWLAPI parsing + config = config.setStrict(true); + } + // Load the ontology + OWLOntology loadedOntology = manager.loadOntologyFromOntologyDocument(source, config); + + // Check for unparsed triples - get the document format and then the loader metadata + OWLDocumentFormat f = manager.getOntologyFormat(loadedOntology); + if (f == null) { + // This should never happen + throw new IOException("Unable to get an OWLDocumentFormat from loaded ontology"); + } + RDFParserMetaData metaData = (RDFParserMetaData) f.getOntologyLoaderMetaData(); + Set unparsed = metaData.getUnparsedTriples(); + Set parsed = loadedOntology.getAxioms(); + if (unparsed.size() > 0) { + boolean rdfReification = false; + 
StringBuilder sb = new StringBuilder(); + for (RDFTriple t : unparsed) { + // Check object to see if it's rdfs:Statement used in RDF reification + String objectIRI; + try { + objectIRI = t.getObject().getIRI().toString(); + } catch (UnsupportedOperationException e) { + // RDF Literals do not have IRIs + objectIRI = ""; + } + if (objectIRI.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement")) { + rdfReification = true; + } + // Add triple to error lines + sb.append("\n - ").append(t.toString().trim()); + } + Set undeclaredPredicates = getUndeclaredPredicates(parsed, unparsed); + if (rdfReification) { + // Add hint for fixing RDF reification + sb.append( + "\n\nHint: you may be using RDF reification - try replacing 'rdf:Statement' with 'owl:Axiom'"); + } + if (undeclaredPredicates.size() > 0) { + sb.append( + "\n\nHint: you have undeclared predicates - try adding 'rdf:type' declarations to the following:"); + for (IRI p : undeclaredPredicates) { + sb.append("\n - ").append(p.toString()); + } + } + sb.append("\n"); + + if (strict) { + // Fail on unparsed triples + throw new IOException(String.format(unparsedTriplesError, unparsed.size()) + sb.toString()); + } else { + // Log unparsed triples as errors + logger.error( + String.format( + "Input ontology contains %d triple(s) that could not be parsed:", + unparsed.size()) + + sb.toString()); + } + } + // No issues, return ontology + return loadedOntology; + } + /** * Given a path to an RDF/XML or TTL file and a RDF language, load the file as the default model * of a TDB dataset backed by a directory to improve processing time. Return the new dataset. @@ -1495,6 +1592,40 @@ private byte[] getOntologyFileData( return data; } + /** + * Given a set of parsed OWLAxioms and a set of unparsed RDF triples, get any predicates used in + * the unparsed set that are not builtins (OWL, RDF, RDFS) and do not have declarations in the + * parsed axioms. 
+ * + * @param parsedAxioms Set of parsed OWLAxioms from loaded ontology + * @param unparsedTriples Set of unparsed RDF triples from loaded ontology + * @return set of IRIs of any undeclared predicates + */ + private static Set getUndeclaredPredicates( + Set parsedAxioms, Set unparsedTriples) { + Set checkPredicates = new HashSet<>(); + for (RDFTriple t : unparsedTriples) { + IRI pIRI = t.getPredicate().getIRI(); + if (pIRI.toString().startsWith("http://www.w3.org/2002/07/owl#") + || pIRI.toString().startsWith("http://www.w3.org/1999/02/22-rdf-syntax-ns#") + || pIRI.toString().startsWith("http://www.w3.org/2000/01/rdf-schema#")) { + // Skip OWL, RDF, RDFS ... + continue; + } + checkPredicates.add(t.getPredicate().getIRI()); + } + // Look for types + for (OWLAxiom a : parsedAxioms) { + if (!a.getAxiomType().equals(AxiomType.DECLARATION)) { + continue; + } + OWLDeclarationAxiom dec = (OWLDeclarationAxiom) a; + IRI eIRI = dec.getEntity().getIRI(); + checkPredicates.remove(eIRI); + } + return checkPredicates; + } + /** * Given a gzipped ontology file and a catalog path, load the ontology from a zip input stream. 
* diff --git a/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java b/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java index 6858638bc..6c531da83 100644 --- a/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java +++ b/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java @@ -3,8 +3,10 @@ import static org.junit.Assert.assertEquals; import com.github.jsonldjava.core.Context; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.net.URISyntaxException; import java.util.HashMap; import java.util.HashSet; @@ -281,4 +283,44 @@ public void testSaveCompressedOntology() throws IOException { OWLOntology ontology2 = ioHelper.loadOntology(tempFile.getPath()); assertIdentical(ontology, ontology2); } + + /** + * Test loading RDF reification with strict mode turned on. Loading this string should result in + * an IOException. + * + * @throws IOException on error creating IOHelper + */ + @Test + public void testStrict() throws IOException { + IOHelper ioHelper = new IOHelper(); + ioHelper.setStrict(true); + String input = + "@prefix rdf: .\n\n_:Bb65616 rdf:type rdf:Statement ."; + InputStream inputStream = new ByteArrayInputStream(input.getBytes()); + boolean pass = false; + try { + ioHelper.loadOntology(inputStream); + } catch (IOException e) { + // We expect an IOException + pass = true; + } + assert pass; + } + + /** + * Test loading RDF reification with strict mode turned off. Loading this string should not result + * in an exception. + * + * @throws IOException on error creating IOHelper + */ + @Test + public void testNonStrict() throws IOException { + IOHelper ioHelper = new IOHelper(); + String input = + "@prefix rdf: .\n\n_:Bb65616 rdf:type rdf:Statement ."; + InputStream inputStream = new ByteArrayInputStream(input.getBytes()); + // No exception should be thrown here + ioHelper.loadOntology(inputStream); + assert true; + } }