Skip to content

Commit

Permalink
Merge pull request #392 from rctauber/extract-source
Browse files Browse the repository at this point in the history
Add option to annotate extracted entities with source
  • Loading branch information
jamesaoverton authored Feb 4, 2019
2 parents edcbf51 + 6348820 commit 593f5a5
Show file tree
Hide file tree
Showing 11 changed files with 1,492 additions and 37 deletions.
534 changes: 534 additions & 0 deletions docs/examples/annotated_source.owl

Large diffs are not rendered by default.

534 changes: 534 additions & 0 deletions docs/examples/changed_source.owl

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions docs/examples/source-map.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
term source
UBERON:0000916 http://purl.obolibrary.org/obo/go.owl
UBERON:0001062 GO
37 changes: 37 additions & 0 deletions docs/extract.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,39 @@ You can also include ontology annotations from the input ontology with `--copy-o
--term UBERON:0000916 \
--copy-ontology-annotations true \
--output results/annotated_module.owl
## Source Annotations

`extract` provides an option to annotate extracted terms with `rdfs:isDefinedBy`. If the term already has an annotation using this property, the existing annotation will be copied and no new annotation will be added.

robot extract --method BOT \
--input annotated.owl \
--term UBERON:0000916 \
--annotate-with-source true \
--output results/annotated_source.owl

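By default, the object of the property is an ontology IRI derived from the term's IRI: the part of the IRI before the final `#` (or, if there is no `#`, before the final `_` or `/`) is lowercased and `.owl` is appended. For example, the IRI for `GO:0000001` (`http://purl.obolibrary.org/obo/GO_0000001`) would receive the source `http://purl.obolibrary.org/obo/go.owl`. This rule is designed for OBO Foundry-style IRIs and may not give a meaningful source for other IRI formats.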

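Sometimes classes are adopted by other ontologies, but retain their original IRI. In this case, you can provide the path to a [term-to-source mapping file](/examples/source-map.tsv) as CSV or TSV. The file name must end in `.csv` or `.tsv`, and the first line of the file is treated as a header row and skipped.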

robot --prefix 'GO: http://purl.obolibrary.org/obo/GO_' extract --method BOT \
--input annotated.owl \
--term UBERON:0000916 \
--annotate-with-source true \
--sources source-map.tsv \
--output results/changed_source.owl

The mapping file can either use full IRIs:

```
http://purl.obolibrary.org/obo/BFO_0000001,http://purl.obolibrary.org/obo/ro.owl
```

Or prefixes, as long as the [prefix is valid](/global#prefixes):

```
BFO:0000001,RO
```

---

Expand All @@ -81,3 +114,7 @@ The following flags *should not* be used with STAR, TOP, or BOT methods:
* `--upper-term` & `--upper-terms`
* `--lower-term` & `--lower-terms`
* `--branch-from-term` & `--branch-from-terms`

### Invalid Source Map Error

The input for `--sources` must be a CSV or TSV file. The format is determined from the file extension, so the file name must end in `.csv` or `.tsv`.
4 changes: 4 additions & 0 deletions docs/global.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,7 @@ xml:base="&obo;obi.owl"
### JSON-LD Error

ROBOT encountered a problem while writing the given prefixes to JSON-LD.

### Missing File Error

The file provided for an input does not exist. Check the path and try again.
4 changes: 4 additions & 0 deletions robot-command/src/main/java/org/obolibrary/robot/Command.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
* @author <a href="mailto:james@overton.ca">James A. Overton</a>
*/
public interface Command {

String global = "global#";
String missingFileError = global + "MISSING FILE ERROR file '%s' for '%s' does not exist";

/**
* Name of the command.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
package org.obolibrary.robot;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import java.io.*;
import java.util.*;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.util.DefaultPrefixManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.manchester.cs.owlapi.modularity.ModuleType;
Expand Down Expand Up @@ -36,12 +41,17 @@ public class ExtractCommand implements Command {
private static final String invalidMethodError =
NS + "INVALID METHOD ERROR method must be: MIREOT, STAR, TOP, BOT";

/** Error message when a MIREOT option is used for SLME. */
private static final String invalidOptionError =
NS
+ "INVALID OPTION ERROR "
+ "only --term or --term-file can be used to specify extract term(s) "
+ "for STAR, TOP, or BOT";

/** Error message when the source map is not TSV or CSV. */
private static final String invalidSourceMapError =
NS + "INVALID SOURCE MAP ERROR --sources input must be .tsv or .csv";

/** Store the command-line options for the command. */
private Options options;

Expand All @@ -62,6 +72,8 @@ public ExtractCommand() {
o.addOption("b", "branch-from-term", true, "root term of branch to extract");
o.addOption("B", "branch-from-terms", true, "root terms of branches to extract");
o.addOption("c", "copy-ontology-annotations", true, "if true, include ontology annotations");
o.addOption("a", "annotate-with-source", true, "if true, annotate terms with rdfs:isDefinedBy");
o.addOption("s", "sources", true, "specify a mapping file of term to source ontology");
options = o;
}

Expand Down Expand Up @@ -143,6 +155,14 @@ public CommandState execute(CommandState state, String[] args) throws Exception
outputIRI = inputOntology.getOntologyID().getOntologyIRI().orNull();
}

// Determine if terms should be annotated with isDefinedBy
boolean annotateSource = CommandLineHelper.getBooleanValue(line, "annotate-with-source", false);
String sourceMapPath = CommandLineHelper.getOptionalValue(line, "sources");
Map<IRI, IRI> sourceMap = new HashMap<>();
if (sourceMapPath != null) {
sourceMap = getSourceMap(ioHelper, sourceMapPath);
}

// Get method, make sure it has been specified
String method =
CommandLineHelper.getRequiredValue(line, "method", "method of extraction must be specified")
Expand Down Expand Up @@ -199,14 +219,17 @@ public CommandState execute(CommandState state, String[] args) throws Exception
// First check for lower IRIs, upper IRIs can be null or not
if (lowerIRIs != null) {
outputOntologies.add(
MireotOperation.getAncestors(inputOntology, upperIRIs, lowerIRIs, null));
MireotOperation.getAncestors(
inputOntology, upperIRIs, lowerIRIs, null, annotateSource, sourceMap));
// If there are no lower IRIs, there shouldn't be any upper IRIs
} else if (upperIRIs != null) {
throw new IllegalArgumentException(missingLowerTermError);
}
// Check for branch IRIs
if (branchIRIs != null) {
outputOntologies.add(MireotOperation.getDescendants(inputOntology, branchIRIs, null));
outputOntologies.add(
MireotOperation.getDescendants(
inputOntology, branchIRIs, null, annotateSource, sourceMap));
}
}
outputOntology = MergeOperation.merge(outputOntologies);
Expand All @@ -229,7 +252,9 @@ public CommandState execute(CommandState state, String[] args) throws Exception
Set<IRI> terms =
OntologyHelper.filterExistingTerms(
inputOntology, CommandLineHelper.getTerms(ioHelper, line), false);
outputOntology = ExtractOperation.extract(inputOntology, terms, outputIRI, moduleType);
outputOntology =
ExtractOperation.extract(
inputOntology, terms, outputIRI, moduleType, annotateSource, sourceMap);
} else {
throw new Exception(invalidMethodError);
}
Expand All @@ -248,4 +273,61 @@ public CommandState execute(CommandState state, String[] args) throws Exception
state.setOntology(outputOntology);
return state;
}

/**
 * Given an IOHelper and the path to a term-to-source map, return a map of term IRI to source IRI.
 *
 * <p>The column separator is chosen from the file extension: tab for {@code .tsv}, comma for
 * {@code .csv}. The first line of the file is skipped as a header. In every following row the
 * first column is the term and the second column is its source. Rows with fewer than two columns
 * (for example, blank lines) are ignored.
 *
 * <p>A source is resolved in one of two ways. If the prefix manager knows a prefix named {@code
 * <source>:}, the source IRI is that prefix's namespace with one trailing {@code _}, {@code #} or
 * {@code /} removed, lowercased, with {@code .owl} appended. Otherwise the source string is used
 * as the IRI as-is.
 *
 * @param ioHelper IOHelper to handle prefixes
 * @param sourceMapPath path of the term-to-source map
 * @return map of term IRI to source IRI
 * @throws Exception if the file does not exist, does not end in .tsv or .csv, or cannot be read
 */
private static Map<IRI, IRI> getSourceMap(IOHelper ioHelper, String sourceMapPath)
    throws Exception {
  File sourceMapFile = new File(sourceMapPath);
  if (!sourceMapFile.exists()) {
    throw new Exception(String.format(missingFileError, sourceMapPath, "--sources"));
  }

  char separator;
  if (sourceMapPath.endsWith(".tsv")) {
    separator = '\t';
  } else if (sourceMapPath.endsWith(".csv")) {
    separator = ',';
  } else {
    throw new Exception(invalidSourceMapError);
  }

  DefaultPrefixManager pm = ioHelper.getPrefixManager();
  Map<IRI, IRI> sourceMap = new HashMap<>();

  // try-with-resources so the file handle is released even when a row fails to parse
  try (Reader reader = new FileReader(sourceMapFile);
      CSVReader csv =
          new CSVReaderBuilder(reader)
              .withCSVParser(new CSVParserBuilder().withSeparator(separator).build())
              .build()) {
    // Skip first line (header)
    csv.skip(1);

    for (String[] line : csv) {
      // Blank lines and rows without a source column cannot be mapped
      if (line.length < 2) {
        continue;
      }
      IRI entity = ioHelper.createIRI(line[0]);

      // Maybe create a source IRI from a prefix
      // Otherwise the full IRI should be provided
      IRI source;
      String sourceStr = line[1];
      String namespace = pm.getPrefix(sourceStr + ":");
      if (namespace != null) {
        if (namespace.endsWith("_") || namespace.endsWith("#") || namespace.endsWith("/")) {
          namespace = namespace.substring(0, namespace.length() - 1);
        }
        // Locale.ROOT keeps the result independent of the JVM default locale
        source = IRI.create(namespace.toLowerCase(Locale.ROOT) + ".owl");
      } else {
        source = IRI.create(sourceStr);
      }
      sourceMap.put(entity, source);
    }
  }

  return sourceMap;
}
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
package org.obolibrary.robot;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLEntity;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.*;
import org.semanticweb.owlapi.model.parameters.Imports;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
import uk.ac.manchester.cs.owlapi.modularity.ModuleType;
import uk.ac.manchester.cs.owlapi.modularity.SyntacticLocalityModuleExtractor;

Expand All @@ -23,6 +22,12 @@ public class ExtractOperation {
/** Logger. */
private static final Logger logger = LoggerFactory.getLogger(ExtractOperation.class);

/** Shared data factory. */
private static OWLDataFactory dataFactory = new OWLDataFactoryImpl();

/** RDFS isDefinedBy annotation property. */
private static OWLAnnotationProperty isDefinedBy = dataFactory.getRDFSIsDefinedBy();

/**
* Extract a set of terms from an ontology using the OWLAPI's SyntacticLocalityModuleExtractor
* (SLME). The input ontology is not changed.
Expand All @@ -37,6 +42,30 @@ public class ExtractOperation {
public static OWLOntology extract(
    OWLOntology inputOntology, Set<IRI> terms, IRI outputIRI, ModuleType moduleType)
    throws OWLOntologyCreationException {
  // Delegate to the six-argument overload with source annotation switched off;
  // no term-to-source map is supplied in that case.
  final boolean annotateSource = false;
  final Map<IRI, IRI> sourceMap = null;
  return extract(inputOntology, terms, outputIRI, moduleType, annotateSource, sourceMap);
}

/**
* Extract a set of terms from an ontology using the OWLAPI's SyntacticLocalityModuleExtractor
* (SLME). The input ontology is not changed.
*
* @param inputOntology the ontology to extract from
* @param terms a set of IRIs for terms to extract
* @param outputIRI the OntologyIRI of the new ontology
* @param moduleType determines the type of extraction; defaults to STAR
* @param annotateSource if true, annotate copied classes with rdfs:isDefinedBy
* @param sourceMap map of term IRI to source IRI
* @return a new ontology (with a new manager)
* @throws OWLOntologyCreationException on any OWLAPI problem
*/
public static OWLOntology extract(
OWLOntology inputOntology,
Set<IRI> terms,
IRI outputIRI,
ModuleType moduleType,
boolean annotateSource,
Map<IRI, IRI> sourceMap)
throws OWLOntologyCreationException {
logger.debug("Extracting...");

Set<OWLEntity> entities = new HashSet<>();
Expand All @@ -53,7 +82,58 @@ public static OWLOntology extract(
new SyntacticLocalityModuleExtractor(
inputOntology.getOWLOntologyManager(), inputOntology, type);

return OWLManager.createOWLOntologyManager()
.createOntology(extractor.extract(entities), outputIRI);
OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
OWLOntology outputOntology = manager.createOntology(extractor.extract(entities), outputIRI);
// Maybe annotate entities with rdfs:isDefinedBy
if (annotateSource) {
Set<OWLAnnotationAxiom> sourceAxioms = new HashSet<>();
for (OWLEntity entity : OntologyHelper.getEntities(outputOntology)) {
// Check if rdfs:isDefinedBy already exists
Set<OWLAnnotationValue> existingValues =
OntologyHelper.getAnnotationValues(outputOntology, isDefinedBy, entity.getIRI());
if (existingValues == null || existingValues.size() == 0) {
// If not, add it
sourceAxioms.add(getIsDefinedBy(entity, sourceMap));
}
}
manager.addAxioms(outputOntology, sourceAxioms);
}

return outputOntology;
}

/**
 * Given an OWLEntity, return an OWLAnnotationAssertionAxiom indicating the source ontology with
 * rdfs:isDefinedBy.
 *
 * <p>If the source map has a (non-null) source for the entity's IRI, that source is used.
 * Otherwise the source is derived from the IRI string itself (see {@code deriveSourceIRI}).
 *
 * @param entity entity to get source of
 * @param sourceMap map of term IRI to source IRI; may be null when no mapping was provided
 * @return OWLAnnotationAssertionAxiom with rdfs:isDefinedBy as the property
 */
protected static OWLAnnotationAxiom getIsDefinedBy(OWLEntity entity, Map<IRI, IRI> sourceMap) {
  IRI entityIRI = entity.getIRI();
  // An explicit term-to-source mapping takes precedence over the derived source
  IRI base = sourceMap == null ? null : sourceMap.get(entityIRI);
  if (base == null) {
    base = IRI.create(deriveSourceIRI(entityIRI.toString()));
  }
  return dataFactory.getOWLAnnotationAssertionAxiom(isDefinedBy, entityIRI, base);
}

/**
 * Derive a source ontology IRI string by editing the term IRI string: cut the IRI at its last
 * {@code #}, else at its last {@code _}, else at its last {@code /}, lowercase what remains, and
 * append {@code .owl} (unless the IRI already contains {@code .owl#}). If none of these
 * delimiters is present, the whole IRI is used as the namespace.
 *
 * <p>Warning - this may not work with non-OBO Foundry terms, depending on the IRI format!
 *
 * @param iri term IRI as a string
 * @return derived source ontology IRI as a string
 */
private static String deriveSourceIRI(String iri) {
  int cut;
  boolean appendOwl = true;
  if (iri.contains("#")) {
    cut = iri.lastIndexOf('#');
    // e.g. http://example.org/foo.owl#Bar already names the ontology document
    appendOwl = !iri.contains(".owl#");
  } else if (iri.contains("_")) {
    cut = iri.lastIndexOf('_');
  } else {
    cut = iri.lastIndexOf('/');
  }
  // cut is -1 only when the IRI has no '#', '_' or '/'; avoid substring(0, -1)
  String namespace = cut < 0 ? iri : iri.substring(0, cut);
  // Locale.ROOT keeps the result independent of the JVM default locale
  namespace = namespace.toLowerCase(java.util.Locale.ROOT);
  return appendOwl ? namespace + ".owl" : namespace;
}
}
Loading

0 comments on commit 593f5a5

Please sign in to comment.