-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
amidict submits SPARQL and creates dictionaries
- Loading branch information
Showing
6 changed files
with
161 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
## Summary | ||
|
||
This is a template and should be replaced by actual release notes... | ||
Enhancement to Dictionaries | ||
|
||
## Changes in this Release | ||
- First Change | ||
- Second Change | ||
- amidict can submit SPARQL queries to Wikidata | ||
- amidict translates wikidataAltLabel to synonyms | ||
- DSL for transforming dictionaries (EXTRACT, DELETE, etc.) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
src/main/java/org/contentmine/ami/tools/dictionary/DictionaryTransformer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package org.contentmine.ami.tools.dictionary; | ||
|
||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import nu.xom.Attribute; | ||
import nu.xom.Element; | ||
|
||
/** transform dictionary | ||
* | ||
* @author pm286 | ||
* | ||
*/ | ||
public class DictionaryTransformer { | ||
|
||
private static Pattern TRANSFORM_PATTERN = Pattern.compile("(EXTRACT|JUNK)\\(([^,]+),(.*)\\)"); | ||
|
||
private String operation; | ||
private String variableName; | ||
private Pattern pattern; | ||
private String newVariableName; | ||
|
||
public DictionaryTransformer(String newVariableName, String rawTransform) { | ||
this.newVariableName = newVariableName; | ||
readAndParse(rawTransform); | ||
} | ||
|
||
private void readAndParse(String rawTransform) { | ||
// System.out.println(TRANSFORM_PATTERN); | ||
Matcher matcher = TRANSFORM_PATTERN.matcher(rawTransform); | ||
if (!matcher.matches()) { | ||
throw new RuntimeException("bad rawTransform "+rawTransform); | ||
} | ||
operation = matcher.group(1); | ||
variableName = matcher.group(2); | ||
String regexString = matcher.group(3); | ||
pattern = Pattern.compile(regexString); | ||
} | ||
|
||
public String getOperation() { | ||
return operation; | ||
} | ||
|
||
public String getVariableName() { | ||
return variableName; | ||
} | ||
|
||
public Pattern getPattern() { | ||
return pattern; | ||
} | ||
|
||
public void transform(SimpleDictionary simpleDictionary) { | ||
List<Element> entryList = simpleDictionary.getEntryList(); | ||
for (Element entry : entryList) { | ||
// System.out.println(variableName + ": "+entry.toXML()); | ||
String value = entry.getAttributeValue(variableName); | ||
// System.out.println("pattern: "+pattern+" value: "+value); | ||
Matcher matcher = pattern.matcher(value); | ||
if (matcher.matches()) { | ||
String newValue = matcher.group(1); | ||
// System.out.println("g "+newValue); | ||
entry.addAttribute(new Attribute(newVariableName, newValue)); | ||
System.out.println(entry.toXML()); | ||
} | ||
} | ||
} | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 33 additions & 34 deletions
67
src/test/resources/org/contentmine/ami/dictionary/disease4.expected.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,34 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<dictionary title="disease4"> | ||
<entry description="injury caused by a bite from a snake" name="snakebite" term="snakebite" wikidata="Q68854" wikidataAltLabel="snake bite, snake bites, snake envenomation, snake envenoming" wikipedia="Snakebite"> | ||
<synonym>snake bite</synonym> | ||
<synonym>snake bites</synonym> | ||
<synonym>snake envenomation</synonym> | ||
<synonym>snake envenoming</synonym> | ||
</entry> | ||
<entry description="Human disease: infectious thrombophlebitis of the internal jugular vein" name="Lemierre's syndrome" term="Lemierre's syndrome" wikidata="Q72000" wikidataAltLabel="Lemierre syndrome, acute sore throat, human necrobacillosis, postanginal sepsis" wikipedia="Lemierre%27s_syndrome"> | ||
<synonym>Lemierre syndrome</synonym> | ||
<synonym>acute sore throat</synonym> | ||
<synonym>human necrobacillosis</synonym> | ||
<synonym>postanginal sepsis</synonym> | ||
</entry> | ||
<entry description="endocarditis that results from the deposition of small sterile vegetations on valve leaflets" name="marantic endocarditis" term="marantic endocarditis" wikidata="Q73518" wikidataAltLabel="non-bacterial thrombotic endocarditis, non-infective endocarditis, nonbacterial thrombotic endocarditis" wikipedia="Nonbacterial_thrombotic_endocarditis"> | ||
<synonym>non-bacterial thrombotic endocarditis</synonym> | ||
<synonym>non-infective endocarditis</synonym> | ||
<synonym>nonbacterial thrombotic endocarditis</synonym> | ||
</entry> | ||
<entry description="type of inflammatory bowel disease" name="Crohn's disease" term="Crohn's disease" wikidata="Q1472" wikidataAltLabel="Crohn disease, Crohn, Crohn's disease of colon, Crohn's disease of colon (disorder), Crohn's disease of large bowel, Granulomatous Colitis, Pediatric Crohn's disease, regional colitis, regional enteritis, regional enteritis of small intestine with large intestine, regional enteritis of the large bowel, regional Ileitis, regional ileocolitis" wikipedia="Crohn%27s_disease"> | ||
<synonym>Crohn disease</synonym> | ||
<synonym>Crohn</synonym> | ||
<synonym>Crohn's disease of colon</synonym> | ||
<synonym>Crohn's disease of colon (disorder)</synonym> | ||
<synonym>Crohn's disease of large bowel</synonym> | ||
<synonym>Granulomatous Colitis</synonym> | ||
<synonym>Pediatric Crohn's disease</synonym> | ||
<synonym>regional colitis</synonym> | ||
<synonym>regional enteritis</synonym> | ||
<synonym>regional enteritis of small intestine with large intestine</synonym> | ||
<synonym>regional enteritis of the large bowel</synonym> | ||
<synonym>regional Ileitis</synonym> | ||
<synonym>regional ileocolitis</synonym> | ||
</entry> | ||
</dictionary> | ||
<entry description="injury caused by a bite from a snake" name="snakebite" term="snakebite" wikidataAltLabel="snake bite, snake bites, snake envenomation, snake envenoming" wikidataURL="http://www.wikidata.org/entity/Q68854" wikipediaURL="https://en.wikipedia.org/wiki/Snakebite" wikidataID="Q68854"> | ||
<synonym>snake bite</synonym> | ||
<synonym>snake bites</synonym> | ||
<synonym>snake envenomation</synonym> | ||
<synonym>snake envenoming</synonym> | ||
</entry> | ||
<entry description="Human disease: infectious thrombophlebitis of the internal jugular vein" name="Lemierre's syndrome" term="Lemierre's syndrome" wikidataAltLabel="Lemierre syndrome, acute sore throat, human necrobacillosis, postanginal sepsis" wikidataURL="http://www.wikidata.org/entity/Q72000" wikipediaURL="https://en.wikipedia.org/wiki/Lemierre%27s_syndrome" wikidataID="Q72000"> | ||
<synonym>Lemierre syndrome</synonym> | ||
<synonym>acute sore throat</synonym> | ||
<synonym>human necrobacillosis</synonym> | ||
<synonym>postanginal sepsis</synonym> | ||
</entry> | ||
<entry description="endocarditis that results from the deposition of small sterile vegetations on valve leaflets" name="marantic endocarditis" term="marantic endocarditis" wikidataAltLabel="non-bacterial thrombotic endocarditis, non-infective endocarditis, nonbacterial thrombotic endocarditis" wikidataURL="http://www.wikidata.org/entity/Q73518" wikipediaURL="https://en.wikipedia.org/wiki/Nonbacterial_thrombotic_endocarditis" wikidataID="Q73518"> | ||
<synonym>non-bacterial thrombotic endocarditis</synonym> | ||
<synonym>non-infective endocarditis</synonym> | ||
<synonym>nonbacterial thrombotic endocarditis</synonym> | ||
</entry> | ||
<entry description="type of inflammatory bowel disease" name="Crohn's disease" term="Crohn's disease" wikidataAltLabel="Crohn disease, Crohn, Crohn's disease of colon, Crohn's disease of colon (disorder), Crohn's disease of large bowel, Granulomatous Colitis, Pediatric Crohn's disease, regional colitis, regional enteritis, regional enteritis of small intestine with large intestine, regional enteritis of the large bowel, regional Ileitis, regional ileocolitis" wikidataURL="http://www.wikidata.org/entity/Q1472" wikipediaURL="https://en.wikipedia.org/wiki/Crohn%27s_disease" wikidataID="Q1472"> | ||
<synonym>Crohn disease</synonym> | ||
<synonym>Crohn</synonym> | ||
<synonym>Crohn's disease of colon</synonym> | ||
<synonym>Crohn's disease of colon (disorder)</synonym> | ||
<synonym>Crohn's disease of large bowel</synonym> | ||
<synonym>Granulomatous Colitis</synonym> | ||
<synonym>Pediatric Crohn's disease</synonym> | ||
<synonym>regional colitis</synonym> | ||
<synonym>regional enteritis</synonym> | ||
<synonym>regional enteritis of small intestine with large intestine</synonym> | ||
<synonym>regional enteritis of the large bowel</synonym> | ||
<synonym>regional Ileitis</synonym> | ||
<synonym>regional ileocolitis</synonym> | ||
</entry> | ||
</dictionary> |