Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 2 #51

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,16 @@
<!-- END added for deeplearning4j -->
<!-- ............................ -->
</plugins>

<extensions>
<!-- Required for metamaplite, non-FOSS, deployed on a private Maven repository -->
<extension>
<groupId>org.springframework.build</groupId>
<artifactId>aws-maven</artifactId>
<version>5.0.0.RELEASE</version>
</extension>
</extensions>

</build>

<!-- BEGIN added for deeplearning4j -->
Expand Down Expand Up @@ -387,5 +397,20 @@
<!--<artifactId>snowball-stemmer</artifactId>-->
<!--<version>1.3.0.581.1</version>-->
<!--</dependency>-->
<dependency>
<!-- MetaMapLite is not FOSS -->
<groupId>gov.nih.nlm.nls</groupId>
<artifactId>metamaplite</artifactId>
<version>3.1-SNAPSHOT</version>
</dependency>
</dependencies>

<repositories>
<!-- Private Maven repository used to host MetaMapLite -->
<repository>
<id>maven.imi.medunigraz.at</id>
<name>S3 Maven Repository</name>
<url>s3://maven.imi.medunigraz.at/release</url>
</repository>
</repositories>
</project>
26 changes: 16 additions & 10 deletions src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
package at.medunigraz.imi.bst.n2c2.model;


import at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper.MetaMapLiteFacade;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.Period;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.time.*;
import java.util.*;


public class Patient {
Expand Down Expand Up @@ -47,6 +41,18 @@ public String getText() {
/**
 * Looks up the eligibility recorded for a single criterion.
 *
 * @param criterion the criterion to look up
 * @return whatever {@code criteria} holds for {@code criterion}
 */
public Eligibility getEligibility(Criterion criterion) {
    final Eligibility eligibility = criteria.get(criterion);
    return eligibility;
}

/**
 * Maps the text of this patient to concept identifiers using the shared
 * {@link MetaMapLiteFacade} instance.
 *
 * @return the result of {@code MetaMapLiteFacade#map} applied to {@link #getText()}
 */
public List<String> getCUIs() {
    final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
    final String text = getText();
    return mapper.map(text);
}

/**
 * Maps the text of this patient to a duplicate-free set of concept
 * identifiers using the shared {@link MetaMapLiteFacade} instance.
 *
 * @return the result of {@code MetaMapLiteFacade#uniqueMap} applied to {@link #getText()}
 */
public Set<String> getUniqueCUIs() {
    final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
    final String text = getText();
    return mapper.uniqueMap(text);
}

/**
 * Produces an annotated version of the text of this patient using the
 * shared {@link MetaMapLiteFacade} instance.
 *
 * @return the result of {@code MetaMapLiteFacade#annotate} applied to {@link #getText()}
 */
public String getAnnotatedText() {
    final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
    final String text = getText();
    return mapper.annotate(text);
}

/**
* getAllVisits() returns all the visits of one patient as
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Contract for components that map free text to concept identifiers.
 *
 * @author Michel Oleynik <michel.oleynik@stud.medunigraz.at>
 * @see <a href="https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/ConceptMapper.java">Original ConceptMapper in reassess</a>
 */
public interface ConceptMapper {

    /**
     * Maps a text to the concept identifiers found in it.
     *
     * @param text the text to be mapped
     * @return the identifiers found, possibly containing duplicates
     */
    List<String> map(String text);

    /**
     * Produces an annotated rendering of a text.
     *
     * @param text the text to be annotated
     * @return the annotated text
     */
    String annotate(String text);

    /**
     * Maps a text like {@link #map(String)} does, but collapses duplicates.
     *
     * @param text the text to be mapped
     * @return a new {@link HashSet} holding every element returned by {@link #map(String)}
     */
    default Set<String> uniqueMap(String text) {
        List<String> mapped = map(text);
        Set<String> unique = new HashSet<>(mapped.size());
        unique.addAll(mapped);
        return unique;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;

import bioc.BioCDocument;
import gov.nih.nlm.nls.metamap.document.FreeText;
import gov.nih.nlm.nls.metamap.lite.types.ConceptInfo;
import gov.nih.nlm.nls.metamap.lite.types.Entity;
import gov.nih.nlm.nls.metamap.lite.types.Ev;
import gov.nih.nlm.nls.ner.MetaMapLite;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
* Facade for MetaMapLite (https://metamap.nlm.nih.gov/MetaMapLite.shtml).
* Requires an UMLS license.
*
* @author Michel Oleynik <michel.oleynik@stud.medunigraz.at>
* @link https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/metamap/MetaMapLiteFacade.java
*/
public class MetaMapLiteFacade implements ConceptMapper {

private static final Logger LOG = LogManager.getLogger();

private static MetaMapLiteFacade instance = null;
private static Properties properties;
private MetaMapLite metaMapLiteInst;

private MetaMapLiteFacade() {
LOG.info("Building MetaMap instance...");

initProperties();

try {
metaMapLiteInst = new MetaMapLite(properties);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

LOG.info("Building MetaMap instance finished.");
}

public static MetaMapLiteFacade getInstance() {
if (instance == null) {
instance = new MetaMapLiteFacade();
}
return instance;
}

private static void initProperties() {
properties = MetaMapLite.getDefaultConfiguration();

String configPropertyFilename = System.getProperty("metamaplite.property.file",
MetaMapLiteFacade.class.getResource("/metamaplite.properties").getFile());

try {
properties.load(new FileReader(configPropertyFilename));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

MetaMapLite.expandModelsDir(properties);
MetaMapLite.expandIndexDir(properties);
}

public static boolean isModelsDirValid() {
initProperties();
return (new File(properties.getProperty("opennlp.models.directory"))).canRead();
}

/*
* (non-Javadoc)
* @see at.medunigraz.imi.reassess.conceptmapper.ConceptMapper#map(java.lang.String)
*/
public List<String> map(String text) {
List<String> ret = new ArrayList<String>();

List<Entity> entityList = process(text);

for (Entity entity : entityList) {
// TODO Should submatches be skipped as in annotate()?
for (Ev ev : entity.getEvSet()) {
ret.add(ev.getConceptInfo().getCUI());
LOG.trace(ev);
}
}

return ret;
}

private List<Entity> process(String text) {
int length = text.length();
LOG.debug("Processing \"{}\"...", text.substring(0, Math.min(length, 20)));

long start = System.currentTimeMillis();

BioCDocument document = FreeText.instantiateBioCDocument(text);
document.setID("1");
List<BioCDocument> documentList = new ArrayList<BioCDocument>();
documentList.add(document);

List<Entity> entityList = null;
try {
entityList = metaMapLiteInst.processDocumentList(documentList);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

long end = System.currentTimeMillis();

float duration = (end - start + 1) / 1000f;

LOG.debug("Processed {} chars in {} sec ({} chars/sec).", length, duration, length / duration);

return entityList;
}

/*
* (non-Javadoc)
* @see at.medunigraz.imi.reassess.conceptmapper.ConceptMapper#annotate(java.lang.String)
*/
public String annotate(String text) {
List<Entity> entityList = process(text);

int length = text.length();

StringBuilder sb = new StringBuilder(length);

int i = 0;
for (Entity entity : entityList) {
int start = entity.getStart();

// Skip submatches
if (start < i) {
continue;
}

String matched = entity.getMatchedText();

sb.append(text, i, start);
sb.append("<");
sb.append(matched);
sb.append("|");

for (Ev ev : entity.getEvSet()) {
ConceptInfo conceptInfo = ev.getConceptInfo();
sb.append(conceptInfo.getCUI());
sb.append(":");
sb.append(conceptInfo.getPreferredName());
sb.append("|");
}
sb.append(">");

i = entity.getStart() + entity.getLength();
}

sb.append(text, i, length);

return sb.toString();
}

}
8 changes: 8 additions & 0 deletions src/main/resources/metamaplite.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
opennlp.models.directory=data/models
metamaplite.index.directory=data/ivf/strict
metamaplite.excluded.termsfile=data/specialterms.txt
metamaplite.segmentation.method=BLANKLINES
metamaplite.sourceset=all
metamaplite.semanticgroup=all
#metamaplite.sourceset = SNOMEDCT_US
#metamaplite.semanticgroup = neop
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ public class ConfigTest {

// Checks that Config.SVM_COST_MAKES_DECISIONS equals 1 within a tolerance of 0.00001.
@Test
public void getSVMCost() {
// NOTE(review): this message-less assertion looks like the pre-change side of the diff hunk;
// it repeats the check made by the next line - confirm whether it is meant to remain.
assertEquals(1, Config.SVM_COST_MAKES_DECISIONS, 0.00001);
// Same check, with a failure message suggesting how to regenerate config.properties.
assertEquals("Your config.properties was not properly generated. Running `mvn clean test` may fix it.", 1, Config.SVM_COST_MAKES_DECISIONS, 0.00001);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package at.medunigraz.imi.bst.n2c2.integration;

import at.medunigraz.imi.bst.n2c2.dao.PatientDAO;
import at.medunigraz.imi.bst.n2c2.model.Patient;
import at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper.MetaMapLiteFacade;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.xml.sax.SAXException;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.junit.Assert.assertTrue;

/**
 * Integration test for the concept-mapping accessors of {@link Patient}, run against the
 * bundled {@code /gold-standard/sample.xml}. Every test is skipped (via a JUnit assumption)
 * when {@link MetaMapLiteFacade#isModelsDirValid()} reports false.
 */
@Category(IntegrationTest.class)
public class PatientDAOConceptMapperIntegrationTest {

    private static final File SAMPLE = new File(
            PatientDAOConceptMapperIntegrationTest.class
                    .getResource("/gold-standard/sample.xml")
                    .getPath());

    private final Patient patient;

    public PatientDAOConceptMapperIntegrationTest() throws IOException, SAXException {
        PatientDAO dao = new PatientDAO();
        this.patient = dao.fromXML(SAMPLE);
    }

    @Before
    public void setUp() {
        boolean modelsAvailable = MetaMapLiteFacade.isModelsDirValid();
        Assume.assumeTrue(modelsAvailable);
    }

    /** CUIs that the tests expect to be found in the sample document. */
    private static List<String> sampleCuis() {
        List<String> cuis = new ArrayList<>();
        cuis.add("C0043094"); // Weight Gain
        cuis.add("C0013404"); // Dyspnea
        cuis.add("C0020580"); // Hypesthesia
        return cuis;
    }

    @Test
    public void getCUIs() {
        List<String> wanted = sampleCuis();

        List<String> found = patient.getCUIs();
        assertTrue(found.containsAll(wanted));
    }

    @Test
    public void getUniqueCUIs() {
        Set<String> wanted = new HashSet<>(sampleCuis());

        Set<String> found = patient.getUniqueCUIs();
        assertTrue(found.containsAll(wanted));
    }

    @Test
    public void getAnnotatedText() {
        String wantedSnippet = "<Patient|C0030705:Patients|> is concerned about <weight gain|C0043094:Weight Gain|>";

        String annotated = patient.getAnnotatedText();
        assertTrue(annotated.contains(wantedSnippet));
    }

}
Loading