From 603b9dd5b1fc54b09db1e371338fca44adc25ced Mon Sep 17 00:00:00 2001
From: Xiang Li
Date: Thu, 31 Aug 2023 02:50:02 -0400
Subject: [PATCH] Add genomicLocationExplanation field to add location harmonization info

---
 .../annotation/NotationConverter.java         | 110 ++++++++++++++++++
 .../genome_nexus/model/VariantAnnotation.java |   9 ++
 .../GenomicLocationAnnotationServiceImpl.java |  12 +-
 3 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/component/src/main/java/org/cbioportal/genome_nexus/component/annotation/NotationConverter.java b/component/src/main/java/org/cbioportal/genome_nexus/component/annotation/NotationConverter.java
index 998ccad5..9e6d7ae5 100644
--- a/component/src/main/java/org/cbioportal/genome_nexus/component/annotation/NotationConverter.java
+++ b/component/src/main/java/org/cbioportal/genome_nexus/component/annotation/NotationConverter.java
@@ -346,4 +346,114 @@ public String longestCommonPrefix(String str1, String str2) {
         }
         return str1;
     }
+
+    /**
+     * Explains how normalization changed a genomic location.
+     *
+     * Start, end, reference allele and variant allele of the given location are
+     * compared with the output of normalizeGenomicLocation, and one sentence is
+     * emitted for every field that differs.
+     *
+     * @param genomicLocation location to explain, may be null
+     * @return the explanation, or null when the location is null, lacks a position
+     *         or an allele, or is identical to its normalized form
+     */
+    @Nullable
+    public String getGenomicLocationExplanation(GenomicLocation genomicLocation) {
+        if (genomicLocation == null) {
+            return null;
+        }
+
+        Integer start = genomicLocation.getStart();
+        Integer end = genomicLocation.getEnd();
+        String rawRef = genomicLocation.getReferenceAllele();
+        String rawVar = genomicLocation.getVariantAllele();
+        if (start == null || end == null || rawRef == null || rawVar == null) {
+            // an incomplete location cannot be compared field by field
+            return null;
+        }
+
+        StringBuilder explanation = new StringBuilder();
+        GenomicLocation normalizedGenomicLocation = normalizeGenomicLocation(genomicLocation);
+
+        String ref = rawRef.trim();
+        String var = rawVar.trim();
+        String commonBases = longestCommonPrefix(ref, var);
+        Integer normalizedStart = normalizedGenomicLocation.getStart();
+        Integer normalizedEnd = normalizedGenomicLocation.getEnd();
+        String normalizedRef = normalizedGenomicLocation.getReferenceAllele().trim();
+        String normalizedVar = normalizedGenomicLocation.getVariantAllele().trim();
+        // same "no bases" markers for both alleles: dash, empty string, NA, double dash
+        boolean refIsEmpty = normalizedRef.equals("-") || normalizedRef.length() == 0 || normalizedRef.equals("NA") || normalizedRef.contains("--");
+        boolean varIsEmpty = normalizedVar.equals("-") || normalizedVar.length() == 0 || normalizedVar.equals("NA") || normalizedVar.contains("--");
+
+        // start
+        if (!start.equals(normalizedStart)) {
+            explanation.append(String.format("Start position changes from %d to %d is attributed to the presence of common bases %s. ", start, normalizedStart, commonBases));
+        }
+
+        // end
+        if (!end.equals(normalizedEnd)) {
+            if (refIsEmpty) {
+                /*
+                Process Insertion end position
+                Example insertion: 17 36002277 36002278 - A
+                */
+                explanation.append(String.format("End position changes from %d to %d, end position should equal to (start + 1) to indicate the location of insertion. ", end, normalizedEnd));
+            } else if (varIsEmpty) {
+                if (normalizedRef.length() == 1) {
+                    /*
+                    Process Deletion (single position) end position
+                    Example deletion: 13 32914438 32914438 T -
+                    */
+                    explanation.append(String.format("End position changes from %d to %d, end position should equal to start position for single nucleotide deletion variants. ", end, normalizedEnd));
+                } else {
+                    /*
+                    Process Deletion (multiple position) end position
+                    Example deletion: 1 206811015 206811016 AC -
+                    */
+                    explanation.append(String.format("End position changes from %d to %d, end position should be the position of last deleted nucleotide. ", end, normalizedEnd));
+                }
+            } else if (normalizedRef.length() > 1 && normalizedVar.length() >= 1) {
+                /*
+                Process ONP (multiple deletion insertion) end position
+                Example INDEL : 2 216809708 216809709 CA T
+                */
+                explanation.append(String.format("End position changes from %d to %d, end position should be the position of last deleted nucleotide. ", end, normalizedEnd));
+            } else if (normalizedRef.length() == 1 && normalizedVar.length() > 1) {
+                /*
+                Process ONP (single deletion insertion) end position
+                Example INDEL : 17 7579363 7579363 A TTT
+                */
+                explanation.append(String.format("End position changes from %d to %d, end position should be the position of last deleted nucleotide. ", end, normalizedEnd));
+            } else {
+                /*
+                Process SNV end position
+                Example SNP : 2 216809708 216809708 C T
+                */
+                explanation.append(String.format("End position changes from %d to %d, end position should equal to start position for SNV variants. ", end, normalizedEnd));
+            }
+        }
+
+        // ref
+        if (!ref.equals(normalizedRef)) {
+            explanation.append(String.format("Reference allele changes from %s to %s is attributed to the presence of common bases %s. ", ref, normalizedRef.length() > 0 ? normalizedRef : "-", commonBases));
+        }
+
+        // var
+        if (!var.equals(normalizedVar)) {
+            explanation.append(String.format("Variant allele changes from %s to %s is attributed to the presence of common bases %s. ", var, normalizedVar.length() > 0 ? normalizedVar : "-", commonBases));
+        }
+
+        return explanation.length() > 0 ? explanation.toString().trim() : null;
+    }
+
+    /**
+     * Same as the GenomicLocation overload, for a location given as a string that is
+     * first converted with parseGenomicLocation.
+     */
+    @Nullable
+    public String getGenomicLocationExplanation(String genomicLocation) {
+        return this.getGenomicLocationExplanation(this.parseGenomicLocation(genomicLocation));
+    }
 }
diff --git a/model/src/main/java/org/cbioportal/genome_nexus/model/VariantAnnotation.java b/model/src/main/java/org/cbioportal/genome_nexus/model/VariantAnnotation.java
index f177fbba..7ac7f8fb 100644
--- a/model/src/main/java/org/cbioportal/genome_nexus/model/VariantAnnotation.java
+++ b/model/src/main/java/org/cbioportal/genome_nexus/model/VariantAnnotation.java
@@ -76,6 +76,7 @@ public class VariantAnnotation
     private SignalAnnotation signalAnnotation;
     private String originalVariantQuery;
     private Map dynamicProps;
+    private String genomicLocationExplanation;
 
     public VariantAnnotation()
     {
@@ -354,4 +355,12 @@ public Map getDynamicProps()
     {
         return this.dynamicProps;
     }
+
+    public String getGenomicLocationExplanation() {
+        return genomicLocationExplanation;
+    }
+
+    public void setGenomicLocationExplanation(String genomicLocationExplanation) {
+        this.genomicLocationExplanation = genomicLocationExplanation;
+    }
 }
diff --git a/service/src/main/java/org/cbioportal/genome_nexus/service/internal/GenomicLocationAnnotationServiceImpl.java b/service/src/main/java/org/cbioportal/genome_nexus/service/internal/GenomicLocationAnnotationServiceImpl.java
index 79e5772d..ddd299b3 100644
--- a/service/src/main/java/org/cbioportal/genome_nexus/service/internal/GenomicLocationAnnotationServiceImpl.java
+++ b/service/src/main/java/org/cbioportal/genome_nexus/service/internal/GenomicLocationAnnotationServiceImpl.java
@@ -58,7 +58,6 @@ public class GenomicLocationAnnotationServiceImpl implements GenomicLocationAnno
     private final VariantAnnotationService variantAnnotationService;
     private final GenomicLocationToVariantFormat genomicLocationToVariantFormat;
     private final GenomicLocationStringToVariantFormat genomicLocationStringToVariantFormat;
-
     private final GenomicLocationsToVariantFormats genomicLocationsToVariantFormats;
 
     @Autowired
@@ -82,7 +81,6 @@ public GenomicLocationAnnotationServiceImpl(CachedVariantRegionAnnotationFetcher
             this.genomicLocationToVariantFormat = notationConverter::genomicToHgvs;
             this.genomicLocationStringToVariantFormat = notationConverter::genomicToHgvs;
             this.genomicLocationsToVariantFormats = notationConverter::genomicToHgvs;
-
         }
     }
 
@@ -93,6 +91,7 @@ public VariantAnnotation getAnnotation(GenomicLocation genomicLocation)
         VariantAnnotation variantAnnotation = this.variantAnnotationService.getAnnotation(this.genomicLocationToVariantFormat.convert(genomicLocation));
         genomicLocation.setOriginalInput(genomicLocation.toString());
         variantAnnotation.setOriginalVariantQuery(genomicLocation.getOriginalInput());
+        variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(genomicLocation));
         return variantAnnotation;
     }
 
@@ -130,6 +129,10 @@ public List getAnnotations(List genomicLocat
                 }
             }
         });
+        for (VariantAnnotation variantAnnotation : variantAnnotations) {
+            // the explanation is derived from the original query string stored on the annotation
+            variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(variantAnnotation.getOriginalVariantQuery()));
+        }
         return variantAnnotations;
     }
 
@@ -146,6 +149,7 @@ public VariantAnnotation getAnnotation(String genomicLocation,
             token,
             fields);
         variantAnnotation.setOriginalVariantQuery(genomicLocation);
+        variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(genomicLocation));
         return variantAnnotation;
     }
 
@@ -180,6 +184,10 @@ public List getAnnotations(List genomicLocat
                 }
             }
         });
+        for (VariantAnnotation variantAnnotation : variantAnnotations) {
+            // the explanation is derived from the original query string stored on the annotation
+            variantAnnotation.setGenomicLocationExplanation(this.notationConverter.getGenomicLocationExplanation(variantAnnotation.getOriginalVariantQuery()));
+        }
         return variantAnnotations;
     }
 