Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Mar 20, 2020
2 parents fbfa359 + 4eb61a5 commit 69ba1ed
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 66 deletions.
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
primaryTumorLocation,doids
Adrenal,3953
Anus,14110
Biliary,4607;4947
Bone/Soft tissue,201;1115;3347
Bone/Soft tissue,201;1115;3347;9253
Breast,1612
Colon/Rectum,9256;219
CUP,UNMAPPED
Colon/Rectum,9256
Double primary,UNMAPPED
Esophagus,5041;4944
Eye,2174
GI-tract,3119
Head and neck,11934
Kidney,4450;263
Liver,3571
Expand All @@ -16,7 +17,7 @@ Lymphoid,60058
Mesothelioma,1790
Myeloid,70004
Nervous system,3093;1319
NET,169;1800;4434;1798;5410
Neuroendocrine,169;1800;4434;1798;5410;0050872
Ovary,2394
Pancreas,1793
Penile,11615
Expand All @@ -30,4 +31,6 @@ Thyroid,1781
Unknown,UNMAPPED
Urinary tract,3996
Uterus,363;1380
Vulva,1245;119
Vagina,119
Vascular,175
Vulva,1245
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class ViccAmpsDelExtractorTestApplication {
public static void main(String[] args) throws IOException, InterruptedException {
String viccJsonPath = System.getProperty("user.home") + "/hmf/projects/vicc/all.json";

String source = "oncokb";
String source = "civic";
LOGGER.info("Reading VICC json from {} with source '{}'", viccJsonPath, source);
List<ViccEntry> viccEntries = ViccJsonReader.readSingleKnowledgebase(viccJsonPath, source);
LOGGER.info("Read {} entries", viccEntries.size());
Expand All @@ -52,7 +52,7 @@ public static void main(String[] args) throws IOException, InterruptedException
List<EventType> eventType = EventTypeAnalyzer.determineEventType(viccEntry);

for (EventType type : eventType) {
// LOGGER.info("gene: " + type.gene() + " name: " + type.name() + " eventMap: " + type.eventMap() + " source: " + type.source());
LOGGER.info("gene: " + type.gene() + " name: " + type.name() + " eventMap: " + type.eventMap() + " source: " + type.source());
// Generating known events
//TODO: map every genomic event to one object
//TODO: if combined event use single event for determine known events
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ public static List<EventType> determineEventType(@NotNull ViccEntry viccEntry) {
}

eventMap.put(gene, Lists.newArrayList(name));
LOGGER.info(eventMap);

if (eventMap.isEmpty()) {
LOGGER.warn("Skipping feature interpretation of '{}' on gene '{}' with biomarker type '{}' on source '{}' ",
Expand Down Expand Up @@ -110,16 +109,15 @@ public static List<EventType> determineEventType(@NotNull ViccEntry viccEntry) {
if (name.split(" ", 2).length == 1 && gene.contains("-")) {
eventInfo = FUSION_PAIR;
eventMap.put(gene, Lists.newArrayList(eventInfo));
LOGGER.info(eventMap);
} else {
eventInfo = name.split(" ", 2)[1];
if (eventInfo.contains("fusion") || eventInfo.contains("Fusion") ) {
if (eventInfo.contains("fusion") || eventInfo.contains("Fusion")) {
if (gene.contains("-")) {
eventInfo = FUSION_PAIR;
} else {
eventInfo = FUSION_PROMISCUOUS;
}
}
}

if (eventInfo.equals(".")) {
eventInfo = ONCOGENIC_MUTATION;
Expand Down Expand Up @@ -165,22 +163,25 @@ public static List<EventType> determineEventType(@NotNull ViccEntry viccEntry) {
combinedEvent = true;

if (eventMap.size() == 0) {
eventMap.put(fusion, Lists.newArrayList("Fusion"));
eventMap.put(fusion, Lists.newArrayList(FUSION_PAIR));
if (eventMap.containsKey(geneVariant)) {
eventMap.put(geneVariant, Lists.newArrayList("Fusion", variant));
eventMap.put(geneVariant, Lists.newArrayList(FUSION_PAIR, variant));
} else {
eventMap.put(fusion, Lists.newArrayList("Fusion"));
eventMap.put(fusion, Lists.newArrayList(FUSION_PAIR));
eventMap.put(geneVariant, Lists.newArrayList(variant));
}
}
} else if (combinedEventConvertToSingleEvent.length >= 2) {
LOGGER.warn("This event has more events, which is not interpretated!");
}
} else if (name.contains("-")) {
eventMap.put(name, Lists.newArrayList("Fusion"));
eventMap.put(name, Lists.newArrayList(FUSION_PAIR));
} else if (name.equals("TRUNCATING FUSION")) {
eventMap.put(gene, Lists.newArrayList(name));
} else if (name.contains("FUSION") || name.contains("FUSIONS")) {
eventMap.put(gene, Lists.newArrayList(FUSION_PROMISCUOUS));
} else {
if (name.contains("+")) {
LOGGER.info("combined: " + name);

combinedEvent = true;
String[] combinedEventConvertToSingleEvent = name.replace("+", " ").split(" ", 2);
Expand All @@ -201,6 +202,14 @@ public static List<EventType> determineEventType(@NotNull ViccEntry viccEntry) {
eventMap.put(gene, Lists.newArrayList(name));
}
}
} else if (name.contains("+") && !name.contains("c.") && !name.contains("C.")) {
combinedEvent = true;
String[] combinedEventConvertToSingleEvent = name.split("\\+", 2);
String event1 = combinedEventConvertToSingleEvent[0];
String event2 = combinedEventConvertToSingleEvent[1];

eventMap.put(gene, Lists.newArrayList(event1, event2));

} else {
eventMap.put(gene, Lists.newArrayList(name));
}
Expand Down
83 changes: 42 additions & 41 deletions patient-db/src/main/resources/tumor_location_mapping.csv
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ searchTerm,primaryTumorLocation,subType
"Gallbladder cancer",Biliary,"Gall bladder"
"Gasto Intestinal Stroma Tumor (GIST)",Bone/Soft tissue,"Undifferentiated sarcoma"
"gastric carcinoma",Stomach,"Gastric"
"gastrinoma in stomach",NET,"Gastrinoma"
"gastrinoma in stomach",Neuroendocrine,"Gastrinoma: Stomach"
"Gastrointestinal cancer/pancreatic-biliary type",Pancreas,"Biliary type"
"Gastrointestinal Carcinoid Tumor",Bone/Soft tissue,"Gastrointestinal Carcinoid"
"Gastrointestinal Stromal Tumors (GIST)",Bone/Soft tissue,"Gastrointestinal stromal tumor (GIST)"
Expand Down Expand Up @@ -118,12 +118,12 @@ searchTerm,primaryTumorLocation,subType
"jejunum carcinoma",Small intestine,"Jejunum"
"kidney",Kidney,""
"Kidneycell carcinoma",Kidney,"Renal cell"
"lage cell neuroendocrien carcinoma",NET,"Large cell"
"large cell neuroendocrien carcinoma",NET,"Large cell"
"lage cell neuroendocrien carcinoma",Neuroendocrine,"NEC: Large cell"
"large cell neuroendocrien carcinoma",Neuroendocrine,"NEC: Large cell"
"Larynxcarcinoma",Head and neck,"Laryngeal"
"left tuba",Ovary,"Left tuba"
"Leiomyosarcoom",Bone/Soft tissue,"Leiomyosarcoma"
"Long-NET",NET,"Lung"
"Long-NET",Neuroendocrine,"NET: Lung"
"Lung Cancer: Non-Small Cell",Lung,"Non-Small Cell"
"Lung Cancer: Non-Small Cell_",Lung,"Non-Small Cell"
"Lung Cancer: Small Cell",Lung,"Small Cell"
Expand All @@ -134,7 +134,7 @@ searchTerm,primaryTumorLocation,subType
"Lymphoma: Non Hodgkin",Lymphoid,"Non-Hodgkin"
"Lymphoma: other",Lymphoid,"Other"
"Malignant peripheral nerve sheath tumor",Bone/Soft tissue,"Neurofibrosarcoma"
"maligne feochromocytoom",NET,"Adrenal"
"maligne feochromocytoom",Neuroendocrine,"NET: Adrenal"
"Medulloblastoma",Nervous system,"Medulloblastoma"
"Melanoma",Skin,"Melanoma"
"Melanoma_",Skin,"Melanoma"
Expand All @@ -154,34 +154,34 @@ searchTerm,primaryTumorLocation,subType
"multipel myeloma",Myeloid,"Multiple Myeloma"
"myo-epithelioma distale external auditory canal",Head and neck,"External auditory canal"
"Nasal Cavity and Paranasal Sinus Cancer",Head and neck,"Nasal cavity and paranasal sinuses"
"NEC from pancreatic",NET,"Pancreatic NEC"
"NET colon ascendens",NET,"Colon/Rectum"
"NET of unknown origin",NET,""
"NET prostate",NET,"Prostate"
"NET rectosigmoid",NET,"Rectosigmoid"
"NET unknown primary",NET,""
"NET unknown primary location",NET,""
"NET: Large cell neuroendocrine carcinoma (LCNEC)",NET,"Large cell NEC"
"NET: Lung",NET,"Lung"
"NET: lung or small intestine",NET,"Lung or small intestine"
"NET: Lung_",NET,"Lung"
"NET: other",NET,""
"NET: Other (stomach)",NET,"Stomach"
"NET: Pancreatic",NET,"Pancreatic"
"NET: Pancreatic_",NET,"Pancreatic"
"NET: Small Intestinal",NET,"Small Intestinal"
"Neuro endocrine carcinoma",NET,"NEC"
"Neuro endocrine carcinoma (NEC), primary unknown",NET,"NEC"
"Neuro endocrine tumor",NET,""
"Neuro-endocrinal cervix carcinoma",NET,"Cervix"
"Neuro-endocrine carcinoma",NET,"NEC"
"Neuro-endocrine carcinoma of the bladder",NET,"Bladder NEC"
"Neuro-endocrine carcinoma with unknown primary tumor",NET,"NEC"
"neuro-endocrine tumor, possible transformation of prostatecarcinoma",NET,"Possible transformation of prostatecarcinoma"
"Neuro-endorine tumor",NET,""
"NEC from pancreatic",Neuroendocrine,"NEC: Pancreatic"
"NET colon ascendens",Neuroendocrine,"NET: Colon"
"NET of unknown origin",Neuroendocrine,"NET"
"NET prostate",Neuroendocrine,"NET: Prostate"
"NET rectosigmoid",Neuroendocrine,"NET: Rectosigmoid"
"NET unknown primary",Neuroendocrine,"NET"
"NET unknown primary location",Neuroendocrine,"NET"
"NET: Large cell neuroendocrine carcinoma (LCNEC)",Neuroendocrine,"NEC: Large cell"
"NET: Lung",Neuroendocrine,"NET: Lung"
"NET: lung or small intestine",Neuroendocrine,"NET: Lung or small intestine"
"NET: Lung_",Neuroendocrine,"NET: Lung"
"NET: other",Neuroendocrine,"NET"
"NET: Other (stomach)",Neuroendocrine,"NET: Stomach"
"NET: Pancreatic",Neuroendocrine,"NET: Pancreatic"
"NET: Pancreatic_",Neuroendocrine,"NET: Pancreatic"
"NET: Small Intestinal",Neuroendocrine,"NET: Small Intestinal"
"Neuro endocrine carcinoma",Neuroendocrine,"NEC"
"Neuro endocrine carcinoma (NEC), primary unknown",Neuroendocrine,"NEC"
"Neuro endocrine tumor",Neuroendocrine,"NET"
"Neuro-endocrinal cervix carcinoma",Neuroendocrine,"NEC: Cervical"
"Neuro-endocrine carcinoma",Neuroendocrine,"NEC"
"Neuro-endocrine carcinoma of the bladder",Neuroendocrine,"NEC: Bladder"
"Neuro-endocrine carcinoma with unknown primary tumor",Neuroendocrine,"NEC"
"neuro-endocrine tumor, possible transformation of prostatecarcinoma",Neuroendocrine,"NET: Possible transformation of prostatecarcinoma"
"Neuro-endorine tumor",Neuroendocrine,"NET"
"Neuroblastoma",Nervous system,"Neuroblastoma"
"neuroendocrine carcinoma",NET,"NEC"
"Neuroendocrine tumor",NET,""
"neuroendocrine carcinoma",Neuroendocrine,"NEC"
"Neuroendocrine tumor",Neuroendocrine,"NET"
"non-HIV Kaposisarcoom",Skin,"Other"
"Non-small cell carcinoma NOS (mostly resembling lung carcinoma): working diagnosis ""lung carcinoma""",Lung,"Non-Small Cell"
"Not in dropdown list (please specify below)_adenocarcinoom van de klier van Bartholin vulva links",Vulva,"Adenocarcinoma"
Expand All @@ -200,12 +200,12 @@ searchTerm,primaryTumorLocation,subType
"Other (please specify below)_empithelioid hemangioendothelioom",Vascular,"Hemangioendothelioma"
"Other (please specify below)_empithelioid hemangioendothelioom",Vascular,"Hemangioendothelioma"
"Other (please specify below)_Esophagus squamous cell carcinoma (ESCC)",Esophagus,""
"Other (please specify below)_LCNEC",NET,"Large cell NEC"
"Other (please specify below)_LCNEC",Neuroendocrine,"NEC: Large cell"
"Other (please specify below)_Merkel Cel Carcinoom",Skin,"Merkel-cell carcinoma"
"Other (please specify below)_myo-epitheliaal carcinoom",Bone/Soft tissue,"Undifferentiated sarcoma"
"Other (please specify below)_NEC",NET,"NEC"
"Other (please specify below)_NEC Colon",NET,"Colon NEC"
"Other (please specify below)_NET; Rectal",NET,"Rectum"
"Other (please specify below)_NEC",Neuroendocrine,"NEC"
"Other (please specify below)_NEC Colon",Neuroendocrine,"NEC: Colon"
"Other (please specify below)_NET; Rectal",Neuroendocrine,"NET: Rectal"
"Other (please specify below)_ovariele stromale tumor",Ovary,""
"Other (please specify below)_pleiomorf",Bone/Soft tissue,"Pleomorphic Sarcoma"
"Other (please specify below)_Trichilemmal carcinoma ear",Skin,"Trichilemmal carcinoma"
Expand All @@ -215,8 +215,8 @@ searchTerm,primaryTumorLocation,subType
"Other (please specify below)_Vaginal cancer",Vagina,""
"Ovarian cancer",Ovary,""
"Pancreatic cancer",Pancreas,""
"Pancreatic Neuroendocrine Tumors (Islet Cell Tumors)",NET,"Pancreatic"
"Paraganglioma / NET lung?",NET,"Paraganglioma (lung?)"
"Pancreatic Neuroendocrine Tumors (Islet Cell Tumors)",Neuroendocrine,"NET: Pancreatic"
"Paraganglioma / NET lung?",Neuroendocrine,"NET: Paraganglioma (lung?)"
"Penile cancer",Penile,""
"plexuspapilloom",Nervous system,"Plexuspapilloom"
"primary unknown",Unknown,"Unknown"
Expand All @@ -225,7 +225,7 @@ searchTerm,primaryTumorLocation,subType
"Pseudomyxoma peritonei",Colon/Rectum,"Low-grade mucinous adenocarcinoma"
"Rectum",Colon/Rectum,"Rectum"
"Rectum carcinoma",Colon/Rectum,"Rectum"
"Rectum NET",NET,"Rectum"
"Rectum NET",Neuroendocrine,"NET: Rectal"
"rectumcarcinoom",Colon/Rectum,""
"renal carcinoma",Kidney,"Renal cell"
"Renal Cell Cancer",Kidney,"Renal cell"
Expand Down Expand Up @@ -275,9 +275,9 @@ searchTerm,primaryTumorLocation,subType
"Synovial carcinoma",Bone/Soft tissue,"Synovial"
"T-cell lymphoma",Lymphoid,"T-cell"
"Testicular cancer",Testis,""
"thymic NET",NET,"Thymus"
"thymic NET",Neuroendocrine,"NET: Thymus"
"Thymoma and Thymic Carcinoma",Thymus,""
"thymus-neuroendocrine tumor",NET,"Thymus"
"thymus-neuroendocrine tumor",Neuroendocrine,"NET: Thymus"
"Thyroid Cancer",Thyroid,""
"Thyroid Cancer_",Thyroid,""
"Tongue carcinoma lateral side",Head and neck,"Tongue"
Expand All @@ -290,6 +290,7 @@ searchTerm,primaryTumorLocation,subType
"unknow primary tumor",Unknown,"Unknown"
"unknown",Unknown,"Unknown"
"Unknown primary",Unknown,"Unknown"
"Unknown primary location melanoma",Unknown,"Melanoma"
"unknown primary tumor in tractus genitalis",Unknown,"Tractus Genitalis"
"Unknown, probably head and neck cancer",Unknown,"Possibly Head and neck"
"Urachuscarcinoma",Urinary tract,"Urachal carcinoma"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ show_tick_labels = no
<<include etc/image.conf>>
</image>

karyotype = data/karyotype/karyotype.human.txt
karyotype = KARYOTYPE

chromosomes_units = 1000000
chromosomes_display_default = yes
Expand Down
15 changes: 7 additions & 8 deletions sage/README2.md
Original file line number Diff line number Diff line change
Expand Up @@ -365,20 +365,19 @@ Peak memory is measured in gigabytes.

Threads | Elapsed Time| CPU Time | Peak Mem
---|---|---|---
1 | 597 | 601 | 44
8 | 87 | 653 | 83
16 | 50 | 722 | 83
24 | 38 | 773 | 82
32 | 32 | 830 | 84
48 | 30 | 1150 | 82
64 | 29 | 1478 | 82
72 | 30 | 1660 | 82
1 | 529 | 539 | 55
8 | 77 | 594 | 68
16 | 46 | 675 | 68
24 | 32 | 671 | 67
32 | 28 | 748 | 67
48 | 27 | 1047 | 66

## Version History
- Upcoming
- Multiple tumor support
- Multiple reference (or RNA) support
- Removed explicit RNA support (can use additional reference instead)
- Performance and memory improvements
- [2.1](https://github.com/hartwigmedical/hmftools/releases/tag/sage-v2.1)
- Reduced memory footprint
- Add version info to VCF
Expand Down

0 comments on commit 69ba1ed

Please sign in to comment.