From 38f79d236f318eceae6ec788819ab0272c9e782a Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 31 May 2015 04:14:11 +0000 Subject: [PATCH 1/6] add .interval_list file parser fixes #685 --- .../org/bdgenomics/adam/rdd/ADAMContext.scala | 31 +- .../adam/rdd/features/FeatureParser.scala | 69 ++- .../SeqCap_EZ_Exome_v3.hg19.interval_list | 463 ++++++++++++++++++ .../adam/rdd/ADAMContextSuite.scala | 17 + 4 files changed, 575 insertions(+), 5 deletions(-) create mode 100644 adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala index 707448d3b2..ea82dfc2e1 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala @@ -370,6 +370,26 @@ class ADAMContext(val sc: SparkContext) extends Serializable with Logging { if (Metrics.isRecording) records.instrument() else records } + def loadIntervalList(filePath: String): RDD[Feature] = { + val parsedLines = sc.textFile(filePath).map(new IntervalListParser().parse) + val (seqDict, records) = (SequenceDictionary(parsedLines.flatMap(_._1).collect(): _*), parsedLines.flatMap(_._2)) + val seqDictMap = seqDict.records.map(sr => sr.name -> sr).toMap + val recordsWithContigs = for { + record <- records + seqRecord <- seqDictMap.get(record.getContig.getContigName) + } yield Feature.newBuilder(record) + .setContig( + Contig.newBuilder() + .setContigName(seqRecord.name) + .setReferenceURL(seqRecord.url.getOrElse(null)) + .setContigMD5(seqRecord.md5.getOrElse(null)) + .setContigLength(seqRecord.length) + .build() + ) + .build() + if (Metrics.isRecording) recordsWithContigs.instrument() else recordsWithContigs + } + def loadParquetFeatures( filePath: String, predicate: Option[FilterPredicate] = None, @@ -418,18 +438,21 @@ class ADAMContext(val sc: SparkContext) extends Serializable with Logging { projection: Option[Schema] = None): RDD[Feature] = { if (filePath.endsWith(".bed")) { - log.info("Loading " + filePath + " as BED and converting to features. Projection is ignored.") + log.info(s"Loading $filePath as BED and converting to features. Projection is ignored.") loadBED(filePath) } else if (filePath.endsWith(".gtf") || filePath.endsWith(".gff")) { - log.info("Loading " + filePath + " as GTF/GFF and converting to features. Projection is ignored.") + log.info(s"Loading $filePath as GTF/GFF and converting to features. Projection is ignored.") loadGTF(filePath) } else if (filePath.endsWith(".narrowPeak") || filePath.endsWith(".narrowpeak")) { - log.info("Loading " + filePath + " as NarrowPeak and converting to features. Projection is ignored.") + log.info(s"Loading $filePath as NarrowPeak and converting to features. Projection is ignored.") loadNarrowPeak(filePath) + } else if (filePath.endsWith(".interval_list")) { + log.info(s"Loading $filePath as IntervalList and converting to features. Projection is ignored.") + loadIntervalList(filePath) } else { - log.info("Loading " + filePath + " as Parquet containing Features.") + log.info(s"Loading $filePath as Parquet containing Features.") loadParquetFeatures(filePath, None, projection) } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala index 1618d80e03..da4d90c2f9 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala @@ -19,7 +19,8 @@ package org.bdgenomics.adam.rdd.features import java.io.File import java.util.UUID -import org.bdgenomics.formats.avro.{ Contig, Strand, Feature } +import org.bdgenomics.adam.models.SequenceRecord +import org.bdgenomics.formats.avro.{ Dbxref, Contig, Strand, Feature } import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer @@ -118,6 +119,72 @@ class GTFParser extends FeatureParser { } } +class IntervalListParser extends Serializable { + def parse(line: String): (Option[SequenceRecord], Option[Feature]) = { + val fields = line.split("[ \t]+") + if (fields.length < 2) { + (None, None) + } else { + if (fields(0).startsWith("@")) { + if (fields(0).startsWith("@SQ")) { + val (name, length, url, md5) = { + val attrs = fields.drop(1).map(field => field.split(":", 2) match { + case Array(key, value) => key -> value + case x => throw new Exception(s"Expected fields of the form 'key:value' in field $field but got: $x. Line:\n$line") + }).toMap + + // Require that all @SQ lines have name, length, url, md5. + (attrs("SN"), attrs("LN").toLong, attrs("UR"), attrs("M5")) + } + + (Some(SequenceRecord(name, length, md5, url)), None) + } else { + (None, None) + } + } else { + if (fields.length < 4) { + throw new Exception(s"Invalid line: $line") + } + + val (dbxrfs, attrs: Map[String, String]) = + (if (fields.length < 5 || fields(4) == ".") { + (Nil, Map()) + } else { + val a = fields(4).split(';').map(field => field.split('|') match { + case Array(key, value) => + key match { + case "gn" | "ens" | "vega" | "ccds" => (Some(Dbxref.newBuilder().setDb(key).setAccession(value).build()), None) + case _ => (None, Some(key -> value)) + } + case x => throw new Exception(s"Expected fields of the form 'key:value' but got: $field. Line:\n$line") + }) + + (a.flatMap(_._1).toList, a.flatMap(_._2).toMap) + }) + + ( + None, + Some( + Feature.newBuilder() + .setContig(Contig.newBuilder().setContigName(fields(0)).build()) + .setStart(fields(1).toLong) + .setEnd(fields(2).toLong) + .setStrand(fields(3) match { + case "+" => Strand.Forward + case "-" => Strand.Reverse + case _ => Strand.Independent + }) + .setAttributes(attrs) + .setDbxrefs(dbxrfs) + .build() + ) + ) + } + } + } + +} + class BEDParser extends FeatureParser { override def parse(line: String): Seq[Feature] = { diff --git a/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list b/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list new file mode 100644 index 0000000000..8a31eb493f --- /dev/null +++ b/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list @@ -0,0 +1,463 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:chrM LN:16571 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb +@SQ SN:chr1 LN:249250621 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:1b22b98cdeb4a9304cb5d48026a85128 +@SQ SN:chr2 LN:243199373 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:a0d9851da00400dec1098a9255ac712e +@SQ SN:chr3 LN:198022430 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:641e4338fa8d52a5b781bd2a2c08d3c3 +@SQ SN:chr4 LN:191154276 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:23dccd106897542ad87d2765d28a19a1 +@SQ SN:chr5 LN:180915260 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:0740173db9ffd264d728f32784845cd7 +@SQ SN:chr6 LN:171115067 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:1d3a93a248d92a729ee764823acbbc6b +@SQ SN:chr7 LN:159138663 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:618366e953d6aaad97dbe4777c29375e +@SQ SN:chr8 LN:146364022 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:96f514a9929e410c6651697bded59aec +@SQ SN:chr9 LN:141213431 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:3e273117f15e0a400f01055d9f393768 +@SQ SN:chr10 LN:135534747 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:988c28e000e84c26d552359af1ea2e1d +@SQ SN:chr11 LN:135006516 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:98c59049a2df285c76ffb1c6db8f8b96 +@SQ SN:chr12 LN:133851895 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:51851ac0e1a115847ad36449b0015864 +@SQ SN:chr13 LN:115169878 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:283f8d7892baa81b510a015719ca7b0b +@SQ SN:chr14 LN:107349540 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:98f3cae32b2a2e9524bc19813927542e +@SQ SN:chr15 LN:102531392 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:e5645a794a8238215b2cd77acb95a078 +@SQ SN:chr16 LN:90354753 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:fc9b1a7b42b97a864f56b348b06095e6 +@SQ SN:chr17 LN:81195210 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:351f64d4f4f9ddd45b35336ad97aa6de +@SQ SN:chr18 LN:78077248 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:b15d4b2d29dde9d3e4f93d1d0f2cbc9c +@SQ SN:chr19 LN:59128983 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:1aacd71f30db8e561810913e0b72636d +@SQ SN:chr20 LN:63025520 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:0dec9660ec1efaaf33281c0d5ea2560f +@SQ SN:chr21 LN:48129895 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:2979a6085bfe28e3ad6f552f361ed74d +@SQ SN:chr22 LN:51304566 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:a718acaa6135fdca8357d5bfe94211dd +@SQ SN:chrX LN:155270560 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:7e0e2e580297b7764e31dbc80c2540dd +@SQ SN:chrY LN:59373566 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:3393b0779f142dc59f4cfcc22b61c1ee +@SQ SN:chr1_gl000191_random LN:106433 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d75b436f50a8214ee9c2a51d30b2c2cc +@SQ SN:chr1_gl000192_random LN:547496 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:325ba9e808f669dfeee210fdd7b470ac +@SQ SN:chr4_ctg9_hap1 LN:590426 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:fa24f81b680df26bcfb6d69b784fbe36 +@SQ SN:chr4_gl000193_random LN:189789 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:dbb6e8ece0b5de29da56601613007c2a +@SQ SN:chr4_gl000194_random LN:191469 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:6ac8f815bf8e845bb3031b73f812c012 +@SQ SN:chr6_apd_hap1 LN:4622290 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:fe71bc63420d666884f37a3ad79f3317 +@SQ SN:chr6_cox_hap2 LN:4795371 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:18c17e1641ef04873b15f40f6c8659a4 +@SQ SN:chr6_dbb_hap3 LN:4610396 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:2a3c677c426a10e137883ae1ffb8da3f +@SQ SN:chr6_mann_hap4 LN:4683263 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:9d51d4152174461cd6715c7ddc588dc8 +@SQ SN:chr6_mcf_hap5 LN:4833398 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:efed415dd8742349cb7aaca054675b9a +@SQ SN:chr6_qbl_hap6 LN:4611984 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:094d037050cad692b57ea12c4fef790f +@SQ SN:chr6_ssto_hap7 LN:4928567 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:3b6d666200e72bcc036bf88a4d7e0749 +@SQ SN:chr7_gl000195_random LN:182896 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:5d9ec007868d517e73543b005ba48535 +@SQ SN:chr8_gl000196_random LN:38914 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d92206d1bb4c3b4019c43c0875c06dc0 +@SQ SN:chr8_gl000197_random LN:37175 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:6f5efdd36643a9b8c8ccad6f2f1edc7b +@SQ SN:chr9_gl000198_random LN:90085 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:868e7784040da90d900d2d1b667a1383 +@SQ SN:chr9_gl000199_random LN:169874 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:569af3b73522fab4b40995ae4944e78e +@SQ SN:chr9_gl000200_random LN:187035 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:75e4c8d17cd4addf3917d1703cacaf25 +@SQ SN:chr9_gl000201_random LN:36148 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:dfb7e7ec60ffdcb85cb359ea28454ee9 +@SQ SN:chr11_gl000202_random LN:40103 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:06cbf126247d89664a4faebad130fe9c +@SQ SN:chr17_ctg5_hap1 LN:1680828 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d89517b400226d3b56e753972a7cad67 +@SQ SN:chr17_gl000203_random LN:37498 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:96358c325fe0e70bee73436e8bb14dbd +@SQ SN:chr17_gl000204_random LN:81310 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:efc49c871536fa8d79cb0a06fa739722 +@SQ SN:chr17_gl000205_random LN:174588 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d22441398d99caf673e9afb9a1908ec5 +@SQ SN:chr17_gl000206_random LN:41001 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:43f69e423533e948bfae5ce1d45bd3f1 +@SQ SN:chr18_gl000207_random LN:4262 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:f3814841f1939d3ca19072d9e89f3fd7 +@SQ SN:chr19_gl000208_random LN:92689 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:aa81be49bf3fe63a79bdc6a6f279abf6 +@SQ SN:chr19_gl000209_random LN:159169 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:f40598e2a5a6b26e84a3775e0d1e2c81 +@SQ SN:chr21_gl000210_random LN:27682 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:851106a74238044126131ce2a8e5847c +@SQ SN:chrUn_gl000211 LN:166566 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:7daaa45c66b288847b9b32b964e623d3 +@SQ SN:chrUn_gl000212 LN:186858 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:563531689f3dbd691331fd6c5730a88b +@SQ SN:chrUn_gl000213 LN:164239 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:9d424fdcc98866650b58f004080a992a +@SQ SN:chrUn_gl000214 LN:137718 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:46c2032c37f2ed899eb41c0473319a69 +@SQ SN:chrUn_gl000215 LN:172545 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:5eb3b418480ae67a997957c909375a73 +@SQ SN:chrUn_gl000216 LN:172294 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:642a232d91c486ac339263820aef7fe0 +@SQ SN:chrUn_gl000217 LN:172149 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:6d243e18dea1945fb7f2517615b8f52e +@SQ SN:chrUn_gl000218 LN:161147 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:1d708b54644c26c7e01c2dad5426d38c +@SQ SN:chrUn_gl000219 LN:179198 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:f977edd13bac459cb2ed4a5457dba1b3 +@SQ SN:chrUn_gl000220 LN:161802 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:fc35de963c57bf7648429e6454f1c9db +@SQ SN:chrUn_gl000221 LN:155397 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:3238fb74ea87ae857f9c7508d315babb +@SQ SN:chrUn_gl000222 LN:186861 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:6fe9abac455169f50470f5a6b01d0f59 +@SQ SN:chrUn_gl000223 LN:180455 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:399dfa03bf32022ab52a846f7ca35b30 +@SQ SN:chrUn_gl000224 LN:179693 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d5b2fc04f6b41b212a4198a07f450e20 +@SQ SN:chrUn_gl000225 LN:211173 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:63945c3e6962f28ffd469719a747e73c +@SQ SN:chrUn_gl000226 LN:15008 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:1c1b2cd1fccbc0a99b6a447fa24d1504 +@SQ SN:chrUn_gl000227 LN:128374 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:a4aead23f8053f2655e468bcc6ecdceb +@SQ SN:chrUn_gl000228 LN:129120 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:c5a17c97e2c1a0b6a9cc5a6b064b714f +@SQ SN:chrUn_gl000229 LN:19913 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:d0f40ec87de311d8e715b52e4c7062e1 +@SQ SN:chrUn_gl000230 LN:43691 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:b4eb71ee878d3706246b7c1dbef69299 +@SQ SN:chrUn_gl000231 LN:27386 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:ba8882ce3a1efa2080e5d29b956568a4 +@SQ SN:chrUn_gl000232 LN:40652 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:3e06b6741061ad93a8587531307057d8 +@SQ SN:chrUn_gl000233 LN:45941 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:7fed60298a8d62ff808b74b6ce820001 +@SQ SN:chrUn_gl000234 LN:40531 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:93f998536b61a56fd0ff47322a911d4b +@SQ SN:chrUn_gl000235 LN:34474 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:118a25ca210cfbcdfb6c2ebb249f9680 +@SQ SN:chrUn_gl000236 LN:41934 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:fdcd739913efa1fdc64b6c0cd7016779 +@SQ SN:chrUn_gl000237 LN:45867 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:e0c82e7751df73f4f6d0ed30cdc853c0 +@SQ SN:chrUn_gl000238 LN:39939 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:131b1efc3270cc838686b54e7c34b17b +@SQ SN:chrUn_gl000239 LN:33824 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:99795f15702caec4fa1c4e15f8a29c07 +@SQ SN:chrUn_gl000240 LN:41933 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:445a86173da9f237d7bcf41c6cb8cc62 +@SQ SN:chrUn_gl000241 LN:42152 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:ef4258cdc5a45c206cea8fc3e1d858cf +@SQ SN:chrUn_gl000242 LN:43523 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:2f8694fc47576bc81b5fe9e7de0ba49e +@SQ SN:chrUn_gl000243 LN:43341 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:cc34279a7e353136741c9fce79bc4396 +@SQ SN:chrUn_gl000244 LN:39929 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:0996b4475f353ca98bacb756ac479140 +@SQ SN:chrUn_gl000245 LN:36651 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:89bc61960f37d94abf0df2d481ada0ec +@SQ SN:chrUn_gl000246 LN:38154 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:e4afcd31912af9d9c2546acf1cb23af2 +@SQ SN:chrUn_gl000247 LN:36422 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:7de00226bb7df1c57276ca6baabafd15 +@SQ SN:chrUn_gl000248 LN:39786 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:5a8e43bec9be36c7b49c84d585107776 +@SQ SN:chrUn_gl000249 LN:38502 UR:file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta M5:1d78abec37c15fe29a275eb08d5af236 +chr1 14416 14499 + gn|DDX11L1;gn|RP11-34P13.2;ens|ENSG00000223972;ens|ENSG00000227232;vega|OTTHUMG00000000958;vega|OTTHUMG00000000961 +chr1 1919863 1920037 + gn|C1orf222;ens|ENSG00000142609;vega|OTTHUMG00000000945 +chr1 7202038 7202320 + gn|CAMTA1;ccds|CCDS30576;ens|ENSG00000171735;vega|OTTHUMG00000001212 +chr1 11852240 11852486 + gn|MTHFR;ccds|CCDS137;ens|ENSG00000177000;vega|OTTHUMG00000002277 +chr1 16260740 16260857 + gn|SPEN;ccds|CCDS164;ens|ENSG00000065526;vega|OTTHUMG00000009376 +chr1 19583830 19583980 + gn|MRTO4;ccds|CCDS191;ens|ENSG00000053372;vega|OTTHUMG00000002496 +chr1 23649945 23650172 + gn|HNRNPR;ccds|CCDS232;ccds|CCDS44085;ens|ENSG00000125944;vega|OTTHUMG00000003224 +chr1 27105951 27106202 + gn|ARID1A;ccds|CCDS285;ccds|CCDS44091;ens|ENSG00000117713;vega|OTTHUMG00000004004 +chr1 32148680 32148835 + gn|COL16A1;ccds|CCDS41297;ens|ENSG00000084636;vega|OTTHUMG00000003883 +chr1 36022702 36022913 + gn|KIAA0319L;ccds|CCDS390;ens|ENSG00000142687;vega|OTTHUMG00000004370 +chr1 39980887 39981050 + gn|BMP8A;gn|OXCT2P1;ccds|CCDS437;ens|ENSG00000183682;ens|ENSG00000237624;vega|OTTHUMG00000008394;vega|OTTHUMG00000009250 +chr1 43918455 43918718 + gn|HYI;ccds|CCDS488;ccds|CCDS53309;ens|ENSG00000178922;vega|OTTHUMG00000007502 +chr1 47150040 47150299 + gn|KIAA0494;gn|RP11-8J9.4;ccds|CCDS30706;ens|ENSG00000159658;ens|ENSG00000228237;vega|OTTHUMG00000007992;vega|OTTHUMG00000007993 +chr1 54562720 54562844 + gn|TCEANC2;ccds|CCDS587;ens|ENSG00000116205;vega|OTTHUMG00000008434 +chr1 63997445 63997725 + gn|EFCAB7;gn|ITGB3BP;ccds|CCDS30736;ccds|CCDS30737;ens|ENSG00000142856;ens|ENSG00000203965;vega|OTTHUMG00000008983;vega|OTTHUMG00000013364 +chr1 76349439 76349541 + gn|MSH4;ccds|CCDS670;ens|ENSG00000057468;vega|OTTHUMG00000009788 +chr1 89271245 89271508 + gn|PKN2;ccds|CCDS714;ens|ENSG00000065243;vega|OTTHUMG00000010074 +chr1 95616780 95617017 + gn|TMEM56;ccds|CCDS753;ens|ENSG00000152078;vega|OTTHUMG00000010847 +chr1 109482229 109482469 + gn|AKNAD1;gn|CLCC1;ccds|CCDS41362;ccds|CCDS791;ccds|CCDS793;ens|ENSG00000121940;ens|ENSG00000162641;vega|OTTHUMG00000011231;vega|OTTHUMG00000011732 +chr1 113124524 113124716 + gn|ST7L;ccds|CCDS848;ccds|CCDS849;ccds|CCDS850;ccds|CCDS852;ens|ENSG00000007341;vega|OTTHUMG00000011753 +chr1 118168699 118168934 + gn|FAM46C;ccds|CCDS896;ens|ENSG00000183508;vega|OTTHUMG00000013703 +chr1 146011048 146011255 + gn|RP11-94I2.1;ens|ENSG00000229828;vega|OTTHUMG00000013879 +chr1 150679220 150679290 + gn|HORMAD1;ccds|CCDS967;ens|ENSG00000143452;vega|OTTHUMG00000035005 +chr1 152883076 152883193 + gn|IVL;ccds|CCDS1030;ens|ENSG00000163207;vega|OTTHUMG00000012451 +chr1 155156233 155156600 + gn|KRTCAP2;gn|TRIM46;ccds|CCDS1096;ccds|CCDS1097;ens|ENSG00000163462;ens|ENSG00000163463;vega|OTTHUMG00000013904;vega|OTTHUMG00000035680 +chr1 156693943 156694178 + gn|ISG20L2;ccds|CCDS1153;ens|ENSG00000143319;vega|OTTHUMG00000041301 +chr1 160209688 160209848 + gn|DCAF8;gn|RP11-574F21.3;ccds|CCDS1200;ens|ENSG00000132716;ens|ENSG00000258465;vega|OTTHUMG00000031604;vega|OTTHUMG00000171815 +chr1 166904659 166904733 + gn|ILDR2;ccds|CCDS1256;ens|ENSG00000143195;vega|OTTHUMG00000034320 +chr1 173517451 173517588 + gn|SLC9A11;ccds|CCDS1308;ens|ENSG00000162753;vega|OTTHUMG00000034800 +chr1 180904236 180904447 + gn|KIAA1614;ccds|CCDS41442;ens|ENSG00000135835;vega|OTTHUMG00000035183 +chr1 186328870 186328968 + gn|TPR;ccds|CCDS41446;ens|ENSG00000047410;vega|OTTHUMG00000035580 +chr1 201042672 201042801 + gn|CACNA1S;ccds|CCDS1407;ens|ENSG00000081248;vega|OTTHUMG00000035784 +chr1 204125903 204125997 + gn|REN;ccds|CCDS30981;ens|ENSG00000143839;vega|OTTHUMG00000036059 +chr1 207966755 207967002 + gn|CD46;ccds|CCDS1479;ccds|CCDS1480;ccds|CCDS1481;ccds|CCDS1482;ccds|CCDS1484;ccds|CCDS1485;ccds|CCDS31008;ccds|CCDS31009;ens|ENSG00000117335;vega|OTTHUMG00000036397 +chr1 218614453 218614522 + gn|TGFB2;ccds|CCDS1521;ccds|CCDS44318;ens|ENSG00000092969;vega|OTTHUMG00000039521 +chr1 226483637 226483705 + gn|LIN9;gn|PARP1;ccds|CCDS1553;ccds|CCDS1554;ens|ENSG00000143799;ens|ENSG00000183814;vega|OTTHUMG00000037556;vega|OTTHUMG00000037557 +chr1 231955220 231955316 + gn|DISC1;ccds|CCDS31055;ccds|CCDS31056;ccds|CCDS53482;ccds|CCDS53483;ccds|CCDS53484;ccds|CCDS53485;ens|ENSG00000162946;vega|OTTHUMG00000037835 +chr1 240497064 240497210 + gn|FMN2;ccds|CCDS31069;ens|ENSG00000155816;vega|OTTHUMG00000039883 +chr1 248457808 248458089 + gn|OR2T12;ccds|CCDS31110;ens|ENSG00000177201;vega|OTTHUMG00000040457 +chr2 10960803 10961007 + gn|PDIA6;ccds|CCDS1675;ens|ENSG00000143870;vega|OTTHUMG00000090479 +chr2 24398254 24398524 + gn|C2orf84;ccds|CCDS42659;ens|ENSG00000186453;vega|OTTHUMG00000151903 +chr2 27594094 27594284 + gn|SNX17;gn|TRIM54;ccds|CCDS1745;ccds|CCDS1746;ccds|CCDS1750;ens|ENSG00000115234;ens|ENSG00000138100;vega|OTTHUMG00000097078;vega|OTTHUMG00000097781 +chr2 32530901 32531123 + gn|YIPF4;ccds|CCDS1781;ens|ENSG00000119820;vega|OTTHUMG00000128452 +chr2 42530315 42530541 + gn|EML4;ccds|CCDS1807;ccds|CCDS46266;ens|ENSG00000143924;vega|OTTHUMG00000128603 +chr2 54001410 54001617 + gn|ASB3;gn|CHAC2;ccds|CCDS1846;ccds|CCDS1847;ccds|CCDS33196;ccds|CCDS54361;ens|ENSG00000115239;ens|ENSG00000143942;vega|OTTHUMG00000129279;vega|OTTHUMG00000151824 +chr2 62973702 62973871 + gn|AC092567.1;gn|EHBP1;ccds|CCDS1872;ccds|CCDS46299;ccds|CCDS46300;ens|ENSG00000115504;ens|ENSG00000211984;vega|OTTHUMG00000129453 +chr2 71415151 71415339 + gn|PAIP2B;ccds|CCDS46322;ens|ENSG00000124374;vega|OTTHUMG00000153284 +chr2 74834124 74834412 + gn|C2orf65;ccds|CCDS33229;ens|ENSG00000159374;vega|OTTHUMG00000152918 +chr2 87258859 87259192 + gn|AC125232.1;ens|ENSG00000231259;vega|OTTHUMG00000153315 +chr2 96919503 96919845 + gn|TMEM127;ccds|CCDS2018;ens|ENSG00000135956;vega|OTTHUMG00000130454 +chr2 100058741 100058814 + gn|REV1;ccds|CCDS2045;ccds|CCDS42722;ens|ENSG00000135945;vega|OTTHUMG00000130636 +chr2 110368938 110369062 + gn|SEPT10;ccds|CCDS42726;ccds|CCDS46383;ens|ENSG00000186522;vega|OTTHUMG00000154957 +chr2 116447220 116447358 + gn|DPP10;ccds|CCDS33278;ccds|CCDS46400;ccds|CCDS54388;ccds|CCDS54389;ens|ENSG00000175497;vega|OTTHUMG00000153294 +chr2 130777014 130777215 + gn|AC018865.8;ens|ENSG00000180178;vega|OTTHUMG00000153562 +chr2 136740909 136741078 + gn|DARS;ccds|CCDS2180;ens|ENSG00000115866;vega|OTTHUMG00000131741 +chr2 153475294 153475511 + gn|FMNL2;ccds|CCDS46429;ens|ENSG00000157827;vega|OTTHUMG00000154035 +chr2 165802008 165802270 + gn|SLC38A11;ccds|CCDS2224;ens|ENSG00000169507;vega|OTTHUMG00000132144 +chr2 171705679 171705936 + gn|GAD1;ccds|CCDS2239;ccds|CCDS2240;ens|ENSG00000128683;vega|OTTHUMG00000044175 +chr2 179323057 179323142 + gn|DFNB59;ccds|CCDS42787;ens|ENSG00000204311;vega|OTTHUMG00000154425 +chr2 179977350 179977416 + gn|SESTD1;ccds|CCDS33338;ens|ENSG00000187231;vega|OTTHUMG00000154554 +chr2 191125784 191125991 + gn|HIBCH;ccds|CCDS2304;ccds|CCDS46475;ens|ENSG00000198130;vega|OTTHUMG00000132673 +chr2 201687144 201687271 + gn|BZW1;ens|ENSG00000082153;vega|OTTHUMG00000154560 +chr2 207173411 207173718 + gn|ZDBF2;ccds|CCDS46501;ens|ENSG00000204186;vega|OTTHUMG00000154648 +chr2 216261739 216261811 + gn|FN1;ccds|CCDS2399;ccds|CCDS2400;ccds|CCDS42813;ccds|CCDS42814;ccds|CCDS46510;ccds|CCDS46512;ens|ENSG00000115414;vega|OTTHUMG00000133054 +chr2 220117444 220117806 + gn|TUBA4A;ccds|CCDS2438;ens|ENSG00000127824;vega|OTTHUMG00000133126 +chr2 228564228 228564322 + gn|SLC19A3;ccds|CCDS2468;ens|ENSG00000135917;vega|OTTHUMG00000133185 +chr2 234421100 234421373 + gn|USP40;ccds|CCDS46547;ens|ENSG00000085982;vega|OTTHUMG00000153199 +chr2 241494341 241494506 + gn|ANKMY1;ccds|CCDS2535;ccds|CCDS2536;ens|ENSG00000144504;vega|OTTHUMG00000133355 +chr3 5229513 5229710 + gn|EDEM1;ccds|CCDS33686;ens|ENSG00000134109;vega|OTTHUMG00000154896 +chr3 12533696 12533807 + gn|TSEN2;ccds|CCDS2611;ccds|CCDS46757;ccds|CCDS46758;ccds|CCDS46759;ens|ENSG00000154743;vega|OTTHUMG00000129765 +chr3 20216422 20216517 + gn|SGOL1;gn|SGOL1-AS1;ccds|CCDS2635;ccds|CCDS33716;ccds|CCDS46771;ccds|CCDS46772;ccds|CCDS46773;ccds|CCDS46774;ens|ENSG00000129810;ens|ENSG00000237485;vega|OTTHUMG00000130479;vega|OTTHUMG00000155532 +chr3 37042378 37042634 + gn|MLH1;ccds|CCDS2663;ccds|CCDS54562;ccds|CCDS54563;ens|ENSG00000076242;vega|OTTHUMG00000130797 +chr3 41977225 41977519 + gn|ULK4;ccds|CCDS43071;ens|ENSG00000168038;vega|OTTHUMG00000156210 +chr3 46750519 46750713 + gn|TMIE;ccds|CCDS43081;ens|ENSG00000181585;vega|OTTHUMG00000149909 +chr3 48688712 48688939 + gn|CELSR3;ccds|CCDS2775;ens|ENSG00000008300;vega|OTTHUMG00000133544 +chr3 50310674 50310945 + gn|SEMA3B;ens|ENSG00000012171;vega|OTTHUMG00000156970 +chr3 52545664 52545828 + gn|STAB1;ccds|CCDS33768;ens|ENSG00000010327;vega|OTTHUMG00000158574 +chr3 57557180 57557278 + gn|ARF4;ccds|CCDS2884;ens|ENSG00000168374;vega|OTTHUMG00000158601 +chr3 70008411 70008650 + gn|MITF;ccds|CCDS2913;ccds|CCDS43106;ccds|CCDS43107;ccds|CCDS46865;ccds|CCDS46866;ccds|CCDS54607;ens|ENSG00000187098;vega|OTTHUMG00000149921 +chr3 98541165 98541277 + gn|DCBLD2;ccds|CCDS46878;ens|ENSG00000057019;vega|OTTHUMG00000151985 +chr3 111835426 111835493 + gn|C3orf52;ccds|CCDS46887;ccds|CCDS54620;ens|ENSG00000114529;vega|OTTHUMG00000159230 +chr3 121341583 121342002 + gn|FBXO40;ccds|CCDS33835;ens|ENSG00000163833;vega|OTTHUMG00000159410 +chr3 125651394 125651600 + gn|ALG1L;ccds|CCDS33840;ens|ENSG00000189366;vega|OTTHUMG00000159588 +chr3 130402993 130403079 + gn|PIK3R4;ccds|CCDS3067;ens|ENSG00000196455;vega|OTTHUMG00000159645 +chr3 138289896 138290026 + gn|CEP70;ccds|CCDS3102;ens|ENSG00000114107;vega|OTTHUMG00000159891 +chr3 149258070 149258142 + gn|WWTR1;ccds|CCDS3144;ens|ENSG00000018408;vega|OTTHUMG00000159614 +chr3 159736828 159737092 + gn|CTD-2049J23.2;gn|CTD-2049J23.3;ens|ENSG00000242107;ens|ENSG00000244040;vega|OTTHUMG00000158951;vega|OTTHUMG00000158954 +chr3 173993064 173993224 + gn|NLGN1;ccds|CCDS3222;ens|ENSG00000169760;vega|OTTHUMG00000157005 +chr3 184035477 184035691 + gn|EIF2B5;gn|EIF4G1;ccds|CCDS3252;ccds|CCDS3259;ccds|CCDS3260;ccds|CCDS3261;ccds|CCDS46970;ccds|CCDS54687;ccds|CCDS54688;ens|ENSG00000114867;ens|ENSG00000145191;vega|OTTHUMG00000156784;vega|OTTHUMG00000156840 +chr3 193180415 193180545 + gn|ATP13A4;ccds|CCDS3304;ens|ENSG00000127249;vega|OTTHUMG00000074067 +chr3 197803691 197803879 + gn|AC073135.1;ens|ENSG00000226435;vega|OTTHUMG00000150228 +chr4 3088656 3088835 + gn|HTT;ccds|CCDS43206;ens|ENSG00000197386;vega|OTTHUMG00000159916 +chr4 9486048 9486167 + gn|RP11-1396O13.19;ens|ENSG00000250884;vega|OTTHUMG00000160195 +chr4 25351057 25351299 + gn|ZCCHC4;ccds|CCDS43218;ens|ENSG00000168228;vega|OTTHUMG00000160563 +chr4 42051449 42051555 + gn|SLC30A9;ccds|CCDS3465;ens|ENSG00000014824;vega|OTTHUMG00000099387 +chr4 56883747 56883806 + gn|CEP135;ccds|CCDS33986;ens|ENSG00000174799;vega|OTTHUMG00000160748 +chr4 71859471 71859708 + gn|DCK;gn|MOBKL1A;ccds|CCDS34002;ccds|CCDS3548;ens|ENSG00000156136;ens|ENSG00000173542;vega|OTTHUMG00000129908;vega|OTTHUMG00000160844 +chr4 79525354 79525571 + gn|ANXA3;ccds|CCDS3584;ens|ENSG00000138772;vega|OTTHUMG00000130198 +chr4 89345653 89345909 + gn|HERC6;ccds|CCDS47098;ccds|CCDS54777;ens|ENSG00000138642;vega|OTTHUMG00000160983 +chr4 104119392 104119544 + gn|CENPE;ccds|CCDS34042;ens|ENSG00000138778;vega|OTTHUMG00000160980 +chr4 115898230 115898499 + gn|NDST4;ccds|CCDS3706;ens|ENSG00000138653;vega|OTTHUMG00000132916 +chr4 130003433 130003580 + gn|SCLT1;ccds|CCDS3740;ens|ENSG00000151466;vega|OTTHUMG00000133346 +chr4 151246874 151247001 + gn|LRBA;ccds|CCDS3773;ens|ENSG00000198589;vega|OTTHUMG00000161443 +chr4 164050416 164050552 + gn|NAF1;ccds|CCDS3803;ccds|CCDS47159;ens|ENSG00000145414;vega|OTTHUMG00000161370 +chr4 184368409 184368480 + gn|CDKN2AIP;ccds|CCDS34110;ens|ENSG00000168564;vega|OTTHUMG00000160626 +chr5 1085479 1085660 + gn|SLC12A7;ccds|CCDS34129;ens|ENSG00000113504;vega|OTTHUMG00000161931 +chr5 17276869 17276973 + gn|BASP1;ccds|CCDS3888;ens|ENSG00000176788;vega|OTTHUMG00000131061 +chr5 37162477 37162755 + gn|C5orf42;ccds|CCDS34146;ens|ENSG00000197603;vega|OTTHUMG00000160492 +chr5 52385653 52385798 + gn|ITGA2;ccds|CCDS3957;ens|ENSG00000164171;vega|OTTHUMG00000131165 +chr5 64961665 64961929 + gn|C5orf44;gn|SGTB;ccds|CCDS3988;ccds|CCDS47221;ccds|CCDS47222;ccds|CCDS47223;ens|ENSG00000113597;ens|ENSG00000197860;vega|OTTHUMG00000097801;vega|OTTHUMG00000163649 +chr5 71757231 71757358 + gn|ZNF366;ccds|CCDS4015;ens|ENSG00000178175;vega|OTTHUMG00000100965 +chr5 79375027 79375129 + gn|CTD-2201I18.1;gn|THBS4;ccds|CCDS4049;ens|ENSG00000113296;ens|ENSG00000249825;vega|OTTHUMG00000108173;vega|OTTHUMG00000162569 +chr5 95128831 95129110 + gn|GLRX;gn|RHOBTB3;ccds|CCDS4077;ccds|CCDS4078;ens|ENSG00000164292;ens|ENSG00000173221;vega|OTTHUMG00000121167;vega|OTTHUMG00000121171 +chr5 113798650 113798850 + gn|KCNN2;ccds|CCDS4114;ccds|CCDS43352;ens|ENSG00000080709;vega|OTTHUMG00000128836 +chr5 129477298 129477533 + gn|CHSY3;gn|RP1-45I14.1;ccds|CCDS34223;ens|ENSG00000198108;ens|ENSG00000248610;vega|OTTHUMG00000163043;vega|OTTHUMG00000163044 +chr5 137013152 137013356 + gn|KLHL3;ccds|CCDS4192;ens|ENSG00000146021;vega|OTTHUMG00000129155 +chr5 140080288 140080478 + gn|ZMAT2;ccds|CCDS4239;ens|ENSG00000146007;vega|OTTHUMG00000129503 +chr5 141027462 141027534 + gn|ARAP3;gn|FCHSD1;ccds|CCDS4266;ccds|CCDS47295;ens|ENSG00000120318;ens|ENSG00000197948;vega|OTTHUMG00000129610;vega|OTTHUMG00000163762 +chr5 149436883 149437134 + gn|CSF1R;ccds|CCDS4302;ens|ENSG00000182578;vega|OTTHUMG00000130050 +chr5 156940316 156940583 + gn|ADAM19;ccds|CCDS4338;ens|ENSG00000135074;vega|OTTHUMG00000130242 +chr5 171773021 171773263 + gn|SH3PXD2B;ccds|CCDS34291;ens|ENSG00000174705;vega|OTTHUMG00000163280 +chr5 177546559 177546722 + gn|N4BP3;ccds|CCDS34307;ens|ENSG00000145911;vega|OTTHUMG00000163456 +chr6 2717394 2717646 + gn|MYLK4;ccds|CCDS34330;ens|ENSG00000145949;vega|OTTHUMG00000014121 +chr6 12135924 12136179 + gn|HIVEP1;ccds|CCDS43426;ens|ENSG00000095951;vega|OTTHUMG00000014265 +chr6 25850135 25850330 + gn|SLC17A3;ccds|CCDS4566;ccds|CCDS47385;ens|ENSG00000124564;vega|OTTHUMG00000014412 +chr6 29999663 29999807 + gn|ETF1P1;gn|ZNRD1-AS1;ens|ENSG00000204623;ens|ENSG00000232757;vega|OTTHUMG00000031109;vega|OTTHUMG00000031163 +chr6 31734889 31735021 + gn|C6orf27;ccds|CCDS4721;ens|ENSG00000204396;vega|OTTHUMG00000031132 +chr6 32977125 32977403 + gn|HLA-DOA;ccds|CCDS4763;ens|ENSG00000204252;vega|OTTHUMG00000031211 +chr6 35762820 35763146 + gn|CLPS;ccds|CCDS4811;ens|ENSG00000137392;vega|OTTHUMG00000014578 +chr6 41553065 41553279 + gn|FOXP4;ccds|CCDS34447;ccds|CCDS34448;ccds|CCDS4856;ens|ENSG00000137166;vega|OTTHUMG00000014679 +chr6 43581827 43582101 + gn|GTPBP2;gn|POLH;ccds|CCDS4902;ccds|CCDS4903;ens|ENSG00000170734;ens|ENSG00000172432;vega|OTTHUMG00000014743;vega|OTTHUMG00000014744 +chr6 52147388 52147631 + gn|MCM3;ccds|CCDS4940;ens|ENSG00000112118;vega|OTTHUMG00000014844 +chr6 69347219 69347342 + gn|BAI3;ccds|CCDS4968;ens|ENSG00000135298;vega|OTTHUMG00000014982 +chr6 82457766 82457970 + gn|FAM46A;ccds|CCDS34489;ens|ENSG00000112773;vega|OTTHUMG00000015097 +chr6 94066346 94066767 + gn|EPHA7;ccds|CCDS5031;ens|ENSG00000135333;vega|OTTHUMG00000015228 +chr6 109769822 109770023 + gn|MICAL1;ccds|CCDS5076;ccds|CCDS55047;ens|ENSG00000135596;vega|OTTHUMG00000015350 +chr6 119509484 119509802 + gn|MAN1A1;ccds|CCDS5122;ens|ENSG00000111885;vega|OTTHUMG00000015472 +chr6 133427255 133427477 + gn|NCRNA00326;ens|ENSG00000231023;vega|OTTHUMG00000015597 +chr6 145156888 145157126 + gn|UTRN;ccds|CCDS34547;ens|ENSG00000152818;vega|OTTHUMG00000015746 +chr6 155145278 155145592 + gn|SCAF8;ccds|CCDS5247;ens|ENSG00000213079;vega|OTTHUMG00000015877 +chr6 166843830 166844173 + gn|RPS6KA2;ccds|CCDS34570;ccds|CCDS5294;ens|ENSG00000071242;vega|OTTHUMG00000016007 +chr7 2558082 2558277 + gn|LFNG;ccds|CCDS34586;ccds|CCDS34587;ccds|CCDS55081;ccds|CCDS55082;ens|ENSG00000106003;vega|OTTHUMG00000152043 +chr7 7516650 7516819 + gn|COL28A1;ccds|CCDS43553;ens|ENSG00000215018;vega|OTTHUMG00000150034 +chr7 23309543 23309830 + gn|GPNMB;ccds|CCDS34610;ccds|CCDS5380;ens|ENSG00000136235;vega|OTTHUMG00000022811 +chr7 33078385 33078557 + gn|AVL9;gn|NT5C3;ccds|CCDS34613;ccds|CCDS34616;ccds|CCDS34617;ccds|CCDS55101;ens|ENSG00000105778;ens|ENSG00000122643;vega|OTTHUMG00000152929;vega|OTTHUMG00000152983 +chr7 44444103 44444252 + gn|NUDCD3;ccds|CCDS5490;ens|ENSG00000015676;vega|OTTHUMG00000129174 +chr7 56145758 56145883 + gn|SUMF2;ccds|CCDS43588;ccds|CCDS43589;ccds|CCDS47589;ccds|CCDS55111;ccds|CCDS5524;ens|ENSG00000129103;vega|OTTHUMG00000129373 +chr7 72734085 72734292 + gn|TRIM50;ccds|CCDS34654;ens|ENSG00000146755;vega|OTTHUMG00000156805 +chr7 76326941 76327009 + gn|AC004980.1;gn|UPK3B;ccds|CCDS5588;ccds|CCDS5589;ens|ENSG00000221249;ens|ENSG00000243566;vega|OTTHUMG00000149929 +chr7 90007385 90007455 + gn|GTPBP10;ccds|CCDS43614;ccds|CCDS5617;ens|ENSG00000105793;vega|OTTHUMG00000023655 +chr7 98258760 98258887 + gn|NPTX2;ccds|CCDS5657;ens|ENSG00000106236;vega|OTTHUMG00000154369 +chr7 100278832 100278962 + gn|GIGYF1;ccds|CCDS34708;ens|ENSG00000146830;vega|OTTHUMG00000157036 +chr7 102978041 102978292 + gn|DNAJC2;ccds|CCDS43628;ccds|CCDS47679;ens|ENSG00000105821;vega|OTTHUMG00000157202 +chr7 113518379 113518501 + gn|PPP1R3A;ccds|CCDS5759;ens|ENSG00000154415;vega|OTTHUMG00000156944 +chr7 128045771 128045954 + gn|IMPDH1;ccds|CCDS34748;ccds|CCDS34749;ccds|CCDS43643;ccds|CCDS47699;ccds|CCDS47700;ccds|CCDS55161;ens|ENSG00000106348;vega|OTTHUMG00000157713 +chr7 134928013 134928161 + gn|STRA8;ccds|CCDS5839;ens|ENSG00000146857;vega|OTTHUMG00000155415 +chr7 142111442 142111617 + gn|TRBV5-7;ens|ENSG00000211731;refseq|NG_001333 +chr7 149171458 149171535 + gn|ZNF746;ccds|CCDS55180;ccds|CCDS5897;ens|ENSG00000181220;vega|OTTHUMG00000158972 +chr7 151902192 151902358 + gn|MLL3;ccds|CCDS5931;ens|ENSG00000055609;vega|OTTHUMG00000150553 +chr8 6690174 6690293 + gn|XKR5;ens|ENSG00000186530;vega|OTTHUMG00000153652 +chr8 17407718 17407827 + gn|SLC7A2;ccds|CCDS34852;ccds|CCDS55203;ccds|CCDS6002;ens|ENSG00000003989;vega|OTTHUMG00000130819 +chr8 24350528 24350668 + gn|ADAM7;gn|RP11-561E1.1;gn|RP11-624C23.1;ccds|CCDS6045;ens|ENSG00000069206;ens|ENSG00000253535;ens|ENSG00000253643;vega|OTTHUMG00000097859;vega|OTTHUMG00000163790;vega|OTTHUMG00000163819 +chr8 36704322 36704434 + gn|KCNU1;ccds|CCDS55220;ens|ENSG00000215262;vega|OTTHUMG00000163981 +chr8 43033170 43033448 + gn|HGSNAT;ccds|CCDS47852;ens|ENSG00000165102;vega|OTTHUMG00000164102 +chr8 64099044 64099322 + gn|YTHDF3;ens|ENSG00000185728;vega|OTTHUMG00000164369 +chr8 79073298 79073435 + . +chr8 95504865 95505061 + gn|KIAA1429;ccds|CCDS34923;ccds|CCDS47894;ens|ENSG00000164944;vega|OTTHUMG00000164426 +chr8 105026679 105026842 + gn|RIMS2;ccds|CCDS43761;ccds|CCDS55269;ens|ENSG00000176406;vega|OTTHUMG00000162097 +chr8 124346483 124346648 + gn|ATAD2;ccds|CCDS6343;ens|ENSG00000156802;vega|OTTHUMG00000165090 +chr8 141549327 141549401 + gn|EIF2C2;ccds|CCDS55279;ccds|CCDS6380;ens|ENSG00000123908;vega|OTTHUMG00000164232 +chr8 145154732 145154806 + gn|SHARPIN;ccds|CCDS43777;ens|ENSG00000179526;vega|OTTHUMG00000165243 +chr9 3879594 3879661 + gn|GLIS3;ccds|CCDS43784;ccds|CCDS6451;ens|ENSG00000107249;vega|OTTHUMG00000019463 +chr9 18706618 18706960 + gn|ADAMTSL1;ccds|CCDS47954;ccds|CCDS6485;ens|ENSG00000178031;vega|OTTHUMG00000019604 +chr9 33675391 33675641 + gn|PTENP1;ens|ENSG00000237984;vega|OTTHUMG00000000410 +chr9 35826088 35826172 + gn|C9orf128;gn|TMEM8B;ccds|CCDS43799;ccds|CCDS43800;ccds|CCDS6595;ens|ENSG00000137103;ens|ENSG00000204930;vega|OTTHUMG00000019880;vega|OTTHUMG00000019885 +chr9 45002135 45002291 + gn|RP11-374M1.5;ens|ENSG00000235659;vega|OTTHUMG00000013244 +chr9 77376956 77377108 + gn|TRPM6;ccds|CCDS55318;ccds|CCDS55319;ccds|CCDS6647;ens|ENSG00000119121;vega|OTTHUMG00000020027 +chr9 90283451 90283642 + gn|DAPK1;ccds|CCDS43842;ens|ENSG00000196730;vega|OTTHUMG00000020150 +chr9 98206203 98206279 + gn|PTCH1;ccds|CCDS43851;ccds|CCDS47995;ccds|CCDS47996;ccds|CCDS6714;ens|ENSG00000185920;vega|OTTHUMG00000020280 +chr9 106898046 106898246 + gn|SMC2;ccds|CCDS35086;ens|ENSG00000136824;vega|OTTHUMG00000020401 +chr9 115249205 115249351 + gn|C9orf147;gn|KIAA1958;ccds|CCDS35108;ens|ENSG00000165185;ens|ENSG00000230185;vega|OTTHUMG00000020505;vega|OTTHUMG00000020508 +chr9 123924040 123924186 + gn|CNTRL;ccds|CCDS35118;ens|ENSG00000119397;vega|OTTHUMG00000020581 +chr9 129980893 129981121 + gn|RALGPS1;gn|RP13-225O21.2;ccds|CCDS35143;ccds|CCDS55344;ccds|CCDS55345;ccds|CCDS55346;ens|ENSG00000136828;ens|ENSG00000228487;vega|OTTHUMG00000020696;vega|OTTHUMG00000020697 +chr9 131848896 131849129 + gn|DOLPP1;ccds|CCDS48039;ccds|CCDS6918;ens|ENSG00000167130;vega|OTTHUMG00000020771 +chr9 135978130 135978360 + gn|RALGDS;ccds|CCDS43897;ccds|CCDS6959;ens|ENSG00000160271;vega|OTTHUMG00000020858 +chr9 139400068 139400348 + gn|NOTCH1;ccds|CCDS43905;ens|ENSG00000148400;vega|OTTHUMG00000020935 +chr10 255779 256058 + gn|ZMYND11;ccds|CCDS7052;ens|ENSG00000015171;vega|OTTHUMG00000017526 +chr10 12833115 12833283 + gn|CAMK1D;ccds|CCDS7091;ccds|CCDS7092;ens|ENSG00000183049;vega|OTTHUMG00000017683 +chr10 23270408 23270531 + gn|ARMC3;ccds|CCDS7142;ens|ENSG00000165309;vega|OTTHUMG00000017811 +chr10 33135254 33135424 + gn|C10orf68;ccds|CCDS31177;ens|ENSG00000150076;vega|OTTHUMG00000017926 +chr10 47271446 47271565 + gn|CTSL1P7;ens|ENSG00000232438;vega|OTTHUMG00000018112 +chr10 52419539 52419740 + gn|RP11-564C4.6;gn|RP11-564C4.7;ens|ENSG00000231345;ens|ENSG00000240660;vega|OTTHUMG00000018232;vega|OTTHUMG00000018236 +chr10 70506833 70506897 + gn|CCAR1;ccds|CCDS7282;ens|ENSG00000060339;vega|OTTHUMG00000018361 +chr10 75435483 75435631 + gn|AGAP5;gn|RP11-464F9.1;gn|RP11-464F9.9;ccds|CCDS44439;ens|ENSG00000172650;ens|ENSG00000242288;ens|ENSG00000251582;vega|OTTHUMG00000018473;vega|OTTHUMG00000018474;vega|OTTHUMG00000018482 +chr10 86132135 86132255 + gn|FAM190B;ccds|CCDS31235;ens|ENSG00000107771;vega|OTTHUMG00000018641 +chr10 95101594 95101777 + gn|MYOF;ccds|CCDS41550;ccds|CCDS41551;ens|ENSG00000138119;vega|OTTHUMG00000018772 +chr10 99512720 99512836 + gn|ZFYVE27;ccds|CCDS31262;ccds|CCDS31263;ccds|CCDS31264;ccds|CCDS53562;ccds|CCDS53563;ccds|CCDS53564;ccds|CCDS53565;ens|ENSG00000155256;vega|OTTHUMG00000018867 +chr10 104161780 104162079 + gn|NFKB2;ccds|CCDS41564;ccds|CCDS41565;ens|ENSG00000077150;vega|OTTHUMG00000018962 +chr10 114170125 114170422 + gn|ACSL5;ccds|CCDS7572;ccds|CCDS7573;ens|ENSG00000197142;vega|OTTHUMG00000019060 +chr10 121691806 121691878 + gn|SEC23IP;ccds|CCDS7618;ens|ENSG00000107651;vega|OTTHUMG00000019161 +chr10 129902794 129902868 + gn|MKI67;ccds|CCDS53588;ccds|CCDS7659;ens|ENSG00000148773;vega|OTTHUMG00000019255 +chr11 608884 609070 + gn|PHRF1;ccds|CCDS44507;ens|ENSG00000070047;vega|OTTHUMG00000165141 +chr11 2975655 2976013 + gn|NAP1L4;ccds|CCDS41599;ens|ENSG00000205531;vega|OTTHUMG00000011009 +chr11 6578205 6578346 + gn|DNHD1;ccds|CCDS44532;ccds|CCDS7767;ens|ENSG00000179532;vega|OTTHUMG00000133403 +chr11 11354163 11354431 + gn|GALNTL4;ccds|CCDS7807;ens|ENSG00000110328;vega|OTTHUMG00000165707 +chr11 18743423 18743662 + gn|IGSF22;gn|RP11-1081L13.4;ccds|CCDS41625;ens|ENSG00000179057;ens|ENSG00000254966;vega|OTTHUMG00000160502;vega|OTTHUMG00000166097 +chr11 33772999 33773185 + gn|FBXO3;ccds|CCDS44566;ccds|CCDS7887;ens|ENSG00000110429;vega|OTTHUMG00000166244 +chr11 46822694 46822886 + gn|CKAP5;ccds|CCDS31477;ccds|CCDS7924;ens|ENSG00000175216;vega|OTTHUMG00000166599 +chr11 56057861 56057940 + gn|OR8H1;ccds|CCDS31526;ens|ENSG00000181693;vega|OTTHUMG00000162671 +chr11 60642345 60642578 + gn|ZP1;ccds|CCDS31572;ens|ENSG00000149506;vega|OTTHUMG00000167797 +chr11 62656046 62656190 + gn|SLC3A2;ccds|CCDS31588;ccds|CCDS31589;ccds|CCDS31590;ccds|CCDS8039;ens|ENSG00000168003;vega|OTTHUMG00000074091 +chr11 64889719 64889883 + gn|FAU;gn|MRPL49;gn|SYVN1;ccds|CCDS31605;ccds|CCDS8095;ccds|CCDS8096;ccds|CCDS8097;ens|ENSG00000149792;ens|ENSG00000149806;ens|ENSG00000162298;vega|OTTHUMG00000165606;vega|OTTHUMG00000165607;vega|OTTHUMG00000165608 +chr11 66391606 66391683 + gn|RBM14;gn|RBM14-RBM4;ccds|CCDS41676;ccds|CCDS8147;ens|ENSG00000239306;ens|ENSG00000248643;vega|OTTHUMG00000140380;vega|OTTHUMG00000160813 +chr11 70261760 70261870 + gn|CTTN;ccds|CCDS41680;ccds|CCDS53676;ccds|CCDS8197;ens|ENSG00000085733;vega|OTTHUMG00000134307 +chr11 74952615 74952727 + . +chr11 86111623 86111857 + gn|CCDC81;ccds|CCDS53691;ccds|CCDS8276;ens|ENSG00000149201;vega|OTTHUMG00000167213 +chr11 96124721 96124934 + gn|JRKL;ccds|CCDS8308;ens|ENSG00000183340;vega|OTTHUMG00000154950 +chr11 110036172 110036271 + gn|ZC3H12C;ccds|CCDS44727;ens|ENSG00000149289;vega|OTTHUMG00000166572 +chr11 117299354 117299539 + gn|DSCAML1;ccds|CCDS8384;ens|ENSG00000177103;vega|OTTHUMG00000167071 +chr11 120198006 120198080 + gn|TMEM136;ccds|CCDS8432;ens|ENSG00000181264;vega|OTTHUMG00000166142 +chr11 126145185 126145369 + gn|FOXRED1;ccds|CCDS8471;ens|ENSG00000110074;vega|OTTHUMG00000165827 +chr12 1943348 1943789 + gn|CACNA2D4;gn|LRTM2;ccds|CCDS31726;ccds|CCDS44785;ens|ENSG00000151062;ens|ENSG00000166159;vega|OTTHUMG00000168088;vega|OTTHUMG00000168111 +chr12 6851932 6852260 + . +chr12 9723415 9723534 + gn|RP11-726G1.1;ens|ENSG00000214776;vega|OTTHUMG00000164997 +chr12 18892036 18892113 + gn|CAPZA3;ccds|CCDS8681;ens|ENSG00000177938;vega|OTTHUMG00000169001 +chr12 30814170 30814286 + gn|IPO8;ccds|CCDS53773;ccds|CCDS8719;ens|ENSG00000133704;vega|OTTHUMG00000169172 +chr12 44192281 44192436 + gn|TWF1;ccds|CCDS31780;ens|ENSG00000151239;vega|OTTHUMG00000169426 +chr12 49579040 49579138 + gn|TUBA1A;ccds|CCDS8781;ens|ENSG00000167552;vega|OTTHUMG00000169511 +chr12 52774262 52774350 + gn|KRT84;gn|RP3-416H24.4;ccds|CCDS8825;ens|ENSG00000161849;ens|ENSG00000258253;vega|OTTHUMG00000169634;vega|OTTHUMG00000169635 +chr12 54963266 54963444 + gn|PDE1B;ccds|CCDS53800;ccds|CCDS8882;ens|ENSG00000123360;vega|OTTHUMG00000169844 +chr12 57113140 57113290 + gn|NACA;ccds|CCDS31837;ccds|CCDS44925;ens|ENSG00000196531;vega|OTTHUMG00000170033 +chr12 63962904 63963107 + gn|DPY19L2;ccds|CCDS31851;ens|ENSG00000177990;vega|OTTHUMG00000168712 +chr12 72771765 72771930 + gn|TRHDE;ccds|CCDS9004;ens|ENSG00000072657;vega|OTTHUMG00000169642 +chr12 89984211 89984285 + gn|ATP2B1;gn|RP11-981P6.1;ccds|CCDS41817;ccds|CCDS9035;ens|ENSG00000070961;ens|ENSG00000258302;vega|OTTHUMG00000169945;vega|OTTHUMG00000169946 +chr12 101598200 101598355 + gn|SLC5A8;ccds|CCDS9080;ens|ENSG00000256870;vega|OTTHUMG00000170499 +chr12 108917210 108917419 + gn|FICD;gn|SART3;ccds|CCDS9116;ccds|CCDS9117;ens|ENSG00000075856;ens|ENSG00000198855;vega|OTTHUMG00000169449;vega|OTTHUMG00000169569 +chr12 112681129 112681532 + gn|C12orf51;ens|ENSG00000173064;vega|OTTHUMG00000150719 +chr12 120783891 120784078 + gn|MSI1;ccds|CCDS9196;ens|ENSG00000135097;vega|OTTHUMG00000169344 +chr12 123893050 123893118 + gn|SETD8;ccds|CCDS9247;ens|ENSG00000183955;vega|OTTHUMG00000150477 +chr12 133150632 133150777 + gn|FBRSL1;ccds|CCDS45010;ens|ENSG00000112787;vega|OTTHUMG00000167991 +chr13 25285422 25285633 + gn|ATP12A;ccds|CCDS31948;ccds|CCDS53858;ens|ENSG00000075673;vega|OTTHUMG00000016588 +chr13 33347237 33347508 + gn|PDS5B;ccds|CCDS41878;ens|ENSG00000083642;vega|OTTHUMG00000016704 +chr13 45515232 45515497 + gn|NUFIP1;ccds|CCDS9393;ens|ENSG00000083635;vega|OTTHUMG00000016842 +chr13 53211479 53211651 + gn|HNRNPA1L2;gn|RP11-78J21.2;ccds|CCDS31980;ens|ENSG00000139675;ens|ENSG00000250299;vega|OTTHUMG00000016972;vega|OTTHUMG00000016973 +chr13 94482512 94482587 + gn|GPC6;gn|GPC6-AS2;ccds|CCDS9469;ens|ENSG00000183098;ens|ENSG00000224394;vega|OTTHUMG00000017203;vega|OTTHUMG00000017205 +chr13 107862947 107863110 + gn|FAM155A;ccds|CCDS32006;ens|ENSG00000204442;vega|OTTHUMG00000017326 +chr14 20077938 20078020 + gn|RP11-597A11.1;ens|ENSG00000258027;vega|OTTHUMG00000170544 +chr14 23299057 23299474 + gn|MRPL52;gn|SLC7A7;ccds|CCDS41917;ccds|CCDS41918;ccds|CCDS9574;ccds|CCDS9575;ccds|CCDS9576;ens|ENSG00000155465;ens|ENSG00000172590;vega|OTTHUMG00000028692;vega|OTTHUMG00000028703 +chr14 24775506 24775785 + gn|C14orf21;gn|CIDEB;gn|LTB4R2;ccds|CCDS32056;ccds|CCDS9624;ccds|CCDS9625;ens|ENSG00000136305;ens|ENSG00000196943;ens|ENSG00000213906;vega|OTTHUMG00000029342;vega|OTTHUMG00000029345;vega|OTTHUMG00000171555 +chr14 37180443 37180746 + gn|SLC25A21;ccds|CCDS9663;ens|ENSG00000183032;vega|OTTHUMG00000140250 +chr14 52471039 52471161 + gn|C14orf166;ccds|CCDS9705;ens|ENSG00000087302;vega|OTTHUMG00000152332 +chr14 60575103 60575215 + gn|C14orf135;ens|ENSG00000126773;vega|OTTHUMG00000150361 +chr14 68126557 68126699 + gn|RP11-1012A1.4;gn|VTI1B;ccds|CCDS9786;ens|ENSG00000100568;ens|ENSG00000258466;vega|OTTHUMG00000171251;vega|OTTHUMG00000171262 +chr14 74952974 74953150 + gn|NPC2;ccds|CCDS32121;ens|ENSG00000119655;vega|OTTHUMG00000171212 +chr14 88454764 88454878 + gn|GALC;ccds|CCDS9878;ens|ENSG00000054983;vega|OTTHUMG00000028646 +chr14 94697066 94697141 + gn|PPP4R4;ccds|CCDS9921;ccds|CCDS9922;ens|ENSG00000119698;vega|OTTHUMG00000171344 +chr14 102605503 102605704 + gn|HSP90AA1;ccds|CCDS32160;ccds|CCDS9967;ens|ENSG00000080824;vega|OTTHUMG00000171752 +chr14 106053312 106053462 + gn|IGHA2;ens|ENSG00000211890;refseq|NG_001019 +chr15 24927790 24927859 + gn|C15orf2;ccds|CCDS10015;ens|ENSG00000185823;vega|OTTHUMG00000129179 +chr15 31251002 31251332 + gn|CHRNA7;gn|MTMR10;ccds|CCDS10027;ccds|CCDS45204;ccds|CCDS53924;ens|ENSG00000166912;ens|ENSG00000175344;vega|OTTHUMG00000129285 +chr15 40033946 40034140 + gn|FSIP1;ccds|CCDS10050;ens|ENSG00000150667;vega|OTTHUMG00000129874 +chr15 42175592 42175733 + gn|SPTBN5;ens|ENSG00000137877 +chr15 44003974 44004109 + gn|STRC;gn|STRCP1;ccds|CCDS10098;ens|ENSG00000166763;ens|ENSG00000242866;vega|OTTHUMG00000059899;vega|OTTHUMG00000059934 +chr15 50570627 50570804 + gn|GABPB1;ccds|CCDS10135;ccds|CCDS10136;ccds|CCDS32239;ccds|CCDS45258;ens|ENSG00000104064;vega|OTTHUMG00000131642 +chr15 58724134 58724365 + gn|LIPC;ccds|CCDS10166;ens|ENSG00000166035;vega|OTTHUMG00000132632 +chr15 65236809 65236993 + gn|ANKDD1A;ccds|CCDS10197;ens|ENSG00000166839;vega|OTTHUMG00000133051 +chr15 72069702 72069800 + gn|THSD4;ccds|CCDS10238;ens|ENSG00000187720;vega|OTTHUMG00000133389 +chr15 75890913 75891061 + gn|SNUPN;ccds|CCDS10281;ens|ENSG00000169371;vega|OTTHUMG00000142833 +chr15 82532734 82532937 + gn|EFTUD1;ccds|CCDS42070;ccds|CCDS42071;ens|ENSG00000140598 +chr15 88472317 88472544 + gn|NTRK3;ccds|CCDS10340;ccds|CCDS32322;ccds|CCDS32323;ens|ENSG00000140538;vega|OTTHUMG00000148677 +chr15 94841730 94841903 + gn|MCTP2;ccds|CCDS32338;ccds|CCDS53975;ccds|CCDS53976;ens|ENSG00000140563;vega|OTTHUMG00000171751 +chr16 684564 684710 + gn|C16orf13;gn|Z84479.1;ccds|CCDS32352;ccds|CCDS42090;ccds|CCDS42091;ccds|CCDS45367;ccds|CCDS45368;ens|ENSG00000130731;ens|ENSG00000249124;vega|OTTHUMG00000047855 +chr16 2051524 2051789 + gn|ZNF598;ens|ENSG00000167962 +chr16 3447518 3447653 + gn|ZNF434;ccds|CCDS10503;ens|ENSG00000140987;vega|OTTHUMG00000129357 +chr16 11348348 11348499 + gn|SOCS1;ccds|CCDS10546;ens|ENSG00000185338;vega|OTTHUMG00000129792 +chr16 16462454 16462556 + gn|RP11-958N24.2;ens|ENSG00000227827;vega|OTTHUMG00000166306 +chr16 21475331 21475405 + gn|CTD-2547E10.2;ens|ENSG00000180747;vega|OTTHUMG00000164329 +chr16 27561150 27561250 + gn|GTF3C1;ccds|CCDS32414;ens|ENSG00000077235 +chr16 30214794 30215000 + gn|SULT1A3;ccds|CCDS10674;ens|ENSG00000213599;vega|OTTHUMG00000048083 +chr16 32014571 32014728 + . +chr16 53320055 53320235 + gn|CHD9;ccds|CCDS45485;ens|ENSG00000177200 +chr16 58314336 58314569 + gn|CCDC113;gn|PRSS54;ccds|CCDS10795;ccds|CCDS32463;ccds|CCDS45497;ens|ENSG00000103021;ens|ENSG00000103023;vega|OTTHUMG00000133489 +chr16 67858981 67859061 + gn|TSNAXIP1;ccds|CCDS10846;ens|ENSG00000102904;vega|OTTHUMG00000137545 +chr16 70817291 70817500 + gn|VAC14;ccds|CCDS10896;ens|ENSG00000103043;vega|OTTHUMG00000137583 +chr16 76574556 76574712 + gn|CNTNAP4;ccds|CCDS10924;ens|ENSG00000152910;vega|OTTHUMG00000137617 +chr16 87885462 87885536 + gn|SLC7A5;ccds|CCDS10964;ens|ENSG00000103257;vega|OTTHUMG00000137658 +chr17 900373 900645 + gn|TIMM22;ccds|CCDS32521;ens|ENSG00000177370 +chr17 3922916 3923093 + gn|ZZEF1;ccds|CCDS11043;ens|ENSG00000074755;vega|OTTHUMG00000090741 +chr17 6909724 6909858 + gn|AC027763.2;gn|ALOX12;ccds|CCDS11084;ens|ENSG00000108839;ens|ENSG00000215067;vega|OTTHUMG00000102088;vega|OTTHUMG00000132903 +chr17 7847935 7848002 + gn|CNTROB;ccds|CCDS32557;ens|ENSG00000170037 +chr17 11835239 11835392 + gn|DNAH9;ccds|CCDS11160;ccds|CCDS11161;ens|ENSG00000007174;vega|OTTHUMG00000130383 +chr17 18148648 18148772 + gn|FLII;ccds|CCDS11192;ens|ENSG00000177731;vega|OTTHUMG00000059389 +chr17 26073092 26073287 + . +chr17 28776500 28776824 + gn|CPD;ccds|CCDS11257;ens|ENSG00000108582;vega|OTTHUMG00000132797 +chr17 34171311 34171386 + gn|TAF15;ccds|CCDS32623;ens|ENSG00000172660 +chr17 37580824 37581009 + gn|MED1;ccds|CCDS11336;ens|ENSG00000125686;vega|OTTHUMG00000133216 +chr17 39742781 39742851 + gn|KRT14;ccds|CCDS11400;ens|ENSG00000186847;vega|OTTHUMG00000133426 +chr17 41361894 41362131 + gn|NBR1;ccds|CCDS45694;ens|ENSG00000188554 +chr17 44104635 44104775 + gn|MAPT;ccds|CCDS11499;ccds|CCDS11500;ccds|CCDS11501;ccds|CCDS11502;ccds|CCDS45715;ccds|CCDS45716;ens|ENSG00000186868;vega|OTTHUMG00000168833 +chr17 48153598 48153826 + gn|ITGA3;ccds|CCDS11557;ccds|CCDS11558;ens|ENSG00000005884;vega|OTTHUMG00000161890 +chr17 56356851 56357011 + gn|MPO;ccds|CCDS11604;ens|ENSG00000005381 +chr17 60493530 60493713 + gn|EFCAB3;ccds|CCDS11632;ccds|CCDS45751;ens|ENSG00000172421;vega|OTTHUMG00000164527 +chr17 65105309 65105438 + gn|HELZ;ccds|CCDS42374;ens|ENSG00000198265 +chr17 72840409 72840683 + gn|GRIN2C;ccds|CCDS32724;ens|ENSG00000161509;vega|OTTHUMG00000044524 +chr17 74382415 74382502 + gn|SPHK1;ccds|CCDS11744;ccds|CCDS45785;ens|ENSG00000176170 +chr17 78312950 78313241 + gn|RNF213;ccds|CCDS11772;ccds|CCDS32761;ens|ENSG00000173821;vega|OTTHUMG00000161415 +chr17 80706681 80706957 + gn|FN3K;ccds|CCDS11818;ens|ENSG00000167363 +chr18 10680323 10680422 + gn|FAM38B;ens|ENSG00000154864;vega|OTTHUMG00000131634 +chr18 21495178 21495461 + gn|LAMA3;ccds|CCDS11880;ccds|CCDS42419;ccds|CCDS45838;ens|ENSG00000053747;vega|OTTHUMG00000131874 +chr18 34353571 34353729 + gn|FHOD3;ccds|CCDS32816;ens|ENSG00000134775 +chr18 51880812 51880988 + gn|STARD6;ccds|CCDS11955;ens|ENSG00000174448;vega|OTTHUMG00000132702 +chr18 67718567 67718811 + gn|RTTN;ccds|CCDS42443;ens|ENSG00000176225 +chr19 1009095 1009198 + gn|GRIN3B;ccds|CCDS32861;ens|ENSG00000116032;vega|OTTHUMG00000044546 +chr19 2900938 2901121 + gn|ZNF57;gn|ZNF77;ccds|CCDS12098;ccds|CCDS12099;ens|ENSG00000171970;ens|ENSG00000175691;vega|OTTHUMG00000164485 +chr19 5622476 5622662 + gn|SAFB2;ccds|CCDS32879;ens|ENSG00000130254 +chr19 7965391 7965670 + gn|AC010336.2;gn|LRRC8E;ccds|CCDS12189;ens|ENSG00000171017;ens|ENSG00000214248 +chr19 10203911 10204031 + gn|ANGPTL6;gn|C19orf66;ccds|CCDS12224;ccds|CCDS45957;ens|ENSG00000130812;ens|ENSG00000130813 +chr19 12126408 12126479 + gn|CTD-2006C1.2;gn|ZNF433;ccds|CCDS45983;ens|ENSG00000197647;ens|ENSG00000219665;vega|OTTHUMG00000156427;vega|OTTHUMG00000156428 +chr19 14273906 14274084 + gn|AC022098.3;gn|LPHN1;ccds|CCDS12307;ccds|CCDS32928;ens|ENSG00000072071;ens|ENSG00000248045 +chr19 17169326 17169640 + gn|HAUS8;ccds|CCDS32948;ccds|CCDS46009;ens|ENSG00000131351 +chr19 19162729 19163049 + gn|ARMC6;ccds|CCDS32965;ens|ENSG00000105676;vega|OTTHUMG00000169271 +chr19 31770245 31770365 + gn|TSHZ3;ccds|CCDS12421;ens|ENSG00000121297;vega|OTTHUMG00000150184 +chr19 36228460 36228657 + gn|AD000671.1;ccds|CCDS46055;ens|ENSG00000105663;vega|OTTHUMG00000048119 +chr19 38886375 38886700 + gn|SPRED3;ccds|CCDS42560;ccds|CCDS42561;ens|ENSG00000188766 +chr19 40903143 40903258 + gn|PRX;ccds|CCDS12556;ccds|CCDS33028;ens|ENSG00000105227 +chr19 43093626 43093833 + gn|CEACAM8;ccds|CCDS12610;ens|ENSG00000124469;vega|OTTHUMG00000151124 +chr19 45911340 45911572 + gn|CD3EAP;gn|ERCC1;ccds|CCDS12661;ccds|CCDS12662;ccds|CCDS12663;ccds|CCDS54279;ens|ENSG00000012061;ens|ENSG00000117877 +chr19 48668731 48668932 + gn|LIG1;ccds|CCDS12711;ens|ENSG00000105486 +chr19 50200476 50200784 + gn|CPT1C;ccds|CCDS12779;ccds|CCDS46147;ens|ENSG00000169169 +chr19 52115423 52115619 + gn|SIGLEC5;ccds|CCDS33088;ens|ENSG00000105501;vega|OTTHUMG00000165510 +chr19 54711188 54711404 + gn|RPS9;ccds|CCDS12884;ens|ENSG00000170889;vega|OTTHUMG00000066618 +chr19 56321638 56321765 + gn|NLRP11;ccds|CCDS12935;ens|ENSG00000179873 +chr19 58928697 58928837 + gn|ZNF584;ccds|CCDS12979;ens|ENSG00000171574 +chr20 3673452 3673545 + gn|SIGLEC1;ccds|CCDS13060;ens|ENSG00000088827;vega|OTTHUMG00000031757 +chr20 17495277 17495509 + gn|BFSP1;ccds|CCDS13126;ccds|CCDS54448;ens|ENSG00000125864;vega|OTTHUMG00000031940 +chr20 30521494 30521720 + gn|TTLL9;ccds|CCDS42863;ens|ENSG00000131044 +chr20 33725614 33725735 + gn|EDEM2;ccds|CCDS13247;ccds|CCDS46592;ens|ENSG00000088298;vega|OTTHUMG00000032322 +chr20 37263228 37263458 + gn|ARHGAP40;ens|ENSG00000124143;vega|OTTHUMG00000032453 +chr20 44574284 44574493 + gn|PCIF1;ccds|CCDS13388;ens|ENSG00000100982;vega|OTTHUMG00000032635 +chr20 50803339 50803433 + gn|ZFP64;ccds|CCDS13439;ccds|CCDS13440;ccds|CCDS13441;ccds|CCDS13442;ens|ENSG00000020256;vega|OTTHUMG00000032756 +chr20 60905679 60905768 + gn|LAMA5;ccds|CCDS33502;ens|ENSG00000130702;vega|OTTHUMG00000032908 +chr21 14758525 14758776 + gn|ANKRD30BP1;ens|ENSG00000175302;vega|OTTHUMG00000074201 +chr21 33974479 33974709 + gn|C21orf59;ccds|CCDS13617;ens|ENSG00000159079;vega|OTTHUMG00000064928 +chr21 40685119 40685254 + gn|BRWD1;gn|TMPRSS3;ccds|CCDS13662;ccds|CCDS13663;ccds|CCDS13686;ccds|CCDS33557;ccds|CCDS42939;ens|ENSG00000160183;ens|ENSG00000185658;vega|OTTHUMG00000066030;vega|OTTHUMG00000086796 +chr21 45760249 45760448 + gn|AP001062.9;ens|ENSG00000232969;vega|OTTHUMG00000086923 +chr22 18077238 18077412 + gn|ATP6V1E1;ccds|CCDS13745;ccds|CCDS42977;ccds|CCDS42978;ens|ENSG00000131100;vega|OTTHUMG00000059320 +chr22 21088691 21088886 + gn|PI4KA;ccds|CCDS33603;ccds|CCDS46667;ens|ENSG00000241973;vega|OTTHUMG00000167440 +chr22 24109462 24109559 + gn|CHCHD10;ccds|CCDS13815;ens|ENSG00000250479;vega|OTTHUMG00000150736 +chr22 29737680 29737813 + gn|AP1B1;ccds|CCDS13855;ccds|CCDS13856;ccds|CCDS54515;ens|ENSG00000100280;vega|OTTHUMG00000151109 +chr22 33672976 33673184 + gn|LARGE;ccds|CCDS13912;ens|ENSG00000133424;vega|OTTHUMG00000150914 +chr22 38793230 38793490 + gn|CSNK1E;gn|RP3-449O17.1;ccds|CCDS13970;ens|ENSG00000213923;ens|ENSG00000244627;vega|OTTHUMG00000151135;vega|OTTHUMG00000151245 +chr22 42092584 42092700 + gn|C22orf46;ccds|CCDS46717;ens|ENSG00000184208;vega|OTTHUMG00000151188 +chr22 46930854 46930931 + gn|CELSR1;ccds|CCDS14076;ens|ENSG00000075275;vega|OTTHUMG00000150423 +chrX 1720023 1720166 + gn|AKAP17A;ccds|CCDS14116;ens|ENSG00000197976;vega|OTTHUMG00000021063 +chrX 16685531 16685606 + gn|CTPS2;ccds|CCDS14175;ens|ENSG00000047230;vega|OTTHUMG00000021193 +chrX 27481016 27481269 + gn|GS1-309P15.2;ens|ENSG00000224960;vega|OTTHUMG00000021300 +chrX 44132089 44132158 + gn|EFHC2;ccds|CCDS55405;ens|ENSG00000183690;vega|OTTHUMG00000021393 +chrX 49076868 49077038 + gn|CACNA1F;ccds|CCDS35253;ens|ENSG00000102001;vega|OTTHUMG00000022703 +chrX 54324658 54324839 + gn|WNK3;ccds|CCDS14357;ccds|CCDS35302;ens|ENSG00000196632;vega|OTTHUMG00000021626 +chrX 70093005 70093203 + gn|TEX11;ccds|CCDS35323;ccds|CCDS43968;ens|ENSG00000120498;vega|OTTHUMG00000021782 +chrX 77112257 77112439 + gn|MAGT1;ccds|CCDS14436;ens|ENSG00000102158;vega|OTTHUMG00000021882 +chrX 100880023 100880097 + gn|ARMCX3;ccds|CCDS14489;ens|ENSG00000102401;vega|OTTHUMG00000022035 +chrX 107821164 107821229 + gn|COL4A5;ccds|CCDS14543;ccds|CCDS35366;ens|ENSG00000188153;vega|OTTHUMG00000022182 +chrX 119391984 119392122 + gn|ZBTB33;ccds|CCDS14596;ens|ENSG00000177485;vega|OTTHUMG00000171159 +chrX 131573354 131573687 + gn|MBNL3;ccds|CCDS14633;ccds|CCDS14634;ccds|CCDS55492;ccds|CCDS55493;ccds|CCDS55494;ens|ENSG00000076770;vega|OTTHUMG00000022426 +chrX 148037056 148037332 + gn|AFF2;ccds|CCDS14684;ccds|CCDS55520;ccds|CCDS55521;ens|ENSG00000155966;vega|OTTHUMG00000022613 +chrX 153209957 153210027 + gn|RENBP;ccds|CCDS14738;ens|ENSG00000102032;vega|OTTHUMG00000024224 +chrY 2847386 2847504 + gn|ZFY;gn|ZFY-AS1;ccds|CCDS14774;ccds|CCDS48200;ccds|CCDS48201;ens|ENSG00000067646;ens|ENSG00000233070;vega|OTTHUMG00000036156;vega|OTTHUMG00000036159 +chrY 27190032 27190210 + gn|BPY2C;ccds|CCDS44030;ens|ENSG00000185894;vega|OTTHUMG00000045199 diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index e594040ee8..8a8514ff36 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -201,6 +201,23 @@ class ADAMContextSuite extends ADAMFunSuite { assert(annot.count === 10) } + sparkTest("Can read a .interval_list file") { + val path = testFile("features/SeqCap_EZ_Exome_v3.hg19.interval_list") + val annot: RDD[Feature] = sc.loadFeatures(path) + assert(annot.count == 369) + val arr = annot.collect + + val first = arr.find(f => f.getContig.getContigName == "chr1" && f.getStart == 14416L && f.getEnd == 14499L).get + assert(first.getContig.getContigLength == 249250621L) + assert(first.getContig.getReferenceURL == "file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta") + assert(first.getContig.getContigMD5 == "1b22b98cdeb4a9304cb5d48026a85128") + + val last = arr.find(f => f.getContig.getContigName == "chrY" && f.getStart == 27190032L && f.getEnd == 27190210L).get + assert(last.getContig.getContigLength == 59373566L) + assert(last.getContig.getReferenceURL == "file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta") + assert(last.getContig.getContigMD5 == "3393b0779f142dc59f4cfcc22b61c1ee") + } + sparkTest("can read a small .vcf file") { val path = ClassLoader.getSystemClassLoader.getResource("small.vcf").getFile From 7dfb1172cb04e06254149de7a7422e7612fd39b1 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 31 May 2015 04:14:33 +0000 Subject: [PATCH 2/6] nit: fix deprecation warning --- adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala index 5686f4ee27..f90e0adde5 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/Coverage.scala @@ -147,6 +147,7 @@ class Coverage(val window: Long) extends Serializable { OrientedPoint(r1.referenceName, r1.end, false), OrientedPoint(r2.referenceName, r2.start, true)) } + case _ => Seq() } val paired = points.pair() val pairedAndFiltered = paired.filter(p => From add7458bc85a953cbeee526c5328c6ea689527cf Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 31 May 2015 04:34:14 +0000 Subject: [PATCH 3/6] allow ',' delimiters in interval-list lines --- .../org/bdgenomics/adam/rdd/features/FeatureParser.scala | 2 +- .../features/SeqCap_EZ_Exome_v3.hg19.interval_list | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala index da4d90c2f9..9adcf002a0 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala @@ -150,7 +150,7 @@ class IntervalListParser extends Serializable { (if (fields.length < 5 || fields(4) == ".") { (Nil, Map()) } else { - val a = fields(4).split(';').map(field => field.split('|') match { + val a = fields(4).split(Array(';', ',')).map(field => field.split('|') match { case Array(key, value) => key match { case "gn" | "ens" | "vega" | "ccds" => (Some(Dbxref.newBuilder().setDb(key).setAccession(value).build()), None) diff --git a/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list b/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list index 8a31eb493f..52412c21b3 100644 --- a/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list +++ b/adam-core/src/test/resources/features/SeqCap_EZ_Exome_v3.hg19.interval_list @@ -104,9 +104,9 @@ chr1 32148680 32148835 + gn|COL16A1;ccds|CCDS41297;ens|ENSG00000084636;vega|OTTH chr1 36022702 36022913 + gn|KIAA0319L;ccds|CCDS390;ens|ENSG00000142687;vega|OTTHUMG00000004370 chr1 39980887 39981050 + gn|BMP8A;gn|OXCT2P1;ccds|CCDS437;ens|ENSG00000183682;ens|ENSG00000237624;vega|OTTHUMG00000008394;vega|OTTHUMG00000009250 chr1 43918455 43918718 + gn|HYI;ccds|CCDS488;ccds|CCDS53309;ens|ENSG00000178922;vega|OTTHUMG00000007502 -chr1 47150040 47150299 + gn|KIAA0494;gn|RP11-8J9.4;ccds|CCDS30706;ens|ENSG00000159658;ens|ENSG00000228237;vega|OTTHUMG00000007992;vega|OTTHUMG00000007993 -chr1 54562720 54562844 + gn|TCEANC2;ccds|CCDS587;ens|ENSG00000116205;vega|OTTHUMG00000008434 -chr1 63997445 63997725 + gn|EFCAB7;gn|ITGB3BP;ccds|CCDS30736;ccds|CCDS30737;ens|ENSG00000142856;ens|ENSG00000203965;vega|OTTHUMG00000008983;vega|OTTHUMG00000013364 +chr1 47150040 47150299 + gn|KIAA0494,gn|RP11-8J9.4,ccds|CCDS30706,ens|ENSG00000159658,ens|ENSG00000228237,vega|OTTHUMG00000007992,vega|OTTHUMG00000007993 +chr1 54562720 54562844 + gn|TCEANC2,ccds|CCDS587,ens|ENSG00000116205,vega|OTTHUMG00000008434 +chr1 63997445 63997725 + gn|EFCAB7,gn|ITGB3BP,ccds|CCDS30736,ccds|CCDS30737,ens|ENSG00000142856,ens|ENSG00000203965,vega|OTTHUMG00000008983,vega|OTTHUMG00000013364 chr1 76349439 76349541 + gn|MSH4;ccds|CCDS670;ens|ENSG00000057468;vega|OTTHUMG00000009788 chr1 89271245 89271508 + gn|PKN2;ccds|CCDS714;ens|ENSG00000065243;vega|OTTHUMG00000010074 chr1 95616780 95617017 + gn|TMEM56;ccds|CCDS753;ens|ENSG00000152078;vega|OTTHUMG00000010847 From a5e33ce238418c50380b61b5fbac1f36967d2bd8 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 31 May 2015 04:34:33 +0000 Subject: [PATCH 4/6] fix error msg in interval-list parser --- .../scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala index 9adcf002a0..1986c2207e 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala @@ -156,7 +156,7 @@ class IntervalListParser extends Serializable { case "gn" | "ens" | "vega" | "ccds" => (Some(Dbxref.newBuilder().setDb(key).setAccession(value).build()), None) case _ => (None, Some(key -> value)) } - case x => throw new Exception(s"Expected fields of the form 'key:value' but got: $field. Line:\n$line") + case x => throw new Exception(s"Expected fields of the form 'key|value;' but got: $field. Line:\n$line") }) (a.flatMap(_._1).toList, a.flatMap(_._2).toMap) From bed9dd3571a905d915b5113b53ed04a59e0b5d2b Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 31 May 2015 04:34:59 +0000 Subject: [PATCH 5/6] add tests of interval-list features' dbxrefs --- .../adam/rdd/ADAMContextSuite.scala | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index 8a8514ff36..8639731174 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -211,11 +211,36 @@ class ADAMContextSuite extends ADAMFunSuite { assert(first.getContig.getContigLength == 249250621L) assert(first.getContig.getReferenceURL == "file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta") assert(first.getContig.getContigMD5 == "1b22b98cdeb4a9304cb5d48026a85128") + assert( + first + .getDbxrefs + .map(dbxref => dbxref.getDb -> dbxref.getAccession) + .groupBy(_._1) + .mapValues(_.map(_._2).toSet) == + Map( + "gn" -> Set("DDX11L1", "RP11-34P13.2"), + "ens" -> Set("ENSG00000223972", "ENSG00000227232"), + "vega" -> Set("OTTHUMG00000000958", "OTTHUMG00000000961") + ) + ) val last = arr.find(f => f.getContig.getContigName == "chrY" && f.getStart == 27190032L && f.getEnd == 27190210L).get assert(last.getContig.getContigLength == 59373566L) assert(last.getContig.getReferenceURL == "file:/gs01/projects/ngs/resources/gatk/2.3/ucsc.hg19.parmasked.fasta") assert(last.getContig.getContigMD5 == "3393b0779f142dc59f4cfcc22b61c1ee") + assert( + last + .getDbxrefs + .map(dbxref => dbxref.getDb -> dbxref.getAccession) + .groupBy(_._1) + .mapValues(_.map(_._2).toSet) == + Map( + "gn" -> Set("BPY2C"), + "ccds" -> Set("CCDS44030"), + "ens" -> Set("ENSG00000185894"), + "vega" -> Set("OTTHUMG00000045199") + ) + ) } sparkTest("can read a small .vcf file") { From 7bec862bccc827c5bdbfe4e5d461461d66db8936 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 31 May 2015 04:39:09 +0000 Subject: [PATCH 6/6] allow "-" to denote empty attrs in interval-list --- .../scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala index 1986c2207e..aba17dfdf3 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/features/FeatureParser.scala @@ -147,7 +147,7 @@ class IntervalListParser extends Serializable { } val (dbxrfs, attrs: Map[String, String]) = - (if (fields.length < 5 || fields(4) == ".") { + (if (fields.length < 5 || fields(4) == "." || fields(4) == "-") { (Nil, Map()) } else { val a = fields(4).split(Array(';', ',')).map(field => field.split('|') match {