-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tp info #97
Tp info #97
Changes from all commits
b72af87
ccee7d8
99db742
da29bfa
59d8c08
ec09ddf
d58ba41
a0c46fe
1b06044
f3ff63d
7d7026a
d4a3acd
866b435
6446687
1405f80
205a4a7
5d2ec4e
1c601fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package org.broadinstitute.hail.annotations | ||
|
||
/** Describes, as pieces of text, how one annotation is presented to generated code.
  *
  * Implementations supply three strings; this class places no constraints on their content.
  */
abstract class AnnotationSignature {

  /** Text naming the type the annotation is exposed as. */
  def emitType: String

  /** Text naming the conversion applied to the raw annotation value. */
  def emitConversionIdentifier: String

  /** Auxiliary text associated with the annotation; how it is consumed is not shown here. */
  def emitUtilities: String
}
|
||
/** An [[AnnotationSignature]] fully determined by its emitted type and conversion identifier.
  *
  * @param emitType                 text naming the exposed type
  * @param emitConversionIdentifier text naming the conversion applied to the raw value
  */
case class SimpleSignature(emitType: String, emitConversionIdentifier: String) extends AnnotationSignature {

  /** Always the empty string. */
  override def emitUtilities: String = ""
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package org.broadinstitute.hail.annotations | ||
|
||
/** Immutable container of named groups of values plus stand-alone named values.
  *
  * Every "add" operation returns a new instance; on a name clash the newly supplied
  * entry replaces the existing one.
  *
  * @param maps named groups, each mapping a key to a value
  * @param vals top-level values keyed by name
  * @tparam T type of the stored values
  */
case class Annotations[T](maps: Map[String, Map[String, T]], vals: Map[String, T]) extends Serializable {

  /** True when a group named `str` exists. */
  def hasMap(str: String): Boolean = maps.isDefinedAt(str)

  /** True when a top-level value named `str` exists. */
  def containsVal(str: String): Boolean = vals.isDefinedAt(str)

  /** True when group `parent` exists and holds the key `str`. */
  def containsInMap(parent: String, str: String): Boolean =
    maps.get(parent).exists(_.contains(str))

  /** The top-level value named `str`, if present. */
  def getVal(str: String): Option[T] = vals.get(str)

  /** The value stored under `str` in group `parent`, if both exist. */
  def getInMap(parent: String, str: String): Option[T] =
    for {
      group <- maps.get(parent)
      value <- group.get(str)
    } yield value

  /** The whole group named `parent`, if present. */
  def getMap(parent: String): Option[Map[String, T]] = maps.get(parent)

  /** Copy with group `name` set to `m`. */
  def addMap(name: String, m: Map[String, T]): Annotations[T] =
    copy(maps = maps + (name -> m))

  /** Copy with every group of `newMaps` added. */
  def addMaps(newMaps: Map[String, Map[String, T]]): Annotations[T] =
    copy(maps = maps ++ newMaps)

  /** Copy with top-level value `name` set to `mapping`. */
  def addVal(name: String, mapping: T): Annotations[T] =
    copy(vals = vals + (name -> mapping))

  /** Copy with every entry of `newVals` added as a top-level value. */
  def addVals(newVals: Map[String, T]): Annotations[T] =
    copy(vals = vals ++ newVals)

  /** Combines two instances. Groups are merged by name only: a group present in both is
    * taken whole from `other`, not merged key by key.
    */
  def ++(other: Annotations[T]): Annotations[T] =
    Annotations(maps ++ other.maps, vals ++ other.vals)
}
|
||
/** Factory methods producing empty [[Annotations]] instances. */
object Annotations {

  /** An instance with no groups and no top-level values. */
  def empty[T](): Annotations[T] = {
    val noGroups = Map.empty[String, Map[String, T]]
    val noVals = Map.empty[String, T]
    Annotations(noGroups, noVals)
  }

  /** An empty container of signatures. */
  def emptyOfSignature(): AnnotationSignatures = empty[AnnotationSignature]()

  /** An empty container of string-valued annotation data. */
  def emptyOfData(): AnnotationData = empty[String]()

  /** A sequence of `nSamples` empty string-valued containers. */
  def emptyOfArrayString(nSamples: Int): IndexedSeq[AnnotationData] =
    IndexedSeq.fill[AnnotationData](nSamples)(emptyOfData())
}
|
||
/** Produces Scala source text for classes that expose annotation data as typed fields. */
object AnnotationClassBuilder {

  /** Generates the source of a class named `className` that wraps an `AnnotationData` value.
    *
    * For each group in `sigs.maps` the output declares a nested class `__<group>` and a field,
    * named after the group, holding an instance of it. Each entry of `sigs.vals` becomes an
    * `Option`-typed field read through `annot.getVal`.
    *
    * @param sigs         signatures of the groups and top-level values to expose
    * @param className    name of the generated wrapper class
    * @param makeToString when true, every nested class also gets `__fields`, `toString` and `all`
    * @return the generated source text
    */
  def signatures(sigs: AnnotationSignatures, className: String,
    makeToString: Boolean = false): String = {
    // Source of one nested class per annotation group.
    val nestedClassDecls = sigs.maps.map { case (group, members) =>
      val fieldDecls = members
        .map { case (key, sig) =>
          s""" val $key: Option[${sig.emitType}] = subMap.get("$key").map(_.${sig.emitConversionIdentifier})"""
        }
        .mkString("\n")

      val stringMethods: String =
        if (!makeToString) ""
        else {
          // Built up front so that no string literal has to be nested inside the template below.
          val tsvCalls = members.keys.toArray.sorted.map(key => s"toTSVString($key)").mkString(",")
          s""" def __fields: Array[String] = Array(
             | $tsvCalls
             | )
             | override def toString: String = __fields.mkString(";")
             | def all: String = __fields.mkString("\t")""".stripMargin
        }

      s"""class __$group(subMap: Map[String, String]) extends Serializable {
         |$fieldDecls
         |$stringMethods
         |}""".stripMargin
    }
      .mkString("\n")

    // One field per group, instantiating the matching nested class.
    val groupFields = sigs.maps
      .map { case (group, _) => s""" val $group = new __$group(annot.maps("$group"))""" }
      .mkString("\n")

    // One Option-typed field per top-level value.
    val topLevelFields = sigs.vals
      .map { case (key, sig) =>
        s""" val $key: Option[${sig.emitType}] = annot.getVal("$key").map(_.${sig.emitConversionIdentifier})"""
      }
      .mkString("\n")

    // Empty sections are replaced by a placeholder comment in the generated source.
    val declSection =
      if (nestedClassDecls.isEmpty) "// no internal class declarations" else nestedClassDecls
    val instanceSection =
      if (groupFields.isEmpty) "// no class instantiations" else groupFields
    val valSection =
      if (topLevelFields.isEmpty) "// no vals" else topLevelFields

    val wrapperClass =
      s"""class $className(annot: org.broadinstitute.hail.annotations.AnnotationData)
         | extends Serializable {
         | $declSection
         | $instanceSection
         | $valSection
         |}
         |""".stripMargin

    s"""
       |$wrapperClass
    """.stripMargin
  }

  /** Source text of a `val` binding `exposedName` to `new className(rawName)`, newline-terminated. */
  def instantiate(exposedName: String, className: String, rawName: String): String =
    s"val $exposedName = new $className($rawName)\n"

  /** Source text of a `val` binding `exposedName` to `rawArrayName` mapped through the
    * constructor of `classIdentifier`.
    */
  def instantiateIndexedSeq(exposedName: String, classIdentifier: String, rawArrayName: String): String = {
    val template =
      s"""val $exposedName: IndexedSeq[$classIdentifier] =
         | $rawArrayName.map(new $classIdentifier(_))
    """
    template.stripMargin
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package org.broadinstitute.hail.annotations | ||
|
||
import htsjdk.variant.vcf.{VCFInfoHeaderLine, VCFHeaderLineCount, VCFHeaderLineType} | ||
|
||
/** An [[AnnotationSignature]] carrying the properties of a VCF INFO header line.
  *
  * @param vcfType                  the header line's type, as text
  * @param emitType                 text naming the type the annotation is exposed as
  * @param number                   the header line's count, as text
  * @param emitConversionIdentifier text naming the conversion applied to the raw value
  * @param description              the header line's description
  */
case class VCFSignature(vcfType: String, emitType: String, number: String,
  emitConversionIdentifier: String, description: String)
  extends AnnotationSignature {

  /** Always the empty string. */
  override def emitUtilities: String = ""
}
|
||
/** Builds [[VCFSignature]] values from VCF INFO header lines. */
object VCFSignature {

  val arrayRegex = """Array\[(\w+)\]""".r
  val setRegex = """Set\[(\w+)\]""".r
  val integerRegex = """(\d+)""".r

  /** Derives a conversion identifier from an emitted type name.
    *
    * `Array[X]` gives `toArrayX`, `Set[X]` gives `toSetX`, and any other `str` gives `to<str>`.
    */
  def parseConversionIdentifier(str: String): String = str match {
    case arrayRegex(elementType) => "toArray" + elementType
    case setRegex(elementType) => "toSet" + elementType
    case _ => "to" + str
  }

  /** Translates an INFO header line into a signature.
    *
    * The emitted type is `Array[...]` of the element type when the count is `A`, `R`, `G` or a
    * fixed integer greater than one; in every other case it is the bare element type.
    *
    * @param line the INFO header line to translate
    * @return a [[VCFSignature]] describing `line`
    */
  def parse(line: VCFInfoHeaderLine): AnnotationSignature = {
    val vcfType = line.getType.toString

    val elementType = line.getType match {
      case VCFHeaderLineType.Integer => "Int"
      case VCFHeaderLineType.Float => "Double"
      case VCFHeaderLineType.String => "String"
      case VCFHeaderLineType.Character => "Character"
      case VCFHeaderLineType.Flag => "Boolean"
    }

    val countText = line.getCountType match {
      case VCFHeaderLineCount.A => "A"
      case VCFHeaderLineCount.G => "G"
      case VCFHeaderLineCount.R => "R"
      case VCFHeaderLineCount.INTEGER => line.getCount.toString
      case VCFHeaderLineCount.UNBOUNDED => "."
    }

    // Decide array-ness first, then render the type name once.
    val isArray = countText match {
      case "A" | "R" | "G" => true
      case integerRegex(digits) => digits.toInt > 1
      case _ => false
    }
    val scalaType = if (isArray) s"Array[$elementType]" else elementType

    val conversionMethod = parseConversionIdentifier(scalaType)
    val desc = line.getDescription

    VCFSignature(
      vcfType = vcfType,
      emitType = scalaType,
      number = countText,
      emitConversionIdentifier = conversionMethod,
      description = desc)
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
package org.broadinstitute.hail | ||
|
||
/** Shorthand names for the two [[Annotations]] instantiations used in this package. */
package object annotations {

  /** Annotation values held as strings. */
  type AnnotationData = Annotations[String]

  /** Signatures describing annotations. */
  type AnnotationSignatures = Annotations[AnnotationSignature]
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package org.broadinstitute.hail.driver | ||
|
||
import org.broadinstitute.hail.Utils._ | ||
import org.broadinstitute.hail.methods._ | ||
import org.broadinstitute.hail.variant._ | ||
import org.broadinstitute.hail.annotations._ | ||
import org.kohsuke.args4j.{Option => Args4jOption} | ||
|
||
/** Command that evaluates a user-supplied condition for every sample/variant pair and saves
  * the resulting strings as text, alongside a separate one-line header file.
  */
object ExportGenotypes extends Command {

  class Options extends BaseOptions {

    @Args4jOption(required = true, name = "-o", aliases = Array("--output"),
      usage = "path of output tsv")
    var output: String = _

    @Args4jOption(required = true, name = "-c", aliases = Array("--condition"),
      usage = "Comma-separated list of fields to be printed to tsv")
    var condition: String = _
  }

  def newOptions = new Options

  def name = "exportgenotypes"

  def description = "Export list of sample-variant information to tsv"

  /** Writes `<output>.header`, deletes any existing `output`, then saves the generated lines.
    *
    * @param state   current state; its dataset and Hadoop configuration are read
    * @param options supplies the output path and the condition string
    * @return `state`, unchanged
    */
  def run(state: State, options: Options): State = {
    val vds = state.vds
    val cond = options.condition
    val output = options.output

    // Build the evaluator for the condition; typeCheck() is called before it is used.
    val makeString: ((Variant, AnnotationData) =>
      ((Int, Genotype) => String)) = {
      val cf = new ExportGenotypeEvaluator(cond, vds.metadata)
      cf.typeCheck()
      cf.apply
    }

    // Return the per-variant function directly. Wrapping it in another
    // `(s, g) => makeString(v, va)(s, g)` would redo the per-variant application for every sample.
    val stringVDS = vds.mapValuesWithPartialApplication(
      (v: Variant, va: AnnotationData) => makeString(v, va))

    // FIXME column names are just the last dot-separated segment of each comma-separated
    // expression (e.g. `va.info.all` is labelled `all`); richer header naming is still to be done.
    writeTextFile(output + ".header", state.hadoopConf) { s =>
      s.write(cond.split(",").map(_.split("\\.").last).mkString("\t"))
      s.write("\n")
    }

    hadoopDelete(output, state.hadoopConf, recursive = true)

    stringVDS.rdd
      .flatMap { case (_, _, strings) => strings }
      .saveAsTextFile(output)

    state
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think just `empty` is traditional.