Skip to content

Commit

Permalink
Merge pull request #82 from broadinstitute/tp_gc
Browse files: browse the repository at this point in the history
Removed TupleVSM and RecordReader
  • Loading branch information
cseed committed Nov 24, 2015
2 parents 67500bc + 205e465 commit 02f4fec
Show file tree
Hide file tree
Showing 8 changed files with 8 additions and 429 deletions.
14 changes: 1 addition & 13 deletions src/main/scala/org/broadinstitute/hail/driver/Import.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ object Import extends Command {
@Args4jOption(required = false, name = "-m", aliases = Array("--vsm-type"), usage = "Select VariantSampleMatrix implementation")
var vsmtype: String = "sparky"

@Args4jOption(required = false, name = "-p", aliases = Array("--parser"), usage = "Select parser, one of htsjdk or native")
var parser: String = "htsjdk"

@Args4jOption(required = false, name = "-d", aliases = Array("--no-compress"), usage = "Don't compress in-memory representation")
var noCompress: Boolean = false

Expand All @@ -36,15 +33,6 @@ object Import extends Command {
def run(state: State, options: Options): State = {
val input = options.input

val parser = options.parser
println("parser = " + parser)
val readerBuilder = if (parser == "htsjdk")
vcf.HtsjdkRecordReaderBuilder
else if (parser == "native")
vcf.RecordReaderBuilder
else
fatal("unknown parser `" + parser + "'")

val newVDS =
if (input.endsWith(".vcf")
|| input.endsWith(".vcf.bgz")
Expand All @@ -54,7 +42,7 @@ object Import extends Command {
fatal(".gz cannot be loaded in parallel, use .bgz or -f override")
}

LoadVCF(state.sc, input, readerBuilder, options.vsmtype, !options.noCompress,
LoadVCF(state.sc, input, options.vsmtype, !options.noCompress,
if (options.nPartitions != 0)
Some(options.nPartitions)
else
Expand Down
3 changes: 1 addition & 2 deletions src/main/scala/org/broadinstitute/hail/methods/LoadVCF.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ object LoadVCF {
// FIXME move to VariantDataset
def apply(sc: SparkContext,
file: String,
readerBuilder: vcf.AbstractRecordReaderBuilder = vcf.HtsjdkRecordReaderBuilder,
vsmtype: String = "sparky",
compress: Boolean = true,
nPartitions: Option[Int] = None): VariantDataset = {
Expand All @@ -37,7 +36,7 @@ object LoadVCF {
val headerLinesBc = sc.broadcast(headerLines)
val genotypes = sc.textFile(file, nPartitions.getOrElse(sc.defaultMinPartitions))
.mapPartitions { lines =>
val reader = readerBuilder.result(headerLinesBc.value)
val reader = vcf.HtsjdkRecordReader(headerLinesBc.value)
lines.filter(line => !line.isEmpty && line(0) != '#')
.flatMap(reader.readRecord)
.map { case (v, gs) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.broadinstitute.hail.Utils._
import org.broadinstitute.hail.variant.vsm.{SparkyVSM, TupleVSM}
import org.broadinstitute.hail.variant.vsm.SparkyVSM

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._
Expand All @@ -15,7 +15,6 @@ object VariantSampleMatrix {
rdd: RDD[(Variant, GenotypeStream)]): VariantSampleMatrix[Genotype] = {
vsmtype match {
case "sparky" => new SparkyVSM(metadata, rdd)
case "tuple" => TupleVSM(metadata, rdd)
}
}

Expand All @@ -25,7 +24,6 @@ object VariantSampleMatrix {

vsmType match {
case "sparky" => SparkyVSM.read(sqlContext, dirname, metadata)
case "tuple" => TupleVSM.read(sqlContext, dirname, metadata)
}
}
}
Expand Down
142 changes: 0 additions & 142 deletions src/main/scala/org/broadinstitute/hail/variant/vsm/TupleVSM.scala

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ class BufferedLineIterator(bit: BufferedIterator[String]) extends htsjdk.tribble
override def remove() { throw new UnsupportedOperationException }
}

class HtsjdkRecordReader(codec: htsjdk.variant.vcf.VCFCodec)
extends AbstractRecordReader {
override def readRecord(line: String): Iterator[(Variant, Iterator[Genotype])] = {
class HtsjdkRecordReader(codec: htsjdk.variant.vcf.VCFCodec) extends Serializable {
def readRecord(line: String): Iterator[(Variant, Iterator[Genotype])] = {

val vc = codec.decode(line)
if (vc.isBiallelic) {
Expand Down Expand Up @@ -111,9 +110,8 @@ class HtsjdkRecordReader(codec: htsjdk.variant.vcf.VCFCodec)
}
}


object HtsjdkRecordReaderBuilder extends AbstractRecordReaderBuilder {
def result(headerLines: Array[String]): HtsjdkRecordReader = {
object HtsjdkRecordReader {
def apply(headerLines: Array[String]): HtsjdkRecordReader = {
val codec = new htsjdk.variant.vcf.VCFCodec()
codec.readHeader(new BufferedLineIterator(headerLines.iterator.buffered))
new HtsjdkRecordReader(codec)
Expand Down
Loading

0 comments on commit 02f4fec

Please sign in to comment.