From 9c7d00ad79bea81ad6a4a8113bbabf05f53a3361 Mon Sep 17 00:00:00 2001 From: wang Date: Sat, 17 Mar 2018 02:15:32 -0400 Subject: [PATCH 1/6] wip --- src/main/scala/is/hail/expr/Relational.scala | 34 ++++++++++++++++++++ src/main/scala/is/hail/expr/ir/Compile.scala | 24 ++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/main/scala/is/hail/expr/Relational.scala b/src/main/scala/is/hail/expr/Relational.scala index 62eb709b40e..d9a91b9571f 100644 --- a/src/main/scala/is/hail/expr/Relational.scala +++ b/src/main/scala/is/hail/expr/Relational.scala @@ -379,6 +379,40 @@ case class FilterRows( } } +case class MapEntries(child: MatrixIR, newEntries: IR) extends MatrixIR { + + def children: IndexedSeq[BaseIR] = Array(child, newEntries) + + def copy(newChildren: IndexedSeq[BaseIR]): MapEntries = { + assert(newChildren.length == 1) + MapEntries(newChildren(0).asInstanceOf[MatrixIR], newChildren(1).asInstanceOf[IR]) + } + + val newRow = { + val ArrayLen(GetField(Ref("va"), MatrixType.entriesIdentifier)) + } + InsertFields(Ref("va"), Seq((MatrixType.entriesIdentifier, ArrayMap(GetField(Ref("va"), MatrixType.entriesIdentifier), "g", newEntries)))) + + val typ: MatrixType = { + Infer(newRow, None, new Env[Type]().bind("va", child.typ.rvRowType)) + child.typ.copy(rvRowType=newRow.typ) + } + + def execute(hc: HailContext): MatrixValue = { + val prev = child.execute(hc) + + val (rTyp, f) = ir.Compile[Long, Long, Long, Long]( + "global", child.typ.globalType, + "va", child.typ.rvRowType, + "sa", child.typ.colType, + newRow) + assert(rTyp == typ.rvRowType) + + prev.rvd.mapPartitionsPreservesPartitioning() + + } +} + case class TableValue(typ: TableType, globals: BroadcastValue, rvd: RVD) { def rdd: RDD[Row] = { val localRowType = typ.rowType diff --git a/src/main/scala/is/hail/expr/ir/Compile.scala b/src/main/scala/is/hail/expr/ir/Compile.scala index 673a7429758..4e84d44f347 100644 --- a/src/main/scala/is/hail/expr/ir/Compile.scala +++ b/src/main/scala/is/hail/expr/ir/Compile.scala @@ -45,6 +45,30 @@ object Compile { apply[AsmFunction5[Region, T0, Boolean, T1, Boolean, R], R](Seq((name0, typ0, classTag[T0]), (name1, typ1, classTag[T1])), body) } + def apply[T0: TypeInfo : ClassTag, T1: TypeInfo : ClassTag, T2: TypeInfo : ClassTag, R: TypeInfo : ClassTag]( + name0: String, + typ0: Type, + name1: String, + typ1: Type, + name2: String, + typ2: Type, + body: IR): (Type, () => AsmFunction7[Region, T0, Boolean, T1, Boolean, T2, Boolean, R]) = { + assert(TypeToIRIntermediateClassTag(typ0) == classTag[T0]) + assert(TypeToIRIntermediateClassTag(typ1) == classTag[T1]) + assert(TypeToIRIntermediateClassTag(typ2) == classTag[T2]) + val fb = FunctionBuilder.functionBuilder[Region, T0, Boolean, T1, Boolean, T2, Boolean, R] + var e = body + val env = new Env[IR]() + .bind(name0, In(0, typ0)) + .bind(name1, In(1, typ1)) + .bind(name1, In(2, typ2)) + e = Subst(e, env) + Infer(e) + assert(TypeToIRIntermediateClassTag(e.typ) == classTag[R]) + Emit(e, fb) + (e.typ, fb.result()) + } + def apply[T0: TypeInfo : ClassTag, T1: TypeInfo : ClassTag, T2: TypeInfo : ClassTag, T3: TypeInfo : ClassTag, T4: TypeInfo : ClassTag, T5: TypeInfo : ClassTag, R: TypeInfo : ClassTag]( From fd3ef69261285cda8dffc75a4ede65fcd2abaef1 Mon Sep 17 00:00:00 2001 From: wang Date: Mon, 19 Mar 2018 13:15:34 -0400 Subject: [PATCH 2/6] add MapEntries, use in annotate_entries --- src/main/scala/is/hail/expr/Relational.scala | 46 ++++++++-- src/main/scala/is/hail/expr/ir/Compile.scala | 2 +- src/main/scala/is/hail/expr/ir/Infer.scala | 2 +- .../scala/is/hail/variant/MatrixTable.scala | 85 +++++++++++-------- 4 files changed, 92 insertions(+), 43 deletions(-) diff --git a/src/main/scala/is/hail/expr/Relational.scala b/src/main/scala/is/hail/expr/Relational.scala index d9a91b9571f..691a2f6a539 100644 --- a/src/main/scala/is/hail/expr/Relational.scala +++ b/src/main/scala/is/hail/expr/Relational.scala @@ -384,17 +384,25 @@ case class MapEntries(child: MatrixIR, newEntries: IR) extends MatrixIR { def children: IndexedSeq[BaseIR] = Array(child, newEntries) def copy(newChildren: IndexedSeq[BaseIR]): MapEntries = { - assert(newChildren.length == 1) + assert(newChildren.length == 2) MapEntries(newChildren(0).asInstanceOf[MatrixIR], newChildren(1).asInstanceOf[IR]) } val newRow = { - val ArrayLen(GetField(Ref("va"), MatrixType.entriesIdentifier)) + val arrayLength = ArrayLen(GetField(Ref("va"), MatrixType.entriesIdentifier)) + val idxEnv = new Env[IR]() + .bind("g", ArrayRef(GetField(Ref("va"), MatrixType.entriesIdentifier), Ref("i"))) + .bind("sa", ArrayRef(Ref("sa"), Ref("i"))) + val entries = ArrayMap(ArrayRange(I32(0), arrayLength, I32(1)), "i", Subst(newEntries, idxEnv)) + InsertFields(Ref("va"), Seq((MatrixType.entriesIdentifier, entries))) } - InsertFields(Ref("va"), Seq((MatrixType.entriesIdentifier, ArrayMap(GetField(Ref("va"), MatrixType.entriesIdentifier), "g", newEntries)))) val typ: MatrixType = { - Infer(newRow, None, new Env[Type]().bind("va", child.typ.rvRowType)) + Infer(newRow, None, new Env[Type]() + .bind("global", child.typ.globalType) + .bind("va", child.typ.rvRowType) + .bind("sa", TArray(child.typ.colType)) + ) child.typ.copy(rvRowType=newRow.typ) } @@ -408,8 +416,36 @@ case class MapEntries(child: MatrixIR, newEntries: IR) extends MatrixIR { newRow) assert(rTyp == typ.rvRowType) - prev.rvd.mapPartitionsPreservesPartitioning() + val localGlobalsType = typ.globalType + val localColsType = TArray(typ.colType) + val colValuesBc = prev.colValuesBc + val globalsBc = prev.globals.broadcast + + val newRVD = prev.rvd.mapPartitionsPreservesPartitioning(typ.orvdType) { it => + val rvb = new RegionValueBuilder() + val newRV = RegionValue() + val rowF = f() + + it.map { rv => + val region = rv.region + val oldRow = rv.offset + + rvb.set(region) + rvb.start(localGlobalsType) + rvb.addAnnotation(localGlobalsType, globalsBc.value) + val globals = rvb.end() + rvb.start(localColsType) + rvb.addAnnotation(localColsType, colValuesBc.value) + val cols = rvb.end() + + val off = rowF(region, globals, false, oldRow, false, cols, false) + + newRV.set(region, off) + newRV + } + } + prev.copy(typ = typ, rvd = newRVD) } } diff --git a/src/main/scala/is/hail/expr/ir/Compile.scala b/src/main/scala/is/hail/expr/ir/Compile.scala index 4e84d44f347..0fd1405166a 100644 --- a/src/main/scala/is/hail/expr/ir/Compile.scala +++ b/src/main/scala/is/hail/expr/ir/Compile.scala @@ -61,7 +61,7 @@ object Compile { val env = new Env[IR]() .bind(name0, In(0, typ0)) .bind(name1, In(1, typ1)) - .bind(name1, In(2, typ2)) + .bind(name2, In(2, typ2)) e = Subst(e, env) Infer(e) assert(TypeToIRIntermediateClassTag(e.typ) == classTag[R]) diff --git a/src/main/scala/is/hail/expr/ir/Infer.scala b/src/main/scala/is/hail/expr/ir/Infer.scala index 667af75c0f8..f0cde8016f9 100644 --- a/src/main/scala/is/hail/expr/ir/Infer.scala +++ b/src/main/scala/is/hail/expr/ir/Infer.scala @@ -151,7 +151,7 @@ object Infer { case x@GetField(o, name, _) => infer(o) val t = coerce[TStruct](o.typ) - assert(t.index(name).nonEmpty) + assert(t.index(name).nonEmpty, s"$name not in $t") x.typ = -t.field(name).typ case GetFieldMissingness(o, name) => infer(o) diff --git a/src/main/scala/is/hail/variant/MatrixTable.scala b/src/main/scala/is/hail/variant/MatrixTable.scala index 0dcd2c33e4b..f930324546e 100644 --- a/src/main/scala/is/hail/variant/MatrixTable.scala +++ b/src/main/scala/is/hail/variant/MatrixTable.scala @@ -4,6 +4,7 @@ import is.hail.annotations._ import is.hail.check.Gen import is.hail.linalg._ import is.hail.expr._ +import is.hail.expr.ir import is.hail.methods._ import is.hail.rvd._ import is.hail.table.{Table, TableSpec} @@ -1578,49 +1579,61 @@ class MatrixTable(val hc: HailContext, val ast: MatrixIR) { val globalsBc = globals.broadcast - val (paths, types, f) = Parser.parseAnnotationExprs(expr, ec, Some(Annotation.ENTRY_HEAD)) + val asts = Parser.parseAnnotationExprsToAST(expr, ec, Some(Annotation.ENTRY_HEAD)) - val inserterBuilder = new ArrayBuilder[Inserter]() - val newEntryType = (paths, types).zipped.foldLeft(entryType) { case (gsig, (ids, signature)) => - val (s, i) = gsig.structInsert(signature, ids) - inserterBuilder += i - s - } - val inserters = inserterBuilder.result() + val irs = asts.flatMap { case (f, a) => a.toIR().map((f, _)) } - val localNSamples = numCols - val fullRowType = rvRowType - val localColValuesBc = colValuesBc - val localEntriesIndex = entriesIndex + if (irs.length == asts.length) { + val newEntries = ir.InsertFields(ir.Ref("g"), irs) - insertEntries(() => { - val fullRow = new UnsafeRow(fullRowType) - val row = fullRow.deleteField(localEntriesIndex) - (fullRow, row) - })(newEntryType, { case ((fullRow, row), rv, rvb) => - fullRow.set(rv) - val entries = fullRow.getAs[IndexedSeq[Annotation]](localEntriesIndex) + new MatrixTable(hc, MapEntries(ast, newEntries)) + } else { + info("No IR conversion found for annotate_entries. Falling back to AST.") - rvb.startArray(localNSamples) + val (paths, types, f) = Parser.parseAnnotationExprs(expr, ec, Some(Annotation.ENTRY_HEAD)) - var i = 0 - while (i < localNSamples) { - val entry = entries(i) - ec.setAll(row, - localColValuesBc.value(i), - entry, - globalsBc.value) + val inserterBuilder = new ArrayBuilder[Inserter]() + val newEntryType = (paths, types).zipped.foldLeft(entryType) { case (gsig, (ids, signature)) => + val (s, i) = gsig.structInsert(signature, ids) + inserterBuilder += i + s + } + val inserters = inserterBuilder.result() - val newEntry = f().zip(inserters) - .foldLeft(entry) { case (ga, (a, inserter)) => - inserter(ga, a) - } - rvb.addAnnotation(newEntryType, newEntry) + val localNSamples = numCols + val fullRowType = rvRowType + val localColValuesBc = colValuesBc + val localEntriesIndex = entriesIndex - i += 1 - } - rvb.endArray() - }) + insertEntries(() => { + val fullRow = new UnsafeRow(fullRowType) + val row = fullRow.deleteField(localEntriesIndex) + (fullRow, row) + })(newEntryType, { case ((fullRow, row), rv, rvb) => + fullRow.set(rv) + val entries = fullRow.getAs[IndexedSeq[Annotation]](localEntriesIndex) + + rvb.startArray(localNSamples) + + var i = 0 + while (i < localNSamples) { + val entry = entries(i) + ec.setAll(row, + localColValuesBc.value(i), + entry, + globalsBc.value) + + val newEntry = f().zip(inserters) + .foldLeft(entry) { case (ga, (a, inserter)) => + inserter(ga, a) + } + rvb.addAnnotation(newEntryType, newEntry) + + i += 1 + } + rvb.endArray() + }) + } } def filterCols(p: (Annotation, Int) => Boolean): MatrixTable = { From 3a6ec2c3e5c47ff53002cb07058a3308ee1ff1e2 Mon Sep 17 00:00:00 2001 From: wang Date: Mon, 19 Mar 2018 16:19:58 -0400 Subject: [PATCH 3/6] fix test --- src/test/scala/is/hail/io/ExportVCFSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/scala/is/hail/io/ExportVCFSuite.scala b/src/test/scala/is/hail/io/ExportVCFSuite.scala index b1da4c2235d..0cd0ad9bac3 100644 --- a/src/test/scala/is/hail/io/ExportVCFSuite.scala +++ b/src/test/scala/is/hail/io/ExportVCFSuite.scala @@ -210,13 +210,13 @@ class ExportVCFSuite extends SparkSuite { TestUtils.interceptFatal("Invalid type for format field 'BOOL'. Found 'bool'.") { ExportVCF(vds - .annotateEntriesExpr("g = {BOOL: true}"), + .annotateEntriesExpr("g.BOOL = true"), out) } TestUtils.interceptFatal("Invalid type for format field 'AA'.") { ExportVCF(vds - .annotateEntriesExpr("g = {AA: [[0]]}"), + .annotateEntriesExpr("g.AA = [[0]]"), out) } } From 73bc254bd454af1756783a5b65f01181dcaf50bd Mon Sep 17 00:00:00 2001 From: wang Date: Mon, 19 Mar 2018 17:17:37 -0400 Subject: [PATCH 4/6] fix --- python/hail/tests/utils.py | 2 +- src/main/scala/is/hail/expr/Relational.scala | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/hail/tests/utils.py b/python/hail/tests/utils.py index 5e05ba032d3..923e144f268 100644 --- a/python/hail/tests/utils.py +++ b/python/hail/tests/utils.py @@ -4,7 +4,7 @@ def startTestHailContext(): - hail.init(master='local[2]', min_block_size=0, quiet=True) + hail.init(master='local[2]', min_block_size=0) def stopTestHailContext(): diff --git a/src/main/scala/is/hail/expr/Relational.scala b/src/main/scala/is/hail/expr/Relational.scala index 691a2f6a539..526927ebb23 100644 --- a/src/main/scala/is/hail/expr/Relational.scala +++ b/src/main/scala/is/hail/expr/Relational.scala @@ -409,18 +409,18 @@ case class MapEntries(child: MatrixIR, newEntries: IR) extends MatrixIR { def execute(hc: HailContext): MatrixValue = { val prev = child.execute(hc) - val (rTyp, f) = ir.Compile[Long, Long, Long, Long]( - "global", child.typ.globalType, - "va", child.typ.rvRowType, - "sa", child.typ.colType, - newRow) - assert(rTyp == typ.rvRowType) - val localGlobalsType = typ.globalType val localColsType = TArray(typ.colType) val colValuesBc = prev.colValuesBc val globalsBc = prev.globals.broadcast + val (rTyp, f) = ir.Compile[Long, Long, Long, Long]( + "global", localGlobalsType, + "va", prev.typ.rvRowType, + "sa", localColsType, + newRow) + assert(rTyp == typ.rvRowType) + val newRVD = prev.rvd.mapPartitionsPreservesPartitioning(typ.orvdType) { it => val rvb = new RegionValueBuilder() val newRV = RegionValue() From 5faa2ea51a3d02de97cd21e4a25d96f96e7299e4 Mon Sep 17 00:00:00 2001 From: wang Date: Tue, 20 Mar 2018 12:08:15 -0400 Subject: [PATCH 5/6] fix and disable --- python/hail/tests/utils.py | 2 +- src/main/scala/is/hail/expr/Relational.scala | 2 +- src/main/scala/is/hail/variant/MatrixTable.scala | 13 ++++++------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/hail/tests/utils.py b/python/hail/tests/utils.py index 923e144f268..5e05ba032d3 100644 --- a/python/hail/tests/utils.py +++ b/python/hail/tests/utils.py @@ -4,7 +4,7 @@ def startTestHailContext(): - hail.init(master='local[2]', min_block_size=0) + hail.init(master='local[2]', min_block_size=0, quiet=True) def stopTestHailContext(): diff --git a/src/main/scala/is/hail/expr/Relational.scala b/src/main/scala/is/hail/expr/Relational.scala index 526927ebb23..e0664da99cc 100644 --- a/src/main/scala/is/hail/expr/Relational.scala +++ b/src/main/scala/is/hail/expr/Relational.scala @@ -403,7 +403,7 @@ case class MapEntries(child: MatrixIR, newEntries: IR) extends MatrixIR { .bind("va", child.typ.rvRowType) .bind("sa", TArray(child.typ.colType)) ) - child.typ.copy(rvRowType=newRow.typ) + child.typ.copy(rvRowType = newRow.typ) } def execute(hc: HailContext): MatrixValue = { diff --git a/src/main/scala/is/hail/variant/MatrixTable.scala b/src/main/scala/is/hail/variant/MatrixTable.scala index f930324546e..4dab7bb858e 100644 --- a/src/main/scala/is/hail/variant/MatrixTable.scala +++ b/src/main/scala/is/hail/variant/MatrixTable.scala @@ -1583,12 +1583,11 @@ class MatrixTable(val hc: HailContext, val ast: MatrixIR) { val irs = asts.flatMap { case (f, a) => a.toIR().map((f, _)) } - if (irs.length == asts.length) { - val newEntries = ir.InsertFields(ir.Ref("g"), irs) - - new MatrixTable(hc, MapEntries(ast, newEntries)) - } else { - info("No IR conversion found for annotate_entries. Falling back to AST.") +// if (irs.length == asts.length) { +// val newEntries = ir.InsertFields(ir.Ref("g"), irs) +// +// new MatrixTable(hc, MapEntries(ast, newEntries)) +// } else { val (paths, types, f) = Parser.parseAnnotationExprs(expr, ec, Some(Annotation.ENTRY_HEAD)) @@ -1633,7 +1632,7 @@ class MatrixTable(val hc: HailContext, val ast: MatrixIR) { } rvb.endArray() }) - } +// } } def filterCols(p: (Annotation, Int) => Boolean): MatrixTable = { From 3b37894c2045c70f8df0d43ffeb52c46ee3e9e24 Mon Sep 17 00:00:00 2001 From: wang Date: Tue, 20 Mar 2018 12:22:02 -0400 Subject: [PATCH 6/6] use IR for smol col annotations --- src/main/scala/is/hail/variant/MatrixTable.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/scala/is/hail/variant/MatrixTable.scala b/src/main/scala/is/hail/variant/MatrixTable.scala index 4dab7bb858e..d4be5a6f8dd 100644 --- a/src/main/scala/is/hail/variant/MatrixTable.scala +++ b/src/main/scala/is/hail/variant/MatrixTable.scala @@ -1583,11 +1583,12 @@ class MatrixTable(val hc: HailContext, val ast: MatrixIR) { val irs = asts.flatMap { case (f, a) => a.toIR().map((f, _)) } -// if (irs.length == asts.length) { -// val newEntries = ir.InsertFields(ir.Ref("g"), irs) -// -// new MatrixTable(hc, MapEntries(ast, newEntries)) -// } else { + val colValuesIsSmall = colType.size == 1 && colType.types.head.isOfType(TString()) + if (irs.length == asts.length && colValuesIsSmall) { + val newEntries = ir.InsertFields(ir.Ref("g"), irs) + + new MatrixTable(hc, MapEntries(ast, newEntries)) + } else { val (paths, types, f) = Parser.parseAnnotationExprs(expr, ec, Some(Annotation.ENTRY_HEAD)) @@ -1632,7 +1633,7 @@ class MatrixTable(val hc: HailContext, val ast: MatrixIR) { } rvb.endArray() }) -// } + } } def filterCols(p: (Annotation, Int) => Boolean): MatrixTable = {