Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WX-1462 POSIX-flavored sub() #7374

Merged
merged 9 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ metadata {
"outputs.biscayne_new_engine_functions.minMaxIntFloatComposition": 1.0
"outputs.biscayne_new_engine_functions.maxIntVsMaxFloat": 1.79769313E+308

"outputs.biscayne_new_engine_functions.substituted": "WATtheWAT"

"outputs.biscayne_new_engine_functions.with_suffixes.0": "aaaS"
"outputs.biscayne_new_engine_functions.with_suffixes.1": "bbbS"
"outputs.biscayne_new_engine_functions.with_suffixes.2": "cccS"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ workflow biscayne_new_engine_functions {

meta {
description: "This test makes sure that these functions work in a real workflow"
functions_under_test: [ "keys", "as_map", "as_pairs", "collect_by_key", "suffix" ]
functions_under_test: [ "keys", "as_map", "as_pairs", "collect_by_key", "sub", "suffix" ]
}

Map[String, Int] x_map_in = {"a": 1, "b": 2, "c": 3}
Expand Down Expand Up @@ -51,6 +51,11 @@ workflow biscayne_new_engine_functions {
Float minMaxIntFloatComposition = min(max(biggestInt, smallFloat), smallestInt) # 1.0
Float maxIntVsMaxFloat = max(maxInt, maxFloat)

# sub():
# (Exists before Biscayne, but uses different regex flavor here)
# =================================================
String substituted = sub("AtheZ", "[[:upper:]]", "WAT")

# suffix():
# =================================================
Array[String] with_suffixes = suffix("S", some_strings)
Expand Down
4 changes: 3 additions & 1 deletion project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ object Dependencies {
private val postgresV = "42.4.4"
private val pprintV = "0.7.3"
private val rdf4jV = "3.7.1"
private val re2jV = "1.6"
private val refinedV = "0.10.1"
private val rhinoV = "1.7.14"

Expand Down Expand Up @@ -517,7 +518,8 @@ object Dependencies {
val wdlDependencies: List[ModuleID] = List(
"commons-io" % "commons-io" % commonsIoV,
"org.scala-graph" %% "graph-core" % scalaGraphV,
"com.chuusai" %% "shapeless" % shapelessV
"com.chuusai" %% "shapeless" % shapelessV,
"com.google.re2j" % "re2j" % re2jV,
) ++ betterFilesDependencies

val languageFactoryDependencies = List(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,23 @@ object ExpressionElement {
def arg2: ExpressionElement
def arg3: ExpressionElement
}

// Pre-1.1 WDL versions have undefined regex flavor. Cromwell uses the standard Java flavor.
final case class Sub(input: ExpressionElement, pattern: ExpressionElement, replace: ExpressionElement)
extends ThreeParamFunctionCallElement {
override def arg1: ExpressionElement = input
override def arg2: ExpressionElement = pattern
override def arg3: ExpressionElement = replace
}

// As of WDL 1.1, WDL regular expressions are expected to be POSIX ERE flavor.
final case class SubPosix(input: ExpressionElement, pattern: ExpressionElement, replace: ExpressionElement)
extends ThreeParamFunctionCallElement {
override def arg1: ExpressionElement = input
override def arg2: ExpressionElement = pattern
override def arg3: ExpressionElement = replace
}

/**
* A single identifier lookup expression, eg Int x = y
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package wdl.transforms.biscayne.ast2wdlom
import cats.syntax.validated._
import common.validation.ErrorOr.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.transforms.base.ast2wdlom.AstNodeToExpressionElement

object AstToNewExpressionElements {
Expand All @@ -15,6 +15,7 @@ object AstToNewExpressionElements {
"min" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Min, "min"),
"max" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Max, "max"),
"sep" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Sep, "sep"),
"sub" -> AstNodeToExpressionElement.validateThreeParamEngineFunction(SubPosix, "sub"),
"suffix" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Suffix, "suffix"),
"read_object" -> (_ =>
"read_object is no longer available in this WDL version. Consider using read_json instead".invalidNel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ object BiscayneExpressionValueConsumers {
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer)
}

implicit val subPosixExpressionValueConsumer: ExpressionValueConsumer[SubPosix] =
new ExpressionValueConsumer[SubPosix] {
override def expressionConsumedValueHooks(a: SubPosix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
): Set[UnlinkedConsumedValueHook] =
expressionValueConsumer.expressionConsumedValueHooks(a.arg1)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg3)(expressionValueConsumer)
}

implicit val suffixExpressionValueConsumer: ExpressionValueConsumer[Suffix] = new ExpressionValueConsumer[Suffix] {
override def expressionConsumedValueHooks(a: Suffix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ package object consumed {
case a: Zip => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Cross => a.expressionConsumedValueHooks(expressionValueConsumer)

case a: Sub => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: SubPosix => a.expressionConsumedValueHooks(expressionValueConsumer)

// New WDL biscayne expressions:
case a: Keys => a.expressionConsumedValueHooks(expressionValueConsumer)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package wdl.transforms.biscayne.linking.expression.files

import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.model.draft3.graph.expression.FileEvaluator
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators.twoParameterFunctionPassthroughFileEvaluator
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators.{
threeParameterFunctionPassthroughFileEvaluator,
twoParameterFunctionPassthroughFileEvaluator
}

object BiscayneFileEvaluators {

Expand All @@ -16,6 +19,8 @@
EngineFunctionEvaluators.singleParameterPassthroughFileEvaluator

implicit val sepFunctionEvaluator: FileEvaluator[Sep] = twoParameterFunctionPassthroughFileEvaluator[Sep]
implicit val subPosixFunctionEvaluator: FileEvaluator[SubPosix] =
threeParameterFunctionPassthroughFileEvaluator[SubPosix]

Check warning on line 23 in wdl/transforms/biscayne/src/main/scala/wdl/transforms/biscayne/linking/expression/files/BiscayneFileEvaluators.scala

View check run for this annotation

Codecov / codecov/patch

wdl/transforms/biscayne/src/main/scala/wdl/transforms/biscayne/linking/expression/files/BiscayneFileEvaluators.scala#L23

Added line #L23 was not covered by tests
implicit val suffixFunctionEvaluator: FileEvaluator[Suffix] = twoParameterFunctionPassthroughFileEvaluator[Suffix]

implicit val minFunctionEvaluator: FileEvaluator[Min] = twoParameterFunctionPassthroughFileEvaluator[Min]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@
case a: Zip => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: Cross => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)

case a: Sub => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: SubPosix =>
a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)

Check warning on line 158 in wdl/transforms/biscayne/src/main/scala/wdl/transforms/biscayne/linking/expression/files/files.scala

View check run for this annotation

Codecov / codecov/patch

wdl/transforms/biscayne/src/main/scala/wdl/transforms/biscayne/linking/expression/files/files.scala#L158

Added line #L158 was not covered by tests

case a: Keys => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: AsMap => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package wdl.transforms.biscayne.linking.expression.types

import cats.implicits.catsSyntaxTuple2Semigroupal
import cats.implicits.{catsSyntaxTuple2Semigroupal, catsSyntaxTuple3Semigroupal}
import cats.syntax.validated._
import common.validation.ErrorOr._
import wdl.model.draft3.elements.ExpressionElement
Expand Down Expand Up @@ -102,6 +102,16 @@ object BiscayneTypeEvaluators {
}
}

implicit val subPosixFunctionEvaluator: TypeEvaluator[SubPosix] = new TypeEvaluator[SubPosix] {
override def evaluateType(a: SubPosix, linkedValues: Map[UnlinkedConsumedValueHook, GeneratedValueHandle])(implicit
expressionTypeEvaluator: TypeEvaluator[ExpressionElement]
): ErrorOr[WomType] =
(validateParamType(a.input, linkedValues, WomSingleFileType),
validateParamType(a.pattern, linkedValues, WomSingleFileType),
validateParamType(a.replace, linkedValues, WomSingleFileType)
) mapN { (_, _, _) => WomStringType }
}

implicit val suffixFunctionEvaluator: TypeEvaluator[Suffix] = new TypeEvaluator[Suffix] {
override def evaluateType(a: Suffix, linkedValues: Map[UnlinkedConsumedValueHook, GeneratedValueHandle])(implicit
expressionTypeEvaluator: TypeEvaluator[ExpressionElement]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ package object types {
case a: Zip => a.evaluateType(linkedValues)(typeEvaluator)
case a: Cross => a.evaluateType(linkedValues)(typeEvaluator)

case a: Sub => a.evaluateType(linkedValues)(typeEvaluator)
case a: SubPosix => a.evaluateType(linkedValues)(typeEvaluator)

case a: StdoutElement.type => a.evaluateType(linkedValues)(typeEvaluator)
case a: StderrElement.type => a.evaluateType(linkedValues)(typeEvaluator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ import cats.data.NonEmptyList
import cats.syntax.validated._
import cats.syntax.traverse._
import cats.instances.list._
import com.google.re2j.{Pattern => RE2JPattern}
import common.validation.ErrorOr._
import common.collections.EnhancedCollections._
import common.validation.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement._
import wdl.model.draft3.graph.expression.{EvaluatedValue, ForCommandInstantiationOptions, ValueEvaluator}
import wdl.transforms.base.linking.expression.values.EngineFunctionEvaluators.{
processThreeValidatedValues,
processTwoValidatedValues,
processValidatedSingleValue
}
Expand Down Expand Up @@ -218,6 +221,36 @@ object BiscayneValueEvaluators {
}
}

implicit val subPosixFunctionEvaluator: ValueEvaluator[SubPosix] = new ValueEvaluator[SubPosix] {
override def evaluateValue(a: SubPosix,
inputs: Map[String, WomValue],
ioFunctionSet: IoFunctionSet,
forCommandInstantiationOptions: Option[ForCommandInstantiationOptions]
)(implicit expressionValueEvaluator: ValueEvaluator[ExpressionElement]): ErrorOr[EvaluatedValue[WomString]] =
processThreeValidatedValues[WomString, WomString, WomString, WomString](
expressionValueEvaluator.evaluateValue(a.input, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
),
expressionValueEvaluator.evaluateValue(a.pattern, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
),
expressionValueEvaluator.evaluateValue(a.replace, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
)
) { (input, pattern, replace) =>
ErrorOr(
EvaluatedValue(WomString(
RE2JPattern
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

handy! thanks google

.compile(pattern.valueString)
.matcher(input.valueString)
.replaceAll(replace.valueString)
),
Seq.empty
)
)
}
}

implicit val suffixFunctionEvaluator: ValueEvaluator[Suffix] = new ValueEvaluator[Suffix] {
override def evaluateValue(a: Suffix,
inputs: Map[String, WomValue],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ package object values {
case a: Cross =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)

case a: Sub => a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
case a: SubPosix =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)

case a: Keys => a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
case a: AsMap =>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package wdl.transforms.biscayne

import java.util

import common.Checked
import common.assertion.ErrorOrAssertions._
import common.transforms.CheckedAtoB
Expand Down Expand Up @@ -98,6 +97,12 @@ class Ast2WdlomSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers {
expr shouldBeValid NoneLiteralElement
}

it should "get the posix version when parsing the sub function" in {
val str = """sub("my input", "[A-Za-z]", "repl")"""
val expr = fromString[ExpressionElement](str, parser.parse_e)
expr shouldBeValid (SubPosix(StringLiteral("my input"), StringLiteral("[A-Za-z]"), StringLiteral("repl")))
}

it should "parse the new suffix function" in {
val str = "suffix(some_str, some_arr)"
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ class BiscayneExpressionValueConsumersSpec extends AnyFlatSpec with CromwellTime
}
}

it should "discover the variable lookups within a sub() call" in {
val str = """ sub(my_input, "^[A-Z]$", "0") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.expressionConsumedValueHooks should be(Set(UnlinkedIdentifierHook("my_input")))
}
}

it should "discover the variable lookups within a suffix() call" in {
val str = """ suffix(my_suffix, ["a", "b", c]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class BiscayneFileEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wit
}
}

it should "discover the file which would be required to evaluate a sub() function" in {
val str = """ sub(read_string("my_nice_file.txt"), "foo", "NEW_VAL") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.predictFilesNeededToEvaluate(Map.empty, NoIoFunctionSet, WomStringType) shouldBeValid Set(
WomSingleFile("my_nice_file.txt")
)
}
}

it should "discover the file which would be required to evaluate a suffix() function" in {
val str = """ suffix(' # what a line', read_lines("foo.txt")) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@ class BiscayneTypeEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wit
}
}

it should "evaluate the type of a sub() function as String" in {
val str = """ sub("input", "^pattern$", "s") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.evaluateType(Map.empty) shouldBeValid WomStringType
}
}

it should "evaluate the type of a suffix() function as Array[String]" in {
val str = """ suffix('S', ["a", "b", "c"]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,29 @@ class BiscayneValueEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wi
}
}

it should "evaluate a POSIX-flavor regex in a sub expression correctly" in {
val str = """ sub("aB", "[[:lower:]]", "9") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

val expectedString: WomString = WomString("9B")

expr.shouldBeValidPF { case e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None) shouldBeValid EvaluatedValue(expectedString, Seq.empty)
}
}

it should "fail to evaluate a Java-flavor regex in a sub expression" in {
val str = """ sub("aB", "\\p{Lower}", "9") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None)
.shouldBeInvalid(
"""error parsing regexp: invalid character class range: `\p{Lower}`"""
)
}
}

it should "evaluate a suffix expression correctly" in {
val str = """ suffix("S", ["a", "b", "c"]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package wdl.transforms.cascades.ast2wdlom
import cats.syntax.validated._
import common.validation.ErrorOr.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.transforms.base.ast2wdlom.AstNodeToExpressionElement

object AstToNewExpressionElements {
Expand All @@ -15,6 +15,7 @@ object AstToNewExpressionElements {
"min" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Min, "min"),
"max" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Max, "max"),
"sep" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Sep, "sep"),
"sub" -> AstNodeToExpressionElement.validateThreeParamEngineFunction(SubPosix, "sub"),
"suffix" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Suffix, "suffix"),
"read_object" -> (_ =>
"read_object is no longer available in this WDL version. Consider using read_json instead".invalidNel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ object cascadesExpressionValueConsumers {
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer)
}

implicit val subPosixExpressionValueConsumer: ExpressionValueConsumer[SubPosix] =
new ExpressionValueConsumer[SubPosix] {
override def expressionConsumedValueHooks(a: SubPosix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
): Set[UnlinkedConsumedValueHook] =
expressionValueConsumer.expressionConsumedValueHooks(a.arg1)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg3)(expressionValueConsumer)
}

implicit val suffixExpressionValueConsumer: ExpressionValueConsumer[Suffix] = new ExpressionValueConsumer[Suffix] {
override def expressionConsumedValueHooks(a: Suffix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ package object consumed {
case a: Zip => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Cross => a.expressionConsumedValueHooks(expressionValueConsumer)

case a: Sub => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: SubPosix => a.expressionConsumedValueHooks(expressionValueConsumer)

// New WDL biscayne expressions:
case a: Keys => a.expressionConsumedValueHooks(expressionValueConsumer)
Expand Down
Loading
Loading