-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][scf] Allow unrolling loops with integer-typed IV. #106164
Conversation
@llvm/pr-subscribers-mlir-arith @llvm/pr-subscribers-mlir Author: Hongtao Yu (htyu) ChangesSCF loops now can operate on integer-typed IV, thus I'm changing the loop unroller correspondingly. Full diff: https://github.com/llvm/llvm-project/pull/106164.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Arith/Utils/Utils.h b/mlir/include/mlir/Dialect/Arith/Utils/Utils.h
index 76f5825025739b..2202a2d62ebd92 100644
--- a/mlir/include/mlir/Dialect/Arith/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Arith/Utils/Utils.h
@@ -93,6 +93,8 @@ Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type,
int64_t value);
Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type,
const APFloat &value);
+Value createIntOrIndexConstant(OpBuilder &builder, Location loc, Type type,
+ int64_t value);
/// Returns the int type of the integer in ofr.
/// Other attribute types are not supported.
diff --git a/mlir/lib/Dialect/Arith/Utils/Utils.cpp b/mlir/lib/Dialect/Arith/Utils/Utils.cpp
index e75db84b75e280..5fb1cda3211828 100644
--- a/mlir/lib/Dialect/Arith/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Arith/Utils/Utils.cpp
@@ -302,6 +302,16 @@ Value mlir::createScalarOrSplatConstant(OpBuilder &builder, Location loc,
return builder.createOrFold<arith::ConstantOp>(loc, type, splat);
}
+Value mlir::createIntOrIndexConstant(OpBuilder &b, Location loc, Type type,
+ int64_t value) {
+ assert(type.isIntOrIndex() &&
+ "unexpected type other than integers and index");
+ if (type.isIndex())
+ return b.create<arith::ConstantIndexOp>(loc, value);
+ else
+ return b.create<arith::ConstantOp>(loc, b.getIntegerAttr(type, value));
+}
+
Type mlir::getType(OpFoldResult ofr) {
if (auto value = dyn_cast_if_present<Value>(ofr))
return value.getType();
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index ff5e3a002263d3..49bda65bad2df2 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -264,11 +264,13 @@ bool mlir::getInnermostParallelLoops(Operation *rootOp,
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
int64_t divisor) {
assert(divisor > 0 && "expected positive divisor");
- assert(dividend.getType().isIndex() && "expected index-typed value");
+ assert(dividend.getType().isIntOrIndex() &&
+ "expected integer or index-typed value");
Value divisorMinusOneCst =
- builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
- Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
+ createIntOrIndexConstant(builder, loc, dividend.getType(), divisor - 1);
+ Value divisorCst =
+ createIntOrIndexConstant(builder, loc, dividend.getType(), divisor);
Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
return builder.create<arith::DivUIOp>(loc, sum, divisorCst);
}
@@ -279,9 +281,9 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
// where divis is rounding-to-zero division.
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
Value divisor) {
- assert(dividend.getType().isIndex() && "expected index-typed value");
-
- Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
+ assert(dividend.getType().isIntOrIndex() &&
+ "expected integer or index-typed value");
+ Value cstOne = createIntOrIndexConstant(builder, loc, dividend.getType(), 1);
Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
return builder.create<arith::DivUIOp>(loc, sum, divisor);
@@ -388,16 +390,17 @@ LogicalResult mlir::loopUnrollByFactor(
// Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
if (generateEpilogueLoop)
- upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
- loc, upperBoundUnrolledCst);
+ upperBoundUnrolled = createIntOrIndexConstant(
+ boundsBuilder, loc, forOp.getUpperBound().getType(), upperBoundUnrolledCst);
else
upperBoundUnrolled = forOp.getUpperBound();
// Create constant for 'stepUnrolled'.
- stepUnrolled = stepCst == stepUnrolledCst
- ? step
- : boundsBuilder.create<arith::ConstantIndexOp>(
- loc, stepUnrolledCst);
+ stepUnrolled =
+ stepCst == stepUnrolledCst
+ ? step
+ : createIntOrIndexConstant(boundsBuilder, loc, step.getType(),
+ stepUnrolledCst);
} else {
// Dynamic loop bounds computation.
// TODO: Add dynamic asserts for negative lb/ub/step, or
@@ -407,8 +410,8 @@ LogicalResult mlir::loopUnrollByFactor(
Value diff =
boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
- Value unrollFactorCst =
- boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
+ Value unrollFactorCst = createIntOrIndexConstant(
+ boundsBuilder, loc, tripCount.getType(), unrollFactor);
Value tripCountRem =
boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
// Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
@@ -455,7 +458,7 @@ LogicalResult mlir::loopUnrollByFactor(
[&](unsigned i, Value iv, OpBuilder b) {
// iv' = iv + step * i;
auto stride = b.create<arith::MulIOp>(
- loc, step, b.create<arith::ConstantIndexOp>(loc, i));
+ loc, step, createIntOrIndexConstant(b, loc, iv.getType(), i));
return b.create<arith::AddIOp>(loc, iv, stride);
},
annotateFn, iterArgs, yieldedValues);
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index e28efbb6ec2b91..ae994a8e6c5d7b 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -311,10 +311,10 @@ func.func @static_loop_unroll_up_to_factor(%arg0 : memref<?xf32>) {
// Test that epilogue's arguments are correctly renamed.
func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
%0 = arith.constant 7.0 : f32
- %lb = arith.constant 0 : index
- %ub = arith.constant 20 : index
- %step = arith.constant 1 : index
- %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) {
+ %lb = arith.constant 0 : i32
+ %ub = arith.constant 20 : i32
+ %step = arith.constant 1 : i32
+ %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) : i32{
%add = arith.addf %arg0, %arg1 : f32
%mul = arith.mulf %arg0, %arg1 : f32
scf.yield %add, %mul : f32, f32
@@ -324,13 +324,13 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_rename_epilogue_arguments
//
// UNROLL-BY-3-DAG: %[[CST:.*]] = arith.constant {{.*}} : f32
-// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : index
-// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : index
-// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : index
-// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : index
-// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : index
+// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : i32
+// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : i32
+// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : i32
+// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : i32
+// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : i32
// UNROLL-BY-3: %[[FOR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]]
-// UNROLL-BY-3-SAME: iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) {
+// UNROLL-BY-3-SAME: iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) : i32 {
// UNROLL-BY-3-NEXT: %[[ADD0:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT: %[[MUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT: %[[ADD1:.*]] = arith.addf %[[ADD0]], %[[MUL0]] : f32
@@ -340,7 +340,7 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
// UNROLL-BY-3-NEXT: scf.yield %[[ADD2]], %[[MUL2]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3: %[[EFOR:.*]]:2 = scf.for %[[EIV:.*]] = %[[C18]] to %[[C20]] step %[[C1]]
-// UNROLL-BY-3-SAME: iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) {
+// UNROLL-BY-3-SAME: iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) : i32 {
// UNROLL-BY-3-NEXT: %[[EADD:.*]] = arith.addf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT: %[[EMUL:.*]] = arith.mulf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT: scf.yield %[[EADD]], %[[EMUL]] : f32, f32
|
@llvm/pr-subscribers-mlir-scf Author: Hongtao Yu (htyu) ChangesSCF loops now can operate on integer-typed IV, thus I'm changing the loop unroller correspondingly. Full diff: https://github.com/llvm/llvm-project/pull/106164.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Arith/Utils/Utils.h b/mlir/include/mlir/Dialect/Arith/Utils/Utils.h
index 76f5825025739b..2202a2d62ebd92 100644
--- a/mlir/include/mlir/Dialect/Arith/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Arith/Utils/Utils.h
@@ -93,6 +93,8 @@ Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type,
int64_t value);
Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type,
const APFloat &value);
+Value createIntOrIndexConstant(OpBuilder &builder, Location loc, Type type,
+ int64_t value);
/// Returns the int type of the integer in ofr.
/// Other attribute types are not supported.
diff --git a/mlir/lib/Dialect/Arith/Utils/Utils.cpp b/mlir/lib/Dialect/Arith/Utils/Utils.cpp
index e75db84b75e280..5fb1cda3211828 100644
--- a/mlir/lib/Dialect/Arith/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Arith/Utils/Utils.cpp
@@ -302,6 +302,16 @@ Value mlir::createScalarOrSplatConstant(OpBuilder &builder, Location loc,
return builder.createOrFold<arith::ConstantOp>(loc, type, splat);
}
+Value mlir::createIntOrIndexConstant(OpBuilder &b, Location loc, Type type,
+ int64_t value) {
+ assert(type.isIntOrIndex() &&
+ "unexpected type other than integers and index");
+ if (type.isIndex())
+ return b.create<arith::ConstantIndexOp>(loc, value);
+ else
+ return b.create<arith::ConstantOp>(loc, b.getIntegerAttr(type, value));
+}
+
Type mlir::getType(OpFoldResult ofr) {
if (auto value = dyn_cast_if_present<Value>(ofr))
return value.getType();
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index ff5e3a002263d3..49bda65bad2df2 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -264,11 +264,13 @@ bool mlir::getInnermostParallelLoops(Operation *rootOp,
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
int64_t divisor) {
assert(divisor > 0 && "expected positive divisor");
- assert(dividend.getType().isIndex() && "expected index-typed value");
+ assert(dividend.getType().isIntOrIndex() &&
+ "expected integer or index-typed value");
Value divisorMinusOneCst =
- builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
- Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
+ createIntOrIndexConstant(builder, loc, dividend.getType(), divisor - 1);
+ Value divisorCst =
+ createIntOrIndexConstant(builder, loc, dividend.getType(), divisor);
Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
return builder.create<arith::DivUIOp>(loc, sum, divisorCst);
}
@@ -279,9 +281,9 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
// where divis is rounding-to-zero division.
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
Value divisor) {
- assert(dividend.getType().isIndex() && "expected index-typed value");
-
- Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
+ assert(dividend.getType().isIntOrIndex() &&
+ "expected integer or index-typed value");
+ Value cstOne = createIntOrIndexConstant(builder, loc, dividend.getType(), 1);
Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
return builder.create<arith::DivUIOp>(loc, sum, divisor);
@@ -388,16 +390,17 @@ LogicalResult mlir::loopUnrollByFactor(
// Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
if (generateEpilogueLoop)
- upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
- loc, upperBoundUnrolledCst);
+ upperBoundUnrolled = createIntOrIndexConstant(
+ boundsBuilder, loc, forOp.getUpperBound().getType(), upperBoundUnrolledCst);
else
upperBoundUnrolled = forOp.getUpperBound();
// Create constant for 'stepUnrolled'.
- stepUnrolled = stepCst == stepUnrolledCst
- ? step
- : boundsBuilder.create<arith::ConstantIndexOp>(
- loc, stepUnrolledCst);
+ stepUnrolled =
+ stepCst == stepUnrolledCst
+ ? step
+ : createIntOrIndexConstant(boundsBuilder, loc, step.getType(),
+ stepUnrolledCst);
} else {
// Dynamic loop bounds computation.
// TODO: Add dynamic asserts for negative lb/ub/step, or
@@ -407,8 +410,8 @@ LogicalResult mlir::loopUnrollByFactor(
Value diff =
boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
- Value unrollFactorCst =
- boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
+ Value unrollFactorCst = createIntOrIndexConstant(
+ boundsBuilder, loc, tripCount.getType(), unrollFactor);
Value tripCountRem =
boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
// Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
@@ -455,7 +458,7 @@ LogicalResult mlir::loopUnrollByFactor(
[&](unsigned i, Value iv, OpBuilder b) {
// iv' = iv + step * i;
auto stride = b.create<arith::MulIOp>(
- loc, step, b.create<arith::ConstantIndexOp>(loc, i));
+ loc, step, createIntOrIndexConstant(b, loc, iv.getType(), i));
return b.create<arith::AddIOp>(loc, iv, stride);
},
annotateFn, iterArgs, yieldedValues);
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index e28efbb6ec2b91..ae994a8e6c5d7b 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -311,10 +311,10 @@ func.func @static_loop_unroll_up_to_factor(%arg0 : memref<?xf32>) {
// Test that epilogue's arguments are correctly renamed.
func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
%0 = arith.constant 7.0 : f32
- %lb = arith.constant 0 : index
- %ub = arith.constant 20 : index
- %step = arith.constant 1 : index
- %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) {
+ %lb = arith.constant 0 : i32
+ %ub = arith.constant 20 : i32
+ %step = arith.constant 1 : i32
+ %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) : i32{
%add = arith.addf %arg0, %arg1 : f32
%mul = arith.mulf %arg0, %arg1 : f32
scf.yield %add, %mul : f32, f32
@@ -324,13 +324,13 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_rename_epilogue_arguments
//
// UNROLL-BY-3-DAG: %[[CST:.*]] = arith.constant {{.*}} : f32
-// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : index
-// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : index
-// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : index
-// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : index
-// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : index
+// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : i32
+// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : i32
+// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : i32
+// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : i32
+// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : i32
// UNROLL-BY-3: %[[FOR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]]
-// UNROLL-BY-3-SAME: iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) {
+// UNROLL-BY-3-SAME: iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) : i32 {
// UNROLL-BY-3-NEXT: %[[ADD0:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT: %[[MUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT: %[[ADD1:.*]] = arith.addf %[[ADD0]], %[[MUL0]] : f32
@@ -340,7 +340,7 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
// UNROLL-BY-3-NEXT: scf.yield %[[ADD2]], %[[MUL2]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3: %[[EFOR:.*]]:2 = scf.for %[[EIV:.*]] = %[[C18]] to %[[C20]] step %[[C1]]
-// UNROLL-BY-3-SAME: iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) {
+// UNROLL-BY-3-SAME: iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) : i32 {
// UNROLL-BY-3-NEXT: %[[EADD:.*]] = arith.addf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT: %[[EMUL:.*]] = arith.mulf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT: scf.yield %[[EADD]], %[[EMUL]] : f32, f32
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Value divisorMinusOneCst = | ||
builder.create<arith::ConstantIndexOp>(loc, divisor - 1); | ||
Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor); | ||
Value divisorMinusOneCst = builder.create<arith::ConstantOp>( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: we also have a ConstantIntOp
specialization that will make this slightly less verbose.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that ConstantIntOp
cannot take index type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM but please wait in case Alex and Mehdi have more comments
@ftynse @joker-eph please let me know if you have more comments for the latest version. Thanks. |
I'm going to land this. Please let me know if you have any concerns and I'll address them in a new PR. Thanks. |
SCF loops now can operate on integer-typed IV, thus I'm changing the loop unroller correspondingly.