diff --git a/onnxruntime/core/providers/cpu/math/cumsum.cc b/onnxruntime/core/providers/cpu/math/cumsum.cc
index 9c403d75b84aa..98ca7e590eed3 100644
--- a/onnxruntime/core/providers/cpu/math/cumsum.cc
+++ b/onnxruntime/core/providers/cpu/math/cumsum.cc
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <vector>
+
 #include "cumsum.h"
 #include "core/providers/common.h"
 #include "core/providers/cpu/tensor/utils.h"
@@ -162,9 +164,10 @@ Status CumSum<T>::Compute(OpKernelContext* ctx) const {
 
   // we solve the problem by using the identity that(in the case of exclusive)
   // 1) out[upper_dims...][0][lower_dims...] = 0
-  // 2) out[upper_dims...][i][lower_dims...] = in[upper_dims...][i-1][lower_dims...] + out[upper_dims...][i-1][lower_dims...]
+  // 2) out[upper_dims...][i][lower_dims...] =
+  //        in[upper_dims...][i-1][lower_dims...] + out[upper_dims...][i-1][lower_dims...]
   // we loop through the [upper_dims...] and start applying the identity in each slice
-  // in each slice since again the [lower_dims...] are adjecent in memory, we can add them like vectors
+  // since the [lower_dims...] are adjacent in memory, we can add them like vectors
 
   const auto dim = input->Shape()[axis];  // dimension size for the axis
   const auto input_shape = input->Shape().GetDims();
diff --git a/onnxruntime/test/providers/cpu/math/cumsum_test.cc b/onnxruntime/test/providers/cpu/math/cumsum_test.cc
index c0c2ee4fad061..f71d222c8c66b 100644
--- a/onnxruntime/test/providers/cpu/math/cumsum_test.cc
+++ b/onnxruntime/test/providers/cpu/math/cumsum_test.cc
@@ -249,11 +249,11 @@ TEST(CumSumTest, _1DTestdouble_WithInt64Axis) {
 }
 TEST(CumSumTest, _1DTestLong) {
   OpTester test("CumSum", 11, onnxruntime::kOnnxDomain);
-  const int N = 1000000;
-  test.AddInput<int64_t>("x", {N}, std::vector<int64_t>(N, 1));
-  test.AddInput<int64_t>("axis", {}, {0});
+  const int N = 10000000;
   std::vector<int64_t> output_value(N);
   std::iota(output_value.begin(), output_value.end(), 1);
+  test.AddInput<int64_t>("x", {N}, std::vector<int64_t>(N, 1));
+  test.AddInput<int64_t>("axis", {}, {0});
   test.AddOutput<int64_t>("y", {N}, output_value);
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }