From 595c4284ab0985abee66fcc9a50cbaa2045f662f Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 14:40:43 -0500
Subject: [PATCH 01/15] Refactoring of folding functions: `foldl`, `foldr`,
 `mapfoldl`, and `mapfoldr`

---
 base/reduce.jl | 54 ++++++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index e9cbeb5beb72d..998a21d89c47a 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -32,61 +32,67 @@ evaluate(::AndFun, x, y) = x & y
 evaluate(::OrFun, x, y) = x | y
 evaluate(f::Callable, x, y) = f(x, y)
 
-###### Generic reduction functions ######
+###### Generic (map)reduce functions ######
 
-# Note that getting type-stable results from reduction functions,
-# or at least having type-stable loops, is nontrivial (#6069).
+## foldl && mapfoldl
 
-## foldl
-
-function _foldl(op, v0, itr, i)
+function mapfoldl_impl(f, op, v0, itr, i)
     if done(itr, i)
         return v0
     else
         (x, i) = next(itr, i)
-        v = evaluate(op, v0, x)
+        v = evaluate(op, v0, evaluate(f, x))
         while !done(itr, i)
             (x, i) = next(itr, i)
-            v = evaluate(op, v, x)
+            v = evaluate(op, v, evaluate(f, x))
         end
         return v
     end
 end
 
-function foldl(op::Callable, v0, itr, i)
-    is(op, +) && return _foldl(AddFun(), v0, itr, i)
-    is(op, *) && return _foldl(MulFun(), v0, itr, i)
-    is(op, &) && return _foldl(AndFun(), v0, itr, i)
-    is(op, |) && return _foldl(OrFun(), v0, itr, i)
-    return _foldl(op, v0, itr, i)
-end
+mapfoldl(f, op, v0, itr) = mapfoldl_impl(op, v0, itr, start(itr))
 
-foldl(op::Callable, v0, itr) = foldl(op, v0, itr, start(itr))
+function mapfoldl(f, op::Function, v0, itr)
+    is(op, +) && return mapfoldl(f, AddFun(), v0, itr)
+    is(op, *) && return mapfoldl(f, MulFun(), v0, itr)
+    is(op, &) && return mapfoldl(f, AndFun(), v0, itr)
+    is(op, |) && return mapfoldl(f, OrFun(), v0, itr)
+    return mapfoldl_impl(f, op, v0, itr, start(itr))
+end
 
-function foldl(op::Callable, itr)
+function mapfoldl(f, op, itr)
     i = start(itr)
     done(itr, i) && error("Argument is empty.")
-    (v0, i) = next(itr, i)
-    return foldl(op, v0, itr, i)
+    (x, i) = next(itr, i)
+    v0 = evaluate(f, x)
+    mapfoldl_impl(f, op, v0, itr, i)
 end
 
+foldl(op, v0, itr) = mapfoldl(IdFun(), op, v0, itr)
+foldl(op, itr) = mapfoldl(IdFun(), op, itr)
+
 ## foldr
 
-function foldr(op::Callable, v0, itr, i=endof(itr))
-    # use type stable procedure
+# core implementation
+function mapfoldr_impl(f, op, v0, itr, i::Integer)
     if i == 0
         return v0
     else
-        v = op(itr[i], v0)
+        x = itr[i]
+        v = evaluate(op, evaluate(f, x), v0)
         while i > 1
             x = itr[i -= 1]
-            v = op(x, v)
+            v = evaluate(op, evaluate(f, x), v)
         end
         return v
     end
 end
 
-foldr(op::Callable, itr) = (i = endof(itr); foldr(op, itr[i], itr, i-1))
+mapfoldr(f, op, v0, itr) = mapfoldr_impl(f, op, v0, itr, endof(itr))
+mapfoldr(f, op, itr) = (i = endof(itr); mapfoldr_impl(f, op, itr[i], itr, i-1))
+
+foldr(op, v0, itr) = mapfoldr(IdFun(), op, v0, itr)
+foldr(op, itr) = mapfoldr(IdFun(), op, itr)
 
 ## reduce
 

From 271d8a271d71488455c9a49a7000ebf187bde423 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 16:34:11 -0500
Subject: [PATCH 02/15] test mapfoldl and mapfoldr + a bug fix to mapfoldr

---
 base/reduce.jl |  2 +-
 test/reduce.jl | 21 ++++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 998a21d89c47a..f314c80c9b697 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -89,7 +89,7 @@ function mapfoldr_impl(f, op, v0, itr, i::Integer)
 end
 
 mapfoldr(f, op, v0, itr) = mapfoldr_impl(f, op, v0, itr, endof(itr))
-mapfoldr(f, op, itr) = (i = endof(itr); mapfoldr_impl(f, op, itr[i], itr, i-1))
+mapfoldr(f, op, itr) = (i = endof(itr); mapfoldr_impl(f, op, evaluate(f, itr[i]), itr, i-1))
 
 foldr(op, v0, itr) = mapfoldr(IdFun(), op, v0, itr)
 foldr(op, itr) = mapfoldr(IdFun(), op, itr)
diff --git a/test/reduce.jl b/test/reduce.jl
index 11fe3c5239d96..20eb49c93d470 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -1,18 +1,25 @@
 ## foldl & foldr
 
-# folds -- reduce.jl
-@test foldl(-,[1:5]) == -13
-@test foldl(-,10,[1:5]) == foldl(-,[10,1:5])
+# folds 
+@test foldl(-, 1:5) == -13
+@test foldl(-, 10, 1:5) == -5
 
-@test foldr(-,[1:5]) == 3
-@test foldr(-,10,[1:5]) == foldr(-,[1:5,10])
+@test Base.mapfoldl(abs2, -, 2:5) == -46
+@test Base.mapfoldl(abs2, -, 10, 2:5) == -44
 
-# reduce -- reduce.jl
+@test foldr(-, 1:5) == 3
+@test foldr(-, 10, 1:5) == -7
+
+@test Base.mapfoldr(abs2, -, 2:5) == -14
+@test Base.mapfoldr(abs2, -, 10, 2:5) == -4
+
+
+# reduce 
 @test reduce((x,y)->"($x+$y)", [9:11]) == "((9+10)+11)"
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1000, [1:5]) == (1000 + 1 + 2 + 3 + 4 + 5)
 
-# mapreduce -- reduce.jl
+# mapreduce 
 @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3)
 @test mapreduce((x)->x[1:3], (x,y)->"($x+$y)", ["abcd", "efgh", "01234"]) == "((abc+efg)+012)"
 

From bc023fbe83cde298fa2aa1830dbeafa639d69cba Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 16:41:43 -0500
Subject: [PATCH 03/15] move mapreduce functions above

---
 base/reduce.jl | 109 ++++++++++++++++++++++++-------------------------
 1 file changed, 53 insertions(+), 56 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index f314c80c9b697..1433cfcc15ba6 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -71,9 +71,8 @@ end
 foldl(op, v0, itr) = mapfoldl(IdFun(), op, v0, itr)
 foldl(op, itr) = mapfoldl(IdFun(), op, itr)
 
-## foldr
+## foldr & mapfoldr
 
-# core implementation
 function mapfoldr_impl(f, op, v0, itr, i::Integer)
     if i == 0
         return v0
@@ -94,7 +93,58 @@ mapfoldr(f, op, itr) = (i = endof(itr); mapfoldr_impl(f, op, evaluate(f, itr[i])
 foldr(op, v0, itr) = mapfoldr(IdFun(), op, v0, itr)
 foldr(op, itr) = mapfoldr(IdFun(), op, itr)
 
-## reduce
+## reduce & mapreduce
+
+function mapreduce(f::Callable, op::Callable, itr)
+    s = start(itr)
+    if done(itr, s)
+        error("argument is empty")
+    end
+    (x, s) = next(itr, s)
+    v = f(x)
+    if done(itr, s)
+        return v
+    else # specialize for length > 1 to have a hopefully type-stable loop
+        (x, s) = next(itr, s)
+        result = op(v, f(x))
+        while !done(itr, s)
+            (x, s) = next(itr, s)
+            result = op(result, f(x))
+        end
+        return result
+    end
+end
+
+function mapreduce(f::Callable, op::Callable, v0, itr)
+    v = v0
+    for x in itr
+        v = op(v,f(x))
+    end
+    return v
+end
+
+# pairwise reduction, requires n > 1 (to allow type-stable loop)
+function mr_pairwise(f::Callable, op::Callable, A::AbstractArray, i1,n)
+    if n < 128
+        @inbounds v = op(f(A[i1]), f(A[i1+1]))
+        for i = i1+2:i1+n-1
+            @inbounds v = op(v,f(A[i]))
+        end
+        return v
+    else
+        n2 = div(n,2)
+        return op(mr_pairwise(f,op,A, i1,n2), mr_pairwise(f,op,A, i1+n2,n-n2))
+    end
+end
+function mapreduce(f::Callable, op::Callable, A::AbstractArray)
+    n = length(A)
+    n == 0 ? error("argument is empty") : n == 1 ? f(A[1]) : mr_pairwise(f,op,A, 1,n)
+end
+function mapreduce(f::Callable, op::Callable, v0, A::AbstractArray)
+    n = length(A)
+    n == 0 ? v0 : n == 1 ? op(v0, f(A[1])) : op(v0, mr_pairwise(f,op,A, 1,n))
+end
+
 
 reduce(op::Callable, v, itr) = foldl(op, v, itr)
 
@@ -648,56 +698,3 @@ function all(pred::Union(Function,Func{1}), itr)
     return true
 end
 
-
-###### mapreduce ######
-
-function mapreduce(f::Callable, op::Callable, itr)
-    s = start(itr)
-    if done(itr, s)
-        error("argument is empty")
-    end
-    (x, s) = next(itr, s)
-    v = f(x)
-    if done(itr, s)
-        return v
-    else # specialize for length > 1 to have a hopefully type-stable loop
-        (x, s) = next(itr, s)
-        result = op(v, f(x))
-        while !done(itr, s)
-            (x, s) = next(itr, s)
-            result = op(result, f(x))
-        end
-        return result
-    end
-end
-
-function mapreduce(f::Callable, op::Callable, v0, itr)
-    v = v0
-    for x in itr
-        v = op(v,f(x))
-    end
-    return v
-end
-
-# pairwise reduction, requires n > 1 (to allow type-stable loop)
-function mr_pairwise(f::Callable, op::Callable, A::AbstractArray, i1,n)
-    if n < 128
-        @inbounds v = op(f(A[i1]), f(A[i1+1]))
-        for i = i1+2:i1+n-1
-            @inbounds v = op(v,f(A[i]))
-        end
-        return v
-    else
-        n2 = div(n,2)
-        return op(mr_pairwise(f,op,A, i1,n2), mr_pairwise(f,op,A, i1+n2,n-n2))
-    end
-end
-function mapreduce(f::Callable, op::Callable, A::AbstractArray)
-    n = length(A)
-    n == 0 ? error("argument is empty") : n == 1 ? f(A[1]) : mr_pairwise(f,op,A, 1,n)
-end
-function mapreduce(f::Callable, op::Callable, v0, A::AbstractArray)
-    n = length(A)
-    n == 0 ? v0 : n == 1 ? op(v0, f(A[1])) : op(v0, mr_pairwise(f,op,A, 1,n))
-end
-

From de8f672403cd312703f5be1c08489302b649a58d Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 18:04:20 -0500
Subject: [PATCH 04/15] generalize mapreduce (still needs some tweaking ...)

---
 base/reduce.jl | 115 +++++++++++++++++++++++++++++++++----------------
 test/reduce.jl |  44 +++++++++----------
 2 files changed, 100 insertions(+), 59 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 1433cfcc15ba6..37217ecacb870 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -32,6 +32,7 @@ evaluate(::AndFun, x, y) = x & y
 evaluate(::OrFun, x, y) = x | y
 evaluate(f::Callable, x, y) = f(x, y)
 
+
 ###### Generic (map)reduce functions ######
 
 ## foldl && mapfoldl
@@ -95,26 +96,66 @@ foldr(op, itr) = mapfoldr(IdFun(), op, itr)
 
 ## reduce & mapreduce
 
-function mapreduce(f::Callable, op::Callable, itr)
-    s = start(itr)
-    if done(itr, s)
-        error("argument is empty")
+# mapreduce_***_impl require ifirst < ilast
+
+function mapreduce_seq_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int)
+    @inbounds fx1 = evaluate(f, A[ifirst])
+    @inbounds fx2 = evaluate(f, A[ifirst+=1])
+    @inbounds v = evaluate(op, fx1, fx2)
+    while ifirst < ilast
+        @inbounds fx = evaluate(f, A[ifirst+=1])
+        v = evaluate(op, v, fx)
     end
-    (x, s) = next(itr, s)
-    v = f(x)
-    if done(itr, s)
-        return v
-    else # specialize for length > 1 to have a hopefully type-stable loop
-        (x, s) = next(itr, s)
-        result = op(v, f(x))
-        while !done(itr, s)
-            (x, s) = next(itr, s)
-            result = op(result, f(x))
-        end
-        return result
+    return v
+end
+
+function mapreduce_pairwise_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int, blksize::Int)
+    if ifirst + blksiz < ilast
+        return mapreduce_seq_impl(f, op, A, ifirst, ilast)
+    else
+        imid = (ifirst + ilast) >>> 1
+        v1 = mapreduce_seq_impl(f, op, A, ifirst, imid)
+        v2 = mapreduce_seq_impl(f, op, A, imid+1, ilast)
+        evaluate(op, v1, v2)
     end
 end
 
+mapreduce(f, op::Union(Function,Func{2}), itr) = mapfoldl(f, op, itr)
+mapreduce(f, op::Union(Function,Func{2}), v0, itr) = mapfoldl(f, op, v0, itr)
+
+# select different implementation depending on input arguments
+mapreduce_impl(f, op::Union(Function,Func{2}), A::AbstractArray) = mapreduce_seq_impl(f, op, A, 1, length(A))
+mapreduce_impl(f, op::AddFun, A::AbstractArray) = 
+    mapreduce_pairwise_impl(f, op, A, 1, length(A), sum_pairwise_blocksize(f))
+
+function mapreduce(f, op::Union(Function,Func{2}), A::AbstractArray)
+    n = length(A)
+    n == 0 && error("Argument is empty.")
+    n == 1 && evaluate(f, A[1])
+    mapreduce_impl(f, op, A)
+end
+
+
+# function mapreduce(f::Callable, op::Callable, itr)
+#     s = start(itr)
+#     if done(itr, s)
+#         error("argument is empty")
+#     end
+#     (x, s) = next(itr, s)
+#     v = f(x)
+#     if done(itr, s)
+#         return v
+#     else # specialize for length > 1 to have a hopefully type-stable loop
+#         (x, s) = next(itr, s)
+#         result = op(v, f(x))
+#         while !done(itr, s)
+#             (x, s) = next(itr, s)
+#             result = op(result, f(x))
+#         end
+#         return result
+#     end
+# end
+
 function mapreduce(f::Callable, op::Callable, v0, itr)
     v = v0
     for x in itr
@@ -123,27 +164,27 @@ function mapreduce(f::Callable, op::Callable, v0, itr)
     return v
 end
 
-# pairwise reduction, requires n > 1 (to allow type-stable loop)
-function mr_pairwise(f::Callable, op::Callable, A::AbstractArray, i1,n)
-    if n < 128
-        @inbounds v = op(f(A[i1]), f(A[i1+1]))
-        for i = i1+2:i1+n-1
-            @inbounds v = op(v,f(A[i]))
-        end
-        return v
-    else
-        n2 = div(n,2)
-        return op(mr_pairwise(f,op,A, i1,n2), mr_pairwise(f,op,A, i1+n2,n-n2))
-    end
-end
-function mapreduce(f::Callable, op::Callable, A::AbstractArray)
-    n = length(A)
-    n == 0 ? error("argument is empty") : n == 1 ? f(A[1]) : mr_pairwise(f,op,A, 1,n)
-end
-function mapreduce(f::Callable, op::Callable, v0, A::AbstractArray)
-    n = length(A)
-    n == 0 ? v0 : n == 1 ? op(v0, f(A[1])) : op(v0, mr_pairwise(f,op,A, 1,n))
-end
+# # pairwise reduction, requires n > 1 (to allow type-stable loop)
+# function mr_pairwise(f::Callable, op::Callable, A::AbstractArray, i1,n)
+#     if n < 128
+#         @inbounds v = op(f(A[i1]), f(A[i1+1]))
+#         for i = i1+2:i1+n-1
+#             @inbounds v = op(v,f(A[i]))
+#         end
+#         return v
+#     else
+#         n2 = div(n,2)
+#         return op(mr_pairwise(f,op,A, i1,n2), mr_pairwise(f,op,A, i1+n2,n-n2))
+#     end
+# end
+# function mapreduce(f::Callable, op::Callable, A::AbstractArray)
+#     n = length(A)
+#     n == 0 ? error("argument is empty") : n == 1 ? f(A[1]) : mr_pairwise(f,op,A, 1,n)
+# end
+# function mapreduce(f::Callable, op::Callable, v0, A::AbstractArray)
+#     n = length(A)
+#     n == 0 ? v0 : n == 1 ? op(v0, f(A[1])) : op(v0, mr_pairwise(f,op,A, 1,n))
+# end
 
 
 reduce(op::Callable, v, itr) = foldl(op, v, itr)
diff --git a/test/reduce.jl b/test/reduce.jl
index 20eb49c93d470..2eb3dfabe787e 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -32,28 +32,28 @@ end
 @test sum(z) == sum(z,(1,2,3,4))[1] == 136
 
 # check variants of summation for type-stability and other issues (#6069)
-sum2(itr) = invoke(sum, (Any,), itr)
-plus(x,y) = x + y
-sum3(A) = reduce(plus, A)
-sum4(itr) = invoke(reduce, (Function, Any), plus, itr)
-sum5(A) = reduce(plus, 0, A)
-sum6(itr) = invoke(reduce, (Function, Int, Any), plus, 0, itr)
-sum7(A) = mapreduce(x->x, plus, A)
-sum8(itr) = invoke(mapreduce, (Function, Function, Any), x->x, plus, itr)
-sum9(A) = mapreduce(x->x, plus, 0, A)
-sum10(itr) = invoke(mapreduce, (Function, Function, Int, Any), x->x,plus,0,itr)
-for f in (sum2, sum5, sum6, sum9, sum10)
-    @test sum(z) == f(z)
-    @test sum(Int[]) == f(Int[]) == 0
-    @test sum(Int[7]) == f(Int[7]) == 7
-    @test typeof(f(Int8[])) == typeof(f(Int8[1])) == typeof(f(Int8[1 7]))
-end
-for f in (sum3, sum4, sum7, sum8)
-    @test sum(z) == f(z)
-    @test_throws ErrorException f(Int[])
-    @test sum(Int[7]) == f(Int[7]) == 7
-end
-@test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
+# sum2(itr) = invoke(sum, (Any,), itr)
+# plus(x,y) = x + y
+# sum3(A) = reduce(plus, A)
+# sum4(itr) = invoke(reduce, (Function, Any), plus, itr)
+# sum5(A) = reduce(plus, 0, A)
+# sum6(itr) = invoke(reduce, (Function, Int, Any), plus, 0, itr)
+# sum7(A) = mapreduce(x->x, plus, A)
+# sum8(itr) = invoke(mapreduce, (Function, Function, Any), x->x, plus, itr)
+# sum9(A) = mapreduce(x->x, plus, 0, A)
+# sum10(itr) = invoke(mapreduce, (Function, Function, Int, Any), x->x,plus,0,itr)
+# for f in (sum2, sum5, sum6, sum9, sum10)
+#     @test sum(z) == f(z)
+#     @test sum(Int[]) == f(Int[]) == 0
+#     @test sum(Int[7]) == f(Int[7]) == 7
+#     @test typeof(f(Int8[])) == typeof(f(Int8[1])) == typeof(f(Int8[1 7]))
+# end
+# for f in (sum3, sum4, sum7, sum8)
+#     @test sum(z) == f(z)
+#     @test_throws ErrorException f(Int[])
+#     @test sum(Int[7]) == f(Int[7]) == 7
+# end
+# @test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
 
 prod2(itr) = invoke(prod, (Any,), itr)
 @test prod(Int[]) == prod2(Int[]) == 1

From bcfcfe0dc81483a7468d85d3a937c868989037ff Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 19:11:53 -0500
Subject: [PATCH 05/15] fix a bug in mapreduce

---
 base/reduce.jl | 62 ++++----------------------------------------------
 test/reduce.jl | 44 +++++++++++++++++------------------
 2 files changed, 27 insertions(+), 79 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 37217ecacb870..e3fcbf21abedc 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -120,73 +120,21 @@ function mapreduce_pairwise_impl(f, op, A::AbstractArray, ifirst::Int, ilast::In
     end
 end
 
-mapreduce(f, op::Union(Function,Func{2}), itr) = mapfoldl(f, op, itr)
-mapreduce(f, op::Union(Function,Func{2}), v0, itr) = mapfoldl(f, op, v0, itr)
+mapreduce(f, op, itr) = mapfoldl(f, op, itr)
+mapreduce(f, op, v0, itr) = mapfoldl(f, op, v0, itr)
 
 # select different implementation depending on input arguments
-mapreduce_impl(f, op::Union(Function,Func{2}), A::AbstractArray) = mapreduce_seq_impl(f, op, A, 1, length(A))
+mapreduce_impl(f, op, A::AbstractArray) = mapreduce_seq_impl(f, op, A, 1, length(A))
 mapreduce_impl(f, op::AddFun, A::AbstractArray) = 
     mapreduce_pairwise_impl(f, op, A, 1, length(A), sum_pairwise_blocksize(f))
 
-function mapreduce(f, op::Union(Function,Func{2}), A::AbstractArray)
+function mapreduce(f, op, A::AbstractArray)
     n = length(A)
     n == 0 && error("Argument is empty.")
-    n == 1 && evaluate(f, A[1])
+    n == 1 && return evaluate(f, A[1])
     mapreduce_impl(f, op, A)
 end
 
-
-# function mapreduce(f::Callable, op::Callable, itr)
-#     s = start(itr)
-#     if done(itr, s)
-#         error("argument is empty")
-#     end
-#     (x, s) = next(itr, s)
-#     v = f(x)
-#     if done(itr, s)
-#         return v
-#     else # specialize for length > 1 to have a hopefully type-stable loop
-#         (x, s) = next(itr, s)
-#         result = op(v, f(x))
-#         while !done(itr, s)
-#             (x, s) = next(itr, s)
-#             result = op(result, f(x))
-#         end
-#         return result
-#     end
-# end
-
-function mapreduce(f::Callable, op::Callable, v0, itr)
-    v = v0
-    for x in itr
-        v = op(v,f(x))
-    end
-    return v
-end
-
-# # pairwise reduction, requires n > 1 (to allow type-stable loop)
-# function mr_pairwise(f::Callable, op::Callable, A::AbstractArray, i1,n)
-#     if n < 128
-#         @inbounds v = op(f(A[i1]), f(A[i1+1]))
-#         for i = i1+2:i1+n-1
-#             @inbounds v = op(v,f(A[i]))
-#         end
-#         return v
-#     else
-#         n2 = div(n,2)
-#         return op(mr_pairwise(f,op,A, i1,n2), mr_pairwise(f,op,A, i1+n2,n-n2))
-#     end
-# end
-# function mapreduce(f::Callable, op::Callable, A::AbstractArray)
-#     n = length(A)
-#     n == 0 ? error("argument is empty") : n == 1 ? f(A[1]) : mr_pairwise(f,op,A, 1,n)
-# end
-# function mapreduce(f::Callable, op::Callable, v0, A::AbstractArray)
-#     n = length(A)
-#     n == 0 ? v0 : n == 1 ? op(v0, f(A[1])) : op(v0, mr_pairwise(f,op,A, 1,n))
-# end
-
-
 reduce(op::Callable, v, itr) = foldl(op, v, itr)
 
 function reduce(op::Callable, itr) # this is a left fold
diff --git a/test/reduce.jl b/test/reduce.jl
index 2eb3dfabe787e..20eb49c93d470 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -32,28 +32,28 @@ end
 @test sum(z) == sum(z,(1,2,3,4))[1] == 136
 
 # check variants of summation for type-stability and other issues (#6069)
-# sum2(itr) = invoke(sum, (Any,), itr)
-# plus(x,y) = x + y
-# sum3(A) = reduce(plus, A)
-# sum4(itr) = invoke(reduce, (Function, Any), plus, itr)
-# sum5(A) = reduce(plus, 0, A)
-# sum6(itr) = invoke(reduce, (Function, Int, Any), plus, 0, itr)
-# sum7(A) = mapreduce(x->x, plus, A)
-# sum8(itr) = invoke(mapreduce, (Function, Function, Any), x->x, plus, itr)
-# sum9(A) = mapreduce(x->x, plus, 0, A)
-# sum10(itr) = invoke(mapreduce, (Function, Function, Int, Any), x->x,plus,0,itr)
-# for f in (sum2, sum5, sum6, sum9, sum10)
-#     @test sum(z) == f(z)
-#     @test sum(Int[]) == f(Int[]) == 0
-#     @test sum(Int[7]) == f(Int[7]) == 7
-#     @test typeof(f(Int8[])) == typeof(f(Int8[1])) == typeof(f(Int8[1 7]))
-# end
-# for f in (sum3, sum4, sum7, sum8)
-#     @test sum(z) == f(z)
-#     @test_throws ErrorException f(Int[])
-#     @test sum(Int[7]) == f(Int[7]) == 7
-# end
-# @test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
+sum2(itr) = invoke(sum, (Any,), itr)
+plus(x,y) = x + y
+sum3(A) = reduce(plus, A)
+sum4(itr) = invoke(reduce, (Function, Any), plus, itr)
+sum5(A) = reduce(plus, 0, A)
+sum6(itr) = invoke(reduce, (Function, Int, Any), plus, 0, itr)
+sum7(A) = mapreduce(x->x, plus, A)
+sum8(itr) = invoke(mapreduce, (Function, Function, Any), x->x, plus, itr)
+sum9(A) = mapreduce(x->x, plus, 0, A)
+sum10(itr) = invoke(mapreduce, (Function, Function, Int, Any), x->x,plus,0,itr)
+for f in (sum2, sum5, sum6, sum9, sum10)
+    @test sum(z) == f(z)
+    @test sum(Int[]) == f(Int[]) == 0
+    @test sum(Int[7]) == f(Int[7]) == 7
+    @test typeof(f(Int8[])) == typeof(f(Int8[1])) == typeof(f(Int8[1 7]))
+end
+for f in (sum3, sum4, sum7, sum8)
+    @test sum(z) == f(z)
+    @test_throws ErrorException f(Int[])
+    @test sum(Int[7]) == f(Int[7]) == 7
+end
+@test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
 
 prod2(itr) = invoke(prod, (Any,), itr)
 @test prod(Int[]) == prod2(Int[]) == 1

From f5fb96aee18458e6ecbcf2adca00d1484f94bcbc Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 21:30:10 -0500
Subject: [PATCH 06/15] reduce implemented based on new mapreduce

---
 base/reduce.jl | 81 +++++++++++++++++---------------------------------
 1 file changed, 28 insertions(+), 53 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index e3fcbf21abedc..efcc28703eac1 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -51,14 +51,14 @@ function mapfoldl_impl(f, op, v0, itr, i)
     end
 end
 
-mapfoldl(f, op, v0, itr) = mapfoldl_impl(op, v0, itr, start(itr))
+mapfoldl(f, op, v0, itr) = mapfoldl_impl(f, op, v0, itr, start(itr))
 
 function mapfoldl(f, op::Function, v0, itr)
-    is(op, +) && return mapfoldl(f, AddFun(), v0, itr)
-    is(op, *) && return mapfoldl(f, MulFun(), v0, itr)
-    is(op, &) && return mapfoldl(f, AndFun(), v0, itr)
-    is(op, |) && return mapfoldl(f, OrFun(), v0, itr)
-    return mapfoldl_impl(f, op, v0, itr, start(itr))
+    is(op, +) ? mapfoldl(f, AddFun(), v0, itr) :
+    is(op, *) ? mapfoldl(f, MulFun(), v0, itr) :
+    is(op, &) ? mapfoldl(f, AndFun(), v0, itr) :
+    is(op, |) ? mapfoldl(f, OrFun(), v0, itr) :
+    mapfoldl_impl(f, op, v0, itr, start(itr))
 end
 
 function mapfoldl(f, op, itr)
@@ -97,7 +97,6 @@ foldr(op, itr) = mapfoldr(IdFun(), op, itr)
 ## reduce & mapreduce
 
 # mapreduce_***_impl require ifirst < ilast
-
 function mapreduce_seq_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int)
     @inbounds fx1 = evaluate(f, A[ifirst])
     @inbounds fx2 = evaluate(f, A[ifirst+=1])
@@ -110,13 +109,13 @@ function mapreduce_seq_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int)
 end
 
 function mapreduce_pairwise_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int, blksize::Int)
-    if ifirst + blksiz < ilast
+    if ifirst + blksize > ilast
         return mapreduce_seq_impl(f, op, A, ifirst, ilast)
     else
         imid = (ifirst + ilast) >>> 1
         v1 = mapreduce_seq_impl(f, op, A, ifirst, imid)
         v2 = mapreduce_seq_impl(f, op, A, imid+1, ilast)
-        evaluate(op, v1, v2)
+        return evaluate(op, v1, v2)
     end
 end
 
@@ -124,55 +123,34 @@ mapreduce(f, op, itr) = mapfoldl(f, op, itr)
 mapreduce(f, op, v0, itr) = mapfoldl(f, op, v0, itr)
 
 # select different implementation depending on input arguments
-mapreduce_impl(f, op, A::AbstractArray) = mapreduce_seq_impl(f, op, A, 1, length(A))
-mapreduce_impl(f, op::AddFun, A::AbstractArray) = 
-    mapreduce_pairwise_impl(f, op, A, 1, length(A), sum_pairwise_blocksize(f))
+mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int) = 
+    mapreduce_seq_impl(f, op, A, ifirst, ilast)
 
-function mapreduce(f, op, A::AbstractArray)
-    n = length(A)
-    n == 0 && error("Argument is empty.")
-    n == 1 && return evaluate(f, A[1])
-    mapreduce_impl(f, op, A)
-end
+# Note: sum_seq uses four accumulators, so each accumulator gets at most 256 numbers
+sum_pairwise_blocksize(f) = 1024
 
-reduce(op::Callable, v, itr) = foldl(op, v, itr)
+mapreduce_impl(f, op::AddFun, A::AbstractArray, ifirst::Int, ilast::Int) = 
+    mapreduce_pairwise_impl(f, op, A, ifirst, ilast, sum_pairwise_blocksize(f))
 
-function reduce(op::Callable, itr) # this is a left fold
-    if is(op, +)
-        return sum(itr)
-    elseif is(op, *)
-        return prod(itr)
-    elseif is(op, |)
-        return any(itr)
-    elseif is(op, &)
-        return all(itr)
-    end
-    return foldl(op, itr)
+function _mapreduce(f, op, A::AbstractArray)
+    n = length(A)
+    n == 0 ? error("Argument is empty.") :
+    n == 1 ? evaluate(f, A[1]) :
+    mapreduce_impl(f, op, A, 1, n)
 end
 
-# pairwise reduction, requires n > 1 (to allow type-stable loop)
-function r_pairwise(op::Callable, A::AbstractArray, i1,n)
-    if n < 128
-        @inbounds v = op(A[i1], A[i1+1])
-        for i = i1+2:i1+n-1
-            @inbounds v = op(v,A[i])
-        end
-        return v
-    else
-        n2 = div(n,2)
-        return op(r_pairwise(op,A, i1,n2), r_pairwise(op,A, i1+n2,n-n2))
-    end
-end
+mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, A)
 
-function reduce(op::Callable, A::AbstractArray)
-    n = length(A)
-    n == 0 ? error("argument is empty") : n == 1 ? A[1] : r_pairwise(op,A, 1,n)
+function mapreduce(f, op, A::AbstractArray)
+    is(op, +) ? _mapreduce(f, AddFun(), A) :
+    is(op, *) ? _mapreduce(f, MulFun(), A) :
+    is(op, &) ? _mapreduce(f, AndFun(), A) :
+    is(op, |) ? _mapreduce(f, OrFun(), A) :
+    _mapreduce(f, op, A)
 end
 
-function reduce(op::Callable, v0, A::AbstractArray)
-    n = length(A)
-    n == 0 ? v0 : n == 1 ? op(v0, A[1]) : op(v0, r_pairwise(op,A, 1,n))
-end
+reduce(op, v0, itr) = mapreduce(IdFun(), op, v0, itr)
+reduce(op, itr) = mapreduce(IdFun(), op, itr)
 
 
 ###### Specific reduction functions ######
@@ -301,9 +279,6 @@ sumabs2(itr) = sum(Abs2Fun(), itr)
 sumabs(x::Number) = abs(x)
 sumabs2(x::Number) = abs2(x)
 
-# Note: sum_seq uses four accumulators, so each accumulator gets at most 256 numbers
-sum_pairwise_blocksize(f) = 1024
-
 # a fast implementation of sum in sequential order (from left to right).
 # to allow type-stable loops, requires length > 1
 function sum_seq(f, a::AbstractArray, ifirst::Int, ilast::Int)

From 5bf769ac08113abd872bcce20f9a97740ba5e962 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 22:58:37 -0500
Subject: [PATCH 07/15] sum functions implemented based on new mapreduce

---
 base/linalg/dense.jl |   9 +-
 base/reduce.jl       | 291 +++++++++++++++----------------------------
 2 files changed, 107 insertions(+), 193 deletions(-)

diff --git a/base/linalg/dense.jl b/base/linalg/dense.jl
index d98e6e37cc05b..304ad8e788dca 100644
--- a/base/linalg/dense.jl
+++ b/base/linalg/dense.jl
@@ -30,13 +30,12 @@ isposdef(x::Number) = imag(x)==0 && real(x) > 0
 stride1(x::Array) = 1
 stride1(x::StridedVector) = stride(x, 1)::Int
 
-Base.sum_seq{T<:BlasFloat}(::Base.AbsFun, a::Union(Array{T},StridedVector{T}), ifirst::Int, ilast::Int) =
-    BLAS.asum(ilast-ifirst+1, pointer(a, ifirst), stride1(a))
+import Base: mapreduce_seq_impl, AbsFun, Abs2Fun, AddFun
 
-# This appears to show a benefit from a larger block size
-Base.sum_pairwise_blocksize(::Base.Abs2Fun) = 4096
+mapreduce_seq_impl{T<:BlasFloat}(::AbsFun, ::AddFun, a::Union(Array{T},StridedVector{T}), ifirst::Int, ilast::Int) =
+    BLAS.asum(ilast-ifirst+1, pointer(a, ifirst), stride1(a))
 
-function Base.sum_seq{T<:BlasFloat}(::Base.Abs2Fun, a::Union(Array{T},StridedVector{T}), ifirst::Int, ilast::Int)
+function mapreduce_seq_impl{T<:BlasFloat}(::Abs2Fun, ::AddFun, a::Union(Array{T},StridedVector{T}), ifirst::Int, ilast::Int)
     n = ilast-ifirst+1
     px = pointer(a, ifirst)
     incx = stride1(a)
diff --git a/base/reduce.jl b/base/reduce.jl
index efcc28703eac1..76808e7cb119e 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -35,6 +35,12 @@ evaluate(f::Callable, x, y) = f(x, y)
 
 ###### Generic (map)reduce functions ######
 
+# r_promote: promote x to the type of reduce(op, [x])
+r_promote(op, x) = x
+r_promote(::AddFun, x) = x + zero(x)
+r_promote(::MulFun, x) = x * one(x)
+
+
 ## foldl && mapfoldl
 
 function mapfoldl_impl(f, op, v0, itr, i)
@@ -121,25 +127,39 @@ end
 
 mapreduce(f, op, itr) = mapfoldl(f, op, itr)
 mapreduce(f, op, v0, itr) = mapfoldl(f, op, v0, itr)
-
-# select different implementation depending on input arguments
 mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int) = 
     mapreduce_seq_impl(f, op, A, ifirst, ilast)
 
-# Note: sum_seq uses four accumulators, so each accumulator gets at most 256 numbers
-sum_pairwise_blocksize(f) = 1024
+# for specific functions
+reduce_empty(f, op, T) = error("Reducing over an empty array is not allow.")
+reduce_empty(::IdFun, op::AddFun, T) = r_promote(op, zero(T))
+reduce_empty(::AbsFun, op::AddFun, T) = r_promote(op, abs(zero(T)))
+reduce_empty(::Abs2Fun, op::AddFun, T) = r_promote(op, abs2(zero(T)))
+reduce_empty(::IdFun, op::MulFun, T) = r_promote(op, one(T))
 
-mapreduce_impl(f, op::AddFun, A::AbstractArray, ifirst::Int, ilast::Int) = 
-    mapreduce_pairwise_impl(f, op, A, ifirst, ilast, sum_pairwise_blocksize(f))
-
-function _mapreduce(f, op, A::AbstractArray)
+function _mapreduce{T}(f, op, A::AbstractArray{T})
     n = length(A)
-    n == 0 ? error("Argument is empty.") :
-    n == 1 ? evaluate(f, A[1]) :
-    mapreduce_impl(f, op, A, 1, n)
+    if n == 0
+        return reduce_empty(f, op, T)
+    elseif n == 1
+        return r_promote(op, evaluate(f, A[1]))
+    elseif n < 16
+        @inbounds fx1 = evaluate(f, A[1])
+        @inbounds fx2 = evaluate(f, A[2])
+        s = evaluate(op, fx1, fx2)
+        i = 2
+        while i < n
+            @inbounds fx = evaluate(f, A[i+=1])
+            s = evaluate(op, s, fx)
+        end
+        return s
+    else
+        return mapreduce_impl(f, op, A, 1, n)
+    end
 end
 
 mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, A)
+mapreduce(f, op, a::Number) = evaluate(f, a)
 
 function mapreduce(f, op, A::AbstractArray)
     is(op, +) ? _mapreduce(f, AddFun(), A) :
@@ -151,10 +171,82 @@ end
 
 reduce(op, v0, itr) = mapreduce(IdFun(), op, v0, itr)
 reduce(op, itr) = mapreduce(IdFun(), op, itr)
+reduce(op, a::Number) = a
 
 
 ###### Specific reduction functions ######
 
+## sum
+
+function mapreduce_seq_impl(f, op::AddFun, a::AbstractArray, ifirst::Int, ilast::Int)
+    @inbounds if ifirst + 6 >= ilast  # length(a) < 8
+        i = ifirst
+        s = evaluate(f, a[i]) + evaluate(f, a[i+1])
+        i = i+1
+        while i < ilast
+            s += evaluate(f, a[i+=1])
+        end
+        return s
+
+    else # length(a) >= 8, manual unrolling
+        s1 = evaluate(f, a[ifirst]) + evaluate(f, a[ifirst + 4])
+        s2 = evaluate(f, a[ifirst + 1]) + evaluate(f, a[ifirst + 5])
+        s3 = evaluate(f, a[ifirst + 2]) + evaluate(f, a[ifirst + 6])
+        s4 = evaluate(f, a[ifirst + 3]) + evaluate(f, a[ifirst + 7])
+        i = ifirst + 8
+        il = ilast - 3
+        while i <= il
+            s1 += evaluate(f, a[i])
+            s2 += evaluate(f, a[i+1])
+            s3 += evaluate(f, a[i+2])
+            s4 += evaluate(f, a[i+3])
+            i += 4
+        end
+        while i <= ilast
+            s1 += evaluate(f, a[i])
+            i += 1
+        end
+        return s1 + s2 + s3 + s4
+    end    
+end
+
+# Note: sum_seq uses four accumulators, so each accumulator gets at most 256 numbers
+sum_pairwise_blocksize(f) = 1024
+
+# This appears to show a benefit from a larger block size
+sum_pairwise_blocksize(::Abs2Fun) = 4096
+
+mapreduce_impl(f, op::AddFun, A::AbstractArray, ifirst::Int, ilast::Int) = 
+    mapreduce_pairwise_impl(f, op, A, ifirst, ilast, sum_pairwise_blocksize(f))
+
+sum(f::Union(Function,Func{1}), a) = mapreduce(f, AddFun(), a)
+sum(a) = mapreduce(IdFun(), AddFun(), a)
+sumabs(a) = mapreduce(AbsFun(), AddFun(), a)
+sumabs2(a) = mapreduce(Abs2Fun(), AddFun(), a)
+
+# Kahan (compensated) summation: O(1) error growth, at the expense
+# of a considerable increase in computational expense.
+function sum_kbn{T<:FloatingPoint}(A::AbstractArray{T})
+    n = length(A)
+    if n == 0
+        return sumzero(T)
+    end
+    c = zero(T)
+    s = A[1] + c
+    for i in 2:n
+        @inbounds Ai = A[i]
+        t = s + Ai
+        if abs(s) >= abs(Ai)
+            c += ((s-t) + Ai)
+        else
+            c += ((Ai-t) + s)
+        end
+        s = t
+    end
+    s + c
+end
+
+
 ## in & contains
 
 function in(x, itr)
@@ -219,183 +311,6 @@ function count(pred::Function, itr)
 end
 
 
-## sum
-
-# result type inference for sum
-
-sumtype{T}(::Type{T}) = typeof(zero(T) + zero(T))
-sumzero{T}(::Type{T}) = zero(T) + zero(T)
-addzero(x) = x + zero(x) 
-
-typealias SumResultNumber Union(Uint,Uint64,Uint128,Int,Int64,Int128,Float32,Float64,Complex64,Complex128)
-
-sumtype{T<:SumResultNumber}(::Type{T}) = T
-sumzero{T<:SumResultNumber}(::Type{T}) = zero(T)
-addzero(x::SumResultNumber) = x
-
-sumzero{T<:AbstractArray}(::Type{T}) = error("Summing over an empty collection of arrays is not allowed.")
-addzero(a::AbstractArray) = a
-
-# general sum over iterables
-
-function _sum(f, itr, s)  # deal with non-empty cases
-    # pre-condition: s = start(itr) && !done(itr, s)
-    (v, s) = next(itr, s)
-    done(itr, s) && return addzero(evaluate(f, v)) # adding zero for type stability
-    # specialize for length > 1 to have type-stable loop
-    (x, s) = next(itr, s)
-    result = evaluate(f, v) + evaluate(f, x)
-    while !done(itr, s)
-        (x, s) = next(itr, s)
-        result += evaluate(f, x)
-    end
-    return result    
-end 
-
-function sum(itr)
-    s = start(itr)
-    if done(itr, s)
-        if applicable(eltype, itr)
-            return sumzero(eltype(itr))
-        else
-            throw(ArgumentError("sum(itr) is undefined for empty collections; instead, do isempty(itr) ? z : sum(itr), where z is the correct type of zero for your sum"))
-        end
-    end
-    _sum(IdFun(), itr, s)
-end
-
-function sum(f::Union(Function,Func{1}), itr)
-    s = start(itr)
-    done(itr, s) && error("Argument is empty.")
-    _sum(f, itr, s)
-end
-
-sum(x::Number) = x
-sum(A::AbstractArray{Bool}) = countnz(A)
-
-sumabs(itr) = sum(AbsFun(), itr)
-sumabs2(itr) = sum(Abs2Fun(), itr)
-
-sumabs(x::Number) = abs(x)
-sumabs2(x::Number) = abs2(x)
-
-# a fast implementation of sum in sequential order (from left to right).
-# to allow type-stable loops, requires length > 1
-function sum_seq(f, a::AbstractArray, ifirst::Int, ilast::Int)
-    
-    @inbounds if ifirst + 6 >= ilast  # length(a) < 8
-        i = ifirst
-        s = evaluate(f, a[i]) + evaluate(f, a[i+1])
-        i = i+1
-        while i < ilast
-            s += evaluate(f, a[i+=1])
-        end
-        return s
-
-    else # length(a) >= 8
-
-        # more effective utilization of the instruction
-        # pipeline through manually unrolling the sum
-        # into four-way accumulation. Benchmark shows
-        # that this results in about 2x speed-up.                
-
-        s1 = evaluate(f, a[ifirst]) + evaluate(f, a[ifirst + 4])
-        s2 = evaluate(f, a[ifirst + 1]) + evaluate(f, a[ifirst + 5])
-        s3 = evaluate(f, a[ifirst + 2]) + evaluate(f, a[ifirst + 6])
-        s4 = evaluate(f, a[ifirst + 3]) + evaluate(f, a[ifirst + 7])
-
-        i = ifirst + 8
-        il = ilast - 3
-        while i <= il
-            s1 += evaluate(f, a[i])
-            s2 += evaluate(f, a[i+1])
-            s3 += evaluate(f, a[i+2])
-            s4 += evaluate(f, a[i+3])
-            i += 4
-        end
-
-        while i <= ilast
-            s1 += evaluate(f, a[i])
-            i += 1
-        end
-
-        return s1 + s2 + s3 + s4
-    end
-end
-
-# Pairwise (cascade) summation of A[i1:i1+n-1], which has O(log n) error growth
-# [vs O(n) for a simple loop] with negligible performance cost if
-# the base case is large enough.  See, e.g.:
-#        http://en.wikipedia.org/wiki/Pairwise_summation
-#        Higham, Nicholas J. (1993), "The accuracy of floating point
-#        summation", SIAM Journal on Scientific Computing 14 (4): 783–799.
-# In fact, the root-mean-square error growth, assuming random roundoff
-# errors, is only O(sqrt(log n)), which is nearly indistinguishable from O(1)
-# in practice.  See:
-#        Manfred Tasche and Hansmartin Zeuner, Handbook of
-#        Analytic-Computational Methods in Applied Mathematics (2000).
-#
-# sum_impl requires length(a) > 1
-#
-function sum_impl(f, a::AbstractArray, ifirst::Int, ilast::Int)
-    if ifirst + sum_pairwise_blocksize(f) >= ilast
-        sum_seq(f, a, ifirst, ilast)
-    else
-        imid = (ifirst + ilast) >>> 1
-        sum_impl(f, a, ifirst, imid) + sum_impl(f, a, imid+1, ilast)
-    end
-end
-sum_impl{T<:Integer}(f::Union(IdFun,AbsFun,Abs2Fun), a::AbstractArray{T}, ifirst::Int, ilast::Int) = 
-    sum_seq(f, a, ifirst, ilast)
-
-function sum(f::Union(Function,Func{1}), a::AbstractArray)
-    n = length(a)
-    n == 0 && error("Argument is empty.")
-    n == 1 && return addzero(evaluate(f, a[1]))
-    sum_impl(f, a, 1, n)
-end
-
-for (fname, func, cutoff) in ((:sum, :IdFun, 16), (:sumabs, :AbsFun, 32), (:sumabs2, :Abs2Fun, 32))
-    @eval function $fname{T}(a::AbstractArray{T})
-        n = length(a)
-        n == 0 && return addzero(evaluate($func(), zero(T)))
-        n == 1 && return addzero(evaluate($func(), a[1]))
-        if n < $cutoff
-            # It is important that this is inlined to provide good
-            # performance for small inputs
-            @inbounds s = evaluate($func(), a[1]) + evaluate($func(), a[2])
-            for i = 3:length(a)
-                @inbounds s += evaluate($func(), a[i])
-            end
-            return s
-        end
-        sum_impl($func(), a, 1, n)
-    end
-end
-
-# Kahan (compensated) summation: O(1) error growth, at the expense
-# of a considerable increase in computational expense.
-function sum_kbn{T<:FloatingPoint}(A::AbstractArray{T})
-    n = length(A)
-    if n == 0
-        return sumzero(T)
-    end
-    s = addzero(A[1])
-    c = sumzero(T)
-    for i in 2:n
-        @inbounds Ai = A[i]
-        t = s + Ai
-        if abs(s) >= abs(Ai)
-            c += ((s-t) + Ai)
-        else
-            c += ((Ai-t) + s)
-        end
-        s = t
-    end
-    s + c
-end
-
-
 ## prod
 
 prodtype{T}(::Type{T}) = typeof(zero(T) * zero(T))

From 5e0a76237f3139bb6e46d34514f378f902dfc277 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 23:08:30 -0500
Subject: [PATCH 08/15] prod functions implemented based on new mapreduce

---
 base/reduce.jl | 139 ++++++++++++++++++++++++++-----------------------
 1 file changed, 73 insertions(+), 66 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 76808e7cb119e..4a23c5e652e8b 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -221,6 +221,7 @@ mapreduce_impl(f, op::AddFun, A::AbstractArray, ifirst::Int, ilast::Int) =
 
 sum(f::Union(Function,Func{1}), a) = mapreduce(f, AddFun(), a)
 sum(a) = mapreduce(IdFun(), AddFun(), a)
+sum(a::AbstractArray{Bool}) = countnz(a)
 sumabs(a) = mapreduce(AbsFun(), AddFun(), a)
 sumabs2(a) = mapreduce(Abs2Fun(), AddFun(), a)
 
@@ -247,6 +248,12 @@ function sum_kbn{T<:FloatingPoint}(A::AbstractArray{T})
 end
 
 
+## prod
+
+prod(f::Union(Function,Func{1}), a) = mapreduce(f, MulFun(), a)
+prod(a) = mapreduce(IdFun(), MulFun(), a)
+
+
 ## in & contains
 
 function in(x, itr)
@@ -313,72 +320,72 @@ end
 
 ## prod
 
-prodtype{T}(::Type{T}) = typeof(zero(T) * zero(T))
-prodone{T}(::Type{T}) = one(T) * one(T) 
-multone(x) = x * one(x)
-
-function _prod(f, itr, s)
-    (x, s) = next(itr, s)
-    v = evaluate(f, x)
-    done(itr, s) && return multone(v) # multiplying by one for type stability
-    # specialize for length > 1 to have type-stable loop
-    (x, s) = next(itr, s)
-    result = v * evaluate(f, x)
-    while !done(itr, s)
-        (x, s) = next(itr, s)
-        result *= evaluate(f, x)
-    end
-    return result
-end
-
-function prod(itr)
-    s = start(itr)
-    if done(itr, s)
-        if applicable(eltype, itr)
-            T = eltype(itr)
-            return prodone(T)
-        else
-            throw(ArgumentError("prod(itr) is undefined for empty collections; instead, do isempty(itr) ? o : prod(itr), where o is the correct type of identity for your product"))
-        end
-    end
-    _prod(IdFun(), itr, s)
-end
-
-function prod(f::Function, itr)
-    s = start(itr)
-    done(itr, s) && error("Argument is empty.")
-    _prod(f, itr, s)
-end
-
-prod(x::Number) = x
-
-prod(A::AbstractArray{Bool}) =
-    error("use all() instead of prod() for boolean arrays")
-
-function prod_impl{T}(f, A::AbstractArray{T}, first::Int, last::Int)
-    # pre-condition: last > first
-    i = first
-    @inbounds v = evaluate(f, A[i])
-    @inbounds result = v * evaluate(f, A[i+=1])
-    while i < last
-        @inbounds result *= evaluate(f, A[i+=1])
-    end
-    return result
-end
-
-function prod{T}(A::AbstractArray{T})
-    n = length(A)
-    n == 0 && return prodone(T)
-    n == 1 && return multone(A[1])
-    prod_impl(IdFun(), A, 1, n)
-end 
-
-function prod(f::Function, A::AbstractArray) 
-    n = length(A)
-    n == 0 && error("Argument is empty.")
-    n == 1 && return multone(evaluate(f, A[1]))
-    prod_impl(f, A, 1, n)
-end
+# prodtype{T}(::Type{T}) = typeof(zero(T) * zero(T))
+# prodone{T}(::Type{T}) = one(T) * one(T) 
+# multone(x) = x * one(x)
+
+# function _prod(f, itr, s)
+#     (x, s) = next(itr, s)
+#     v = evaluate(f, x)
+#     done(itr, s) && return multone(v) # multiplying by one for type stability
+#     # specialize for length > 1 to have type-stable loop
+#     (x, s) = next(itr, s)
+#     result = v * evaluate(f, x)
+#     while !done(itr, s)
+#         (x, s) = next(itr, s)
+#         result *= evaluate(f, x)
+#     end
+#     return result
+# end
+
+# function prod(itr)
+#     s = start(itr)
+#     if done(itr, s)
+#         if applicable(eltype, itr)
+#             T = eltype(itr)
+#             return prodone(T)
+#         else
+#             throw(ArgumentError("prod(itr) is undefined for empty collections; instead, do isempty(itr) ? o : prod(itr), where o is the correct type of identity for your product"))
+#         end
+#     end
+#     _prod(IdFun(), itr, s)
+# end
+
+# function prod(f::Function, itr)
+#     s = start(itr)
+#     done(itr, s) && error("Argument is empty.")
+#     _prod(f, itr, s)
+# end
+
+# prod(x::Number) = x
+
+# prod(A::AbstractArray{Bool}) =
+#     error("use all() instead of prod() for boolean arrays")
+
+# function prod_impl{T}(f, A::AbstractArray{T}, first::Int, last::Int)
+#     # pre-condition: last > first
+#     i = first
+#     @inbounds v = evaluate(f, A[i])
+#     @inbounds result = v * evaluate(f, A[i+=1])
+#     while i < last
+#         @inbounds result *= evaluate(f, A[i+=1])
+#     end
+#     return result
+# end
+
+# function prod{T}(A::AbstractArray{T})
+#     n = length(A)
+#     n == 0 && return prodone(T)
+#     n == 1 && return multone(A[1])
+#     prod_impl(IdFun(), A, 1, n)
+# end 
+
+# function prod(f::Function, A::AbstractArray) 
+#     n = length(A)
+#     n == 0 && error("Argument is empty.")
+#     n == 1 && return multone(evaluate(f, A[1]))
+#     prod_impl(f, A, 1, n)
+# end
 
 
 ## maximum & minimum 

From 78bbf10c125a124bc8a1a25e8aaaea1cbc6e0ebc Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 23:26:02 -0500
Subject: [PATCH 09/15] maximum/minimum functions implemented based on new
 mapreduce

---
 base/reduce.jl | 280 ++++++++++++++-----------------------------------
 1 file changed, 78 insertions(+), 202 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 4a23c5e652e8b..a63f105df2125 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -18,6 +18,8 @@ type AddFun <: Func{2} end
 type MulFun <: Func{2} end
 type AndFun <: Func{2} end
 type OrFun <: Func{2} end
+type MaxFun <: Func{2} end
+type MinFun <: Func{2} end
 
 evaluate(::IdFun, x) = x
 evaluate(::AbsFun, x) = abs(x)
@@ -30,6 +32,8 @@ evaluate(::AddFun, x, y) = x + y
 evaluate(::MulFun, x, y) = x * y
 evaluate(::AndFun, x, y) = x & y
 evaluate(::OrFun, x, y) = x | y
+evaluate(::MaxFun, x, y) = scalarmax(x, y)
+evaluate(::MinFun, x, y) = scalarmin(x, y)
 evaluate(f::Callable, x, y) = f(x, y)
 
 
@@ -253,175 +257,12 @@ end
 prod(f::Union(Function,Func{1}), a) = mapreduce(f, MulFun(), a)
 prod(a) = mapreduce(IdFun(), MulFun(), a)
 
+prod(A::AbstractArray{Bool}) =
+    error("use all() instead of prod() for boolean arrays")
 
-## in & contains
-
-function in(x, itr)
-    for y in itr
-        if y == x
-            return true
-        end
-    end
-    return false
-end
-const ∈ = in
-∉(x, itr)=!∈(x, itr)
-∋(itr, x)= ∈(x, itr)
-∌(itr, x)=!∋(itr, x)
-
-function contains(itr, x)
-    depwarn("contains(collection, item) is deprecated, use in(item, collection) instead", :contains)
-    in(x, itr)
-end
-
-function contains(eq::Function, itr, x)
-    for y in itr
-        if eq(y, x)
-            return true
-        end
-    end
-    return false
-end
-
-
-## countnz & count
-
-function countnz(itr)
-    n = 0
-    for x in itr
-        if x != 0
-            n += 1
-        end
-    end
-    return n
-end
+## maximum & minimum
 
-function countnz(a::AbstractArray)
-    n = 0
-    for i = 1:length(a)
-        @inbounds x = a[i]
-        if x != 0
-            n += 1
-        end
-    end
-    return n
-end
-
-function count(pred::Function, itr)
-    n = 0
-    for x in itr
-        if pred(x)
-            n += 1
-        end
-    end
-    return n
-end
-
-
-## prod
-
-# prodtype{T}(::Type{T}) = typeof(zero(T) * zero(T))
-# prodone{T}(::Type{T}) = one(T) * one(T) 
-# multone(x) = x * one(x)
-
-# function _prod(f, itr, s)
-#     (x, s) = next(itr, s)
-#     v = evaluate(f, x)
-#     done(itr, s) && return multone(v) # multiplying by one for type stability
-#     # specialize for length > 1 to have type-stable loop
-#     (x, s) = next(itr, s)
-#     result = v * evaluate(f, x)
-#     while !done(itr, s)
-#         (x, s) = next(itr, s)
-#         result *= evaluate(f, x)
-#     end
-#     return result
-# end
-
-# function prod(itr)
-#     s = start(itr)
-#     if done(itr, s)
-#         if applicable(eltype, itr)
-#             T = eltype(itr)
-#             return prodone(T)
-#         else
-#             throw(ArgumentError("prod(itr) is undefined for empty collections; instead, do isempty(itr) ? o : prod(itr), where o is the correct type of identity for your product"))
-#         end
-#     end
-#     _prod(IdFun(), itr, s)
-# end
-
-# function prod(f::Function, itr)
-#     s = start(itr)
-#     done(itr, s) && error("Argument is empty.")
-#     _prod(f, itr, s)
-# end
-
-# prod(x::Number) = x
-
-# prod(A::AbstractArray{Bool}) =
-#     error("use all() instead of prod() for boolean arrays")
-
-# function prod_impl{T}(f, A::AbstractArray{T}, first::Int, last::Int)
-#     # pre-condition: last > first
-#     i = first
-#     @inbounds v = evaluate(f, A[i])
-#     @inbounds result = v * evaluate(f, A[i+=1])
-#     while i < last
-#         @inbounds result *= evaluate(f, A[i+=1])
-#     end
-#     return result
-# end
-
-# function prod{T}(A::AbstractArray{T})
-#     n = length(A)
-#     n == 0 && return prodone(T)
-#     n == 1 && return multone(A[1])
-#     prod_impl(IdFun(), A, 1, n)
-# end 
-
-# function prod(f::Function, A::AbstractArray) 
-#     n = length(A)
-#     n == 0 && error("Argument is empty.")
-#     n == 1 && return multone(evaluate(f, A[1]))
-#     prod_impl(f, A, 1, n)
-# end
-
-
-## maximum & minimum 
-
-function maximum(f::Union(Function,Func{1}), itr)
-    s = start(itr)
-    if done(itr, s)
-        error("argument is empty")
-    end
-    (x, s) = next(itr, s)
-    v = evaluate(f, x)
-    while !done(itr, s)
-        (x, s) = next(itr, s)
-        v = scalarmax(v, evaluate(f, x))
-    end
-    return v
-end
-
-function minimum(f::Union(Function,Func{1}), itr)
-    s = start(itr)
-    if done(itr, s)
-        error("argument is empty")
-    end
-    (x, s) = next(itr, s)
-    v = evaluate(f, x)
-    while !done(itr, s)
-        (x, s) = next(itr, s)
-        v = scalarmin(v, evaluate(f, x))
-    end
-    return v
-end
-
-maximum(itr) = maximum(IdFun(), itr)
-minimum(itr) = minimum(IdFun(), itr)
-
-function maximum_impl(f, A::AbstractArray, first::Int, last::Int)
+function mapreduce_seq_impl(f, op::MaxFun, A::AbstractArray, first::Int, last::Int)
     # locate the first non NaN number
     v = evaluate(f, A[first])
     i = first + 1
@@ -429,7 +270,6 @@ function maximum_impl(f, A::AbstractArray, first::Int, last::Int)
         @inbounds v = evaluate(f, A[i])
         i += 1
     end
-
     while i <= last
         @inbounds x = evaluate(f, A[i])
         if x > v
@@ -440,7 +280,7 @@ function maximum_impl(f, A::AbstractArray, first::Int, last::Int)
     v
 end
 
-function minimum_impl(f, A::AbstractArray, first::Int, last::Int)
+function mapreduce_seq_impl(f, op::MinFun, A::AbstractArray, first::Int, last::Int)
     # locate the first non NaN number
     v = evaluate(f, A[first])
     i = first + 1
@@ -448,7 +288,6 @@ function minimum_impl(f, A::AbstractArray, first::Int, last::Int)
         @inbounds v = evaluate(f, A[i])
         i += 1
     end
-
     while i <= last
         @inbounds x = evaluate(f, A[i])
         if x < v
@@ -459,39 +298,14 @@ function minimum_impl(f, A::AbstractArray, first::Int, last::Int)
     v
 end
 
-function maximum(f::Union(Function,Func{1}), A::AbstractArray)
-    n = length(A)
-    n == 0 && error("Argument is empty.")
-    n == 1 && return evaluate(f, A[1])
-    maximum_impl(f, A, 1, n)
-end
-
-function minimum(f::Union(Function,Func{1}), A::AbstractArray) 
-    n = length(A)
-    n == 0 && error("Argument is empty.")
-    n == 1 && return evaluate(f, A[1])
-    minimum_impl(f, A, 1, n)
-end
+maximum(f::Union(Function,Func{1}), a) = mapreduce(f, MaxFun(), a)
+minimum(f::Union(Function,Func{1}), a) = mapreduce(f, MinFun(), a)
 
-maximum(A::AbstractArray) = maximum(IdFun(), A)
-minimum(A::AbstractArray) = minimum(IdFun(), A)
-
-minabs(A::AbstractArray) = minimum(AbsFun(), A)
-
-# maxabs accepts empty array
-function maxabs(A::AbstractArray)
-    n = length(A)
-    n == 0 && return abs(zero(T))
-    n == 1 && return abs(A[1])
-    maximum_impl(AbsFun(), A, 1, n)
-end
-
-
-maximum(x::Real) = x
-minimum(x::Real) = x
-maxabs(x::Number) = abs(x)
-minabs(x::Number) = abs(x)
+maximum(a) = mapreduce(IdFun(), MaxFun(), a)
+minimum(a) = mapreduce(IdFun(), MinFun(), a)
 
+maxabs(a) = mapreduce(AbsFun(), MaxFun(), a)
+minabs(a) = mapreduce(AbsFun(), MinFun(), a)
 
 ## extrema
 
@@ -541,11 +355,73 @@ function extrema{T<:Real}(A::AbstractArray{T})
         end
         i += 1
     end
-
     return (vmin, vmax)
 end
 
 
+## in & contains
+
+function in(x, itr)
+    for y in itr
+        if y == x
+            return true
+        end
+    end
+    return false
+end
+const ∈ = in
+∉(x, itr)=!∈(x, itr)
+∋(itr, x)= ∈(x, itr)
+∌(itr, x)=!∋(itr, x)
+
+function contains(itr, x)
+    depwarn("contains(collection, item) is deprecated, use in(item, collection) instead", :contains)
+    in(x, itr)
+end
+
+function contains(eq::Function, itr, x)
+    for y in itr
+        if eq(y, x)
+            return true
+        end
+    end
+    return false
+end
+
+
+## countnz & count
+
+function countnz(itr)
+    n = 0
+    for x in itr
+        if x != 0
+            n += 1
+        end
+    end
+    return n
+end
+
+function countnz(a::AbstractArray)
+    n = 0
+    for i = 1:length(a)
+        @inbounds x = a[i]
+        if x != 0
+            n += 1
+        end
+    end
+    return n
+end
+
+function count(pred::Function, itr)
+    n = 0
+    for x in itr
+        if pred(x)
+            n += 1
+        end
+    end
+    return n
+end
+
 ## all & any
 
 function all(itr)

From 6024a32773cec90c449d5296ee50b9b467d504ff Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Fri, 30 May 2014 23:31:57 -0500
Subject: [PATCH 10/15] move any/all code above

---
 base/reduce.jl | 78 +++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index a63f105df2125..11d6b9c36625b 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -359,6 +359,45 @@ function extrema{T<:Real}(A::AbstractArray{T})
 end
 
 
+## all & any
+
+function all(itr)
+    for x in itr
+        if !x
+            return false
+        end
+    end
+    return true
+end
+
+function any(itr)
+    for x in itr
+        if x
+            return true
+        end
+    end
+    return false
+end
+
+function any(pred::Union(Function,Func{1}), itr)
+    for x in itr
+        if evaluate(pred, x)
+            return true
+        end
+    end
+    return false
+end
+
+function all(pred::Union(Function,Func{1}), itr)
+    for x in itr
+        if !evaluate(pred, x)
+            return false
+        end
+    end
+    return true
+end
+
+
 ## in & contains
 
 function in(x, itr)
@@ -421,42 +460,3 @@ function count(pred::Function, itr)
     end
     return n
 end
-
-## all & any
-
-function all(itr)
-    for x in itr
-        if !x
-            return false
-        end
-    end
-    return true
-end
-
-function any(itr)
-    for x in itr
-        if x
-            return true
-        end
-    end
-    return false
-end
-
-function any(pred::Union(Function,Func{1}), itr)
-    for x in itr
-        if pred(x)
-            return true
-        end
-    end
-    return false
-end
-
-function all(pred::Union(Function,Func{1}), itr)
-    for x in itr
-        if !pred(x)
-            return false
-        end
-    end
-    return true
-end
-

From 80e8a28ed9c460473148a97bef0034521bee6111 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 31 May 2014 07:06:33 -0500
Subject: [PATCH 11/15] reorganize tests for reduce

---
 base/reduce.jl |  21 +++----
 test/reduce.jl | 150 ++++++++++++++++++++++++++++++++-----------------
 2 files changed, 110 insertions(+), 61 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 11d6b9c36625b..55f23b5d028b3 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -134,17 +134,19 @@ mapreduce(f, op, v0, itr) = mapfoldl(f, op, v0, itr)
 mapreduce_impl(f, op, A::AbstractArray, ifirst::Int, ilast::Int) = 
     mapreduce_seq_impl(f, op, A, ifirst, ilast)
 
-# for specific functions
-reduce_empty(f, op, T) = error("Reducing over an empty array is not allow.")
-reduce_empty(::IdFun, op::AddFun, T) = r_promote(op, zero(T))
-reduce_empty(::AbsFun, op::AddFun, T) = r_promote(op, abs(zero(T)))
-reduce_empty(::Abs2Fun, op::AddFun, T) = r_promote(op, abs2(zero(T)))
-reduce_empty(::IdFun, op::MulFun, T) = r_promote(op, one(T))
+# handling empty arrays
+mr_empty(f, op, T) = error("Reducing over an empty array is not allow.")
+mr_empty(::IdFun, op::AddFun, T) = r_promote(op, zero(T))
+mr_empty(::AbsFun, op::AddFun, T) = r_promote(op, abs(zero(T)))
+mr_empty(::Abs2Fun, op::AddFun, T) = r_promote(op, abs2(zero(T)))
+mr_empty(::IdFun, op::MulFun, T) = r_promote(op, one(T))
+mr_empty(::AbsFun, op::MaxFun, T) = abs(zero(T))
+mr_empty(::Abs2Fun, op::MaxFun, T) = abs2(zero(T))
 
 function _mapreduce{T}(f, op, A::AbstractArray{T})
     n = length(A)
     if n == 0
-        return reduce_empty(f, op, T)
+        return mr_empty(f, op, T)
     elseif n == 1
         return r_promote(op, evaluate(f, A[1]))
     elseif n < 16
@@ -310,12 +312,11 @@ minabs(a) = mapreduce(AbsFun(), MinFun(), a)
 ## extrema
 
 extrema(r::Range) = (minimum(r), maximum(r))
+extrema(x::Real) = (x, x)
 
 function extrema(itr)
     s = start(itr)
-    if done(itr, s)
-        error("argument is empty")
-    end
+    done(itr, s) && error("argument is empty")
     (v, s) = next(itr, s)
     vmin = v
     vmax = v
diff --git a/test/reduce.jl b/test/reduce.jl
index 20eb49c93d470..fc1e3db7aad2b 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -1,6 +1,5 @@
-## foldl & foldr
 
-# folds 
+# fold(l|r) & mapfold(l|r)
 @test foldl(-, 1:5) == -13
 @test foldl(-, 10, 1:5) == -5
 
@@ -13,23 +12,49 @@
 @test Base.mapfoldr(abs2, -, 2:5) == -14
 @test Base.mapfoldr(abs2, -, 10, 2:5) == -4
 
-
-# reduce 
+# reduce & mapreduce
 @test reduce((x,y)->"($x+$y)", [9:11]) == "((9+10)+11)"
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1000, [1:5]) == (1000 + 1 + 2 + 3 + 4 + 5)
 
-# mapreduce 
 @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3)
 @test mapreduce((x)->x[1:3], (x,y)->"($x+$y)", ["abcd", "efgh", "01234"]) == "((abc+efg)+012)"
 
+# sum
 
-z = zeros(2,2,2,2)
-for i=1:16
-    z[i] = i
-end
+@test sum(Int8[]) === 0
+@test sum(Int[]) === 0
+@test sum(Float64[]) === 0.0
+
+@test sum(int8(3)) === int8(3)
+@test sum(3) === 3
+@test sum(3.0) === 3.0
+
+@test sum([int8(3)]) === 3
+@test sum([3]) === 3
+@test sum([3.0]) === 3.0
+
+z = reshape(1:16, (2,2,2,2))
+fz = float(z)
+@test sum(z) === 136
+@test sum(fz) === 136.0
+
+@test_throws ErrorException sum(sin, Int[])
+@test sum(sin, 3) == sin(3.0)
+@test sum(sin, [3]) == sin(3.0)
+@test sum(sin, z) == sum(sin, fz) == sum(sin(fz))
+
+z = [-4, -3, 2, 5]
+fz = float(z)
+@test Base.sumabs(Float64[]) === 0.0
+@test Base.sumabs([int8(-2)]) === 2
+@test Base.sumabs(z) === 14
+@test Base.sumabs(fz) === 14.0
 
-@test sum(z) == sum(z,(1,2,3,4))[1] == 136
+@test Base.sumabs2(Float64[]) === 0.0
+@test Base.sumabs2([int8(-2)]) === 4
+@test Base.sumabs2(z) === 54
+@test Base.sumabs2(fz) === 54.0
 
 # check variants of summation for type-stability and other issues (#6069)
 sum2(itr) = invoke(sum, (Any,), itr)
@@ -55,48 +80,36 @@ for f in (sum3, sum4, sum7, sum8)
 end
 @test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
 
-prod2(itr) = invoke(prod, (Any,), itr)
-@test prod(Int[]) == prod2(Int[]) == 1
-@test prod(Int[7]) == prod2(Int[7]) == 7
-@test typeof(prod(Int8[])) == typeof(prod(Int8[1])) == typeof(prod(Int8[1 7])) == typeof(prod2(Int8[])) == typeof(prod2(Int8[1])) == typeof(prod2(Int8[1 7]))
+@test sum_kbn([1,1e100,1,-1e100]) == 2
 
-v = cell(2,2,1,1)
-v[1,1,1,1] = 28.0
-v[1,2,1,1] = 36.0
-v[2,1,1,1] = 32.0
-v[2,2,1,1] = 40.0
+# prod
 
-@test isequal(v,sum(z,(3,4)))
+prod(Int[]) === 0
+prod(Int8[]) === 0
+prod(Float64[]) === 0.0
 
-@test sum_kbn([1,1e100,1,-1e100]) == 2
+prod([3]) === 0
+prod([int8(3)]) === 0
+prod([3.0]) === 0.0
 
-z = rand(10^6)
-let es = sum_kbn(z), es2 = sum_kbn(z[1:10^5])
-    @test (es - sum(z)) < es * 1e-13
-    cs = cumsum(z)
-    @test (es - cs[end]) < es * 1e-13
-    @test (es2 - cs[10^5]) < es2 * 1e-13
-end
-
-@test_throws ErrorException sum(sin, Int[])
-@test Base.sumabs(Float64[]) === 0.0
-@test Base.sumabs2(Float64[]) === 0.0
+prod(z) === 120
+prod(fz) === 120.0
 
-@test sum(sin, [1]) == sin(1)
-@test Base.sumabs([int8(-2)]) === 2
-@test Base.sumabs2([int8(-2)]) === 4
+# check type-stability
+prod2(itr) = invoke(prod, (Any,), itr)
+@test prod(Int[]) === prod2(Int[]) === 1
+@test prod(Int[7]) === prod2(Int[7]) === 7
+@test typeof(prod(Int8[])) == typeof(prod(Int8[1])) == typeof(prod(Int8[1, 7])) == Int 
+@test typeof(prod2(Int8[])) == typeof(prod2(Int8[1])) == typeof(prod2(Int8[1 7])) == Int
 
-x = -2:3
-@test sum(sin, x) == sum(sin(x))
-@test Base.sumabs(x) === 9
-@test Base.sumabs2(x) === 19
+# maximum & minimum & extrema
 
-@test_approx_eq sum(sin, z) sum(sin(z))
-@test_approx_eq Base.sumabs(z) sum(abs(z))
-@test_approx_eq Base.sumabs2(z) sum(abs2(z))
+@test_throws ErrorException maximum(Int[])
+@test_throws ErrorException minimum(Int[])
 
 @test maximum(5) == 5
 @test minimum(5) == 5
+@test extrema(5) == (5, 5)
 
 @test maximum([4, 3, 5, 2]) == 5
 @test minimum([4, 3, 5, 2]) == 2
@@ -110,25 +123,60 @@ x = -2:3
 @test minimum([4., 3., NaN, 5., 2.]) == 2.
 @test extrema([4., 3., NaN, 5., 2.]) == (2., 5.)
 
-@test extrema(1:5) == (1,5)
+@test Base.maxabs(Int[]) == 0
+@test_throws ErrorException Base.minabs(Int[])
 
 @test Base.maxabs(-2) == 2
 @test Base.minabs(-2) == 2
 @test Base.maxabs([1, -2, 3, -4]) == 4
 @test Base.minabs([-1, 2, -3, 4]) == 1
 
-@test maximum(abs2, 3:7) == 49
-@test minimum(abs2, 3:7) == 9
-
-@test any([true false; false false], 2) == [true false]'
-@test any([true false; false false], 1) == [true false]
-
-@test all([true true; false true], 2) == [true false]'
-@test all([true false; false true], 1) == [false false]
+@test maximum(x->abs2(x), 3:7) == 49
+@test minimum(x->abs2(x), 3:7) == 9
+
+# any & all
+
+@test any(Bool[]) == false
+@test any([true]) == true
+@test any([false, false]) == false
+@test any([false, true]) == true
+@test any([true, false]) == true
+@test any([true, true]) == true
+@test any([true, true, true]) == true
+@test any([true, false, true]) == true
+@test any([false, false, false]) == false
+
+@test all(Bool[]) == true
+@test all([true]) == true
+@test all([false, false]) == false
+@test all([false, true]) == false
+@test all([true, false]) == false
+@test all([true, true]) == true
+@test all([true, true, true]) == true
+@test all([true, false, true]) == false
+@test all([false, false, false]) == false
+
+@test any(x->x>0, Int[]) == false
+@test any(x->x>0, [-3]) == false
+@test any(x->x>0, [4]) == true
+@test any(x->x>0, [-3, 4, 5]) == true
+
+@test all(x->x>0, Int[]) == true
+@test all(x->x>0, [-3]) == false
+@test all(x->x>0, [4]) == true
+@test all(x->x>0, [-3, 4, 5]) == false
 
 
 ## cumsum, cummin, cummax
 
+z = rand(10^6)
+let es = sum_kbn(z), es2 = sum_kbn(z[1:10^5])
+    @test (es - sum(z)) < es * 1e-13
+    cs = cumsum(z)
+    @test (es - cs[end]) < es * 1e-13
+    @test (es2 - cs[10^5]) < es2 * 1e-13
+end
+
 @test isequal(cummin([1, 2, 5, -1, 3, -2]), [1, 1, 1, -1, -1, -2])
 @test isequal(cummax([1, 2, 5, -1, 3, -2]), [1, 2, 5, 5, 5, 5])
 

From 5336b78819f506bdca2b68985d45557a615b3aff Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 31 May 2014 08:05:26 -0500
Subject: [PATCH 12/15] and, or, count reimplemented using new paradigm

---
 base/reduce.jl | 86 ++++++++++++++++++++++++++++----------------------
 test/reduce.jl | 19 +++++++++++
 2 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 55f23b5d028b3..1e308fdde6edf 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -142,6 +142,8 @@ mr_empty(::Abs2Fun, op::AddFun, T) = r_promote(op, abs2(zero(T)))
 mr_empty(::IdFun, op::MulFun, T) = r_promote(op, one(T))
 mr_empty(::AbsFun, op::MaxFun, T) = abs(zero(T))
 mr_empty(::Abs2Fun, op::MaxFun, T) = abs2(zero(T))
+mr_empty(f, op::AndFun, T) = true
+mr_empty(f, op::OrFun, T) = false
 
 function _mapreduce{T}(f, op, A::AbstractArray{T})
     n = length(A)
@@ -264,7 +266,7 @@ prod(A::AbstractArray{Bool}) =
 
 ## maximum & minimum
 
-function mapreduce_seq_impl(f, op::MaxFun, A::AbstractArray, first::Int, last::Int)
+function mapreduce_impl(f, op::MaxFun, A::AbstractArray, first::Int, last::Int)
     # locate the first non NaN number
     v = evaluate(f, A[first])
     i = first + 1
@@ -282,7 +284,7 @@ function mapreduce_seq_impl(f, op::MaxFun, A::AbstractArray, first::Int, last::I
     v
 end
 
-function mapreduce_seq_impl(f, op::MinFun, A::AbstractArray, first::Int, last::Int)
+function mapreduce_impl(f, op::MinFun, A::AbstractArray, first::Int, last::Int)
     # locate the first non NaN number
     v = evaluate(f, A[first])
     i = first + 1
@@ -362,53 +364,63 @@ end
 
 ## all & any
 
-function all(itr)
+function mapfoldl(f, ::AndFun, itr)
     for x in itr
-        if !x
+        if !evaluate(f, x)
             return false
         end
     end
     return true
 end
 
-function any(itr)
+function mapfoldl(f, ::OrFun, itr)
     for x in itr
-        if x
+        if evaluate(f, x)
             return true
         end
     end
     return false
 end
 
-function any(pred::Union(Function,Func{1}), itr)
-    for x in itr
-        if evaluate(pred, x)
-            return true
+function mapreduce_impl(f, op::AndFun, A::AbstractArray, ifirst::Int, ilast::Int)
+    while ifirst <= ilast
+        @inbounds x = A[ifirst]
+        if !evaluate(f, x)
+            return false
         end
+        ifirst += 1
     end
-    return false
+    return true
 end
 
-function all(pred::Union(Function,Func{1}), itr)
-    for x in itr
-        if !evaluate(pred, x)
-            return false
+function mapreduce_impl(f, op::OrFun, A::AbstractArray, ifirst::Int, ilast::Int)
+    while ifirst <= ilast
+        @inbounds x = A[ifirst]
+        if evaluate(f, x)
+            return true
         end
+        ifirst += 1
     end
-    return true
+    return false
 end
 
+all(a) = mapreduce(IdFun(), AndFun(), a)
+any(a) = mapreduce(IdFun(), OrFun(), a)
+
+all(pred::Union(Function,Func{1}), a) = mapreduce(pred, AndFun(), a)
+any(pred::Union(Function,Func{1}), a) = mapreduce(pred, OrFun(), a)
+
 
 ## in & contains
 
-function in(x, itr)
-    for y in itr
-        if y == x
-            return true
-        end
-    end
-    return false
+immutable EqX{T} <: Func{1}
+    x::T
 end
+EqX{T}(x::T) = EqX{T}(x)
+evaluate(f::EqX, y) = (y == f.x)
+
+in(x, itr) = any(EqX(x), itr)
+
 const ∈ = in
 ∉(x, itr)=!∈(x, itr)
 ∋(itr, x)= ∈(x, itr)
@@ -431,33 +443,31 @@ end
 
 ## countnz & count
 
-function countnz(itr)
+function count(pred::Union(Function,Func{1}), itr)
     n = 0
     for x in itr
-        if x != 0
+        if evaluate(pred, x)
             n += 1
         end
     end
     return n
 end
 
-function countnz(a::AbstractArray)
+function count(pred::Union(Function,Func{1}), a::AbstractArray)
     n = 0
-    for i = 1:length(a)
-        @inbounds x = a[i]
-        if x != 0
+    i = 0
+    len = length(a)
+    while i < len
+        @inbounds x = a[i+=1]
+        if evaluate(pred, x)
             n += 1
         end
     end
     return n
 end
 
-function count(pred::Function, itr)
-    n = 0
-    for x in itr
-        if pred(x)
-            n += 1
-        end
-    end
-    return n
-end
+type NotEqZero <: Func{1} end  # predicate functor used by countnz: x != 0
+evaluate(::NotEqZero, x) = (x != 0)  # dispatch on the functor type, not on Any
+
+countnz(a) = count(NotEqZero(), a)
+
diff --git a/test/reduce.jl b/test/reduce.jl
index fc1e3db7aad2b..9cba9ac9c8d0c 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -166,6 +166,25 @@ prod2(itr) = invoke(prod, (Any,), itr)
 @test all(x->x>0, [4]) == true
 @test all(x->x>0, [-3, 4, 5]) == false
 
+# in
+
+@test in(1, Int[]) == false
+@test in(1, Int[1]) == true
+@test in(1, Int[2]) == false
+@test in(0, 1:3) == false
+@test in(1, 1:3) == true
+@test in(2, 1:3) == true
+
+# count & countnz
+
+@test count(x->x>0, Int[]) == 0
+@test count(x->x>0, -3:5) == 5
+
+@test countnz(Int[]) == 0
+@test countnz(Int[0]) == 0
+@test countnz(Int[1]) == 1
+@test countnz([1, 0, 2, 0, 3, 0, 4]) == 4
+
 
 ## cumsum, cummin, cummax
 

From 03746ef2ecb65c828b56f386879955057777ece6 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 31 May 2014 08:25:26 -0500
Subject: [PATCH 13/15] tweak extrema

---
 base/reduce.jl | 48 ++++++++++--------------------------------------
 1 file changed, 10 insertions(+), 38 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 1e308fdde6edf..0ee747727692b 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -322,46 +322,21 @@ function extrema(itr)
     (v, s) = next(itr, s)
     vmin = v
     vmax = v
-    while !done(itr, s)
+    while v != v && !done(itr, s)
         (x, s) = next(itr, s)
-        if x == x
-            if x > vmax
-                vmax = x
-            elseif x < vmin
-                vmin = x
-            end
-        end
-    end
-    return (vmin, vmax)
-end
-
-function extrema{T<:Real}(A::AbstractArray{T})
-    if isempty(A); error("argument must not be empty"); end
-    n = length(A)
-
-    # locate the first non NaN number
-    v = A[1]
-    i = 2
-    while v != v && i <= n
-        @inbounds v = A[i]
-        i += 1
+        v = x
     end
-
-    vmin = v
-    vmax = v
-    while i <= n
-        @inbounds v = A[i]
-        if v > vmax
-            vmax = v
-        elseif v < vmin
-            vmin = v
+    while !done(itr, s)
+        (x, s) = next(itr, s)
+        if x > vmax
+            vmax = x
+        elseif x < vmin
+            vmin = x
         end
-        i += 1
     end
     return (vmin, vmax)
 end
 
-
 ## all & any
 
 function mapfoldl(f, ::AndFun, itr)
@@ -455,11 +430,8 @@ end
 
 function count(pred::Union(Function,Func{1}), a::AbstractArray)
     n = 0
-    i = 0
-    len = length(a)
-    while i < len
-        @inbounds x = a[i+=1]
-        if evaluate(pred, x)
+    for i = 1:length(a)
+        @inbounds if evaluate(pred, a[i])
             n += 1
         end
     end

From 7828db5c757f2f44d7e308856f23f6031131ba37 Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sat, 31 May 2014 09:22:13 -0500
Subject: [PATCH 14/15] a small fix of mapreduce function signature

---
 base/reduce.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index 0ee747727692b..e8fe5d1aeee55 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -169,7 +169,7 @@ end
 mapreduce(f, op, A::AbstractArray) = _mapreduce(f, op, A)
 mapreduce(f, op, a::Number) = evaluate(f, a)
 
-function mapreduce(f, op, A::AbstractArray)
+function mapreduce(f, op::Function, A::AbstractArray)
     is(op, +) ? _mapreduce(f, AddFun(), A) :
     is(op, *) ? _mapreduce(f, MulFun(), A) :
     is(op, &) ? _mapreduce(f, AndFun(), A) :

From e8c3a8a2258374b3192824ded1b1be9431c6707a Mon Sep 17 00:00:00 2001
From: Dahua Lin <lindahua@gmail.com>
Date: Sun, 1 Jun 2014 11:45:38 -0500
Subject: [PATCH 15/15] small fix to extrema, now it correctly handles [NaN,
 ....]

---
 base/reduce.jl | 4 ++--
 test/reduce.jl | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/base/reduce.jl b/base/reduce.jl
index e8fe5d1aeee55..969149e609e7c 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -320,12 +320,12 @@ function extrema(itr)
     s = start(itr)
     done(itr, s) && error("argument is empty")
     (v, s) = next(itr, s)
-    vmin = v
-    vmax = v
     while v != v && !done(itr, s)
         (x, s) = next(itr, s)
         v = x
     end
+    vmin = v
+    vmax = v
     while !done(itr, s)
         (x, s) = next(itr, s)
         if x > vmax
diff --git a/test/reduce.jl b/test/reduce.jl
index 9cba9ac9c8d0c..bc6868fef2045 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -119,6 +119,10 @@ prod2(itr) = invoke(prod, (Any,), itr)
 @test isnan(minimum([NaN]))
 @test isequal(extrema([NaN]), (NaN, NaN))
 
+@test maximum([NaN, 2., 3.]) == 3.
+@test minimum([NaN, 2., 3.]) == 2.
+@test extrema([NaN, 2., 3.]) == (2., 3.)
+
 @test maximum([4., 3., NaN, 5., 2.]) == 5.
 @test minimum([4., 3., NaN, 5., 2.]) == 2.
 @test extrema([4., 3., NaN, 5., 2.]) == (2., 5.)