Skip to content

Commit

Permalink
deprecate histogram functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
simonbyrne committed Feb 11, 2015
1 parent 0a225b0 commit 40c824c
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 241 deletions.
144 changes: 144 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,147 @@ end
# 8898
@deprecate precision(x::DateTime) eps(x)
@deprecate precision(x::Date) eps(x)

# Histogram: moved to StatsBase (#6842)
# Compute "nice" bin edges for a histogram of the floating-point array `v`,
# using approximately `n` bins. The bin width is 2, 5 or 10 times a power of
# ten (or 1.0 when all values are equal).
#
# Throws `ArgumentError` when `n < 0` for an empty array, or `n < 1` for a
# non-empty one. An empty array yields the single-edge range `0.0:1.0:0.0`.
function histrange{T<:FloatingPoint,N}(v::AbstractArray{T,N}, n::Integer)
    if isempty(v)
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0.0:1.0:0.0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if lo == hi
        width = 1.0
    else
        raw = (hi - lo) / n                # ideal (un-rounded) bin width
        mag = 10.0^floor(log10(raw))       # largest power of ten not exceeding raw
        ratio = raw / mag
        width = ratio <= 2 ? 2*mag :
                ratio <= 5 ? 5*mag : 10*mag
    end

    # First edge lies strictly below `lo`, on a multiple of `width`.
    left = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - left)/width)
    return left:width:(left + nsteps*width)
end

# Compute "nice" bin edges for a histogram of the integer array `v`, using
# approximately `n` bins. The bin width is 1, 2, 5 or 10 times a
# non-negative power of ten, so it is always a whole number.
#
# Throws `ArgumentError` when `n < 0` for an empty array, or `n < 1` for a
# non-empty one. An empty array yields the single-edge range `0:1:0`.
function histrange{T<:Integer,N}(v::AbstractArray{T,N}, n::Integer)
    if isempty(v)
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0:1:0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if lo == hi
        width = 1
    else
        raw = (hi - lo) / n                          # ideal (un-rounded) bin width
        mag = 10^max(0, floor(Int, log10(raw)))      # power of ten, never below 1
        ratio = raw / mag
        width = ratio <= 1 ? mag :
                ratio <= 2 ? 2*mag :
                ratio <= 5 ? 5*mag : 10*mag
    end

    # First edge lies strictly below `lo`, on a multiple of `width`.
    left = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - left)/width)
    return left:width:(left + nsteps*width)
end

## midpoints of intervals
# Deprecated: return the midpoints of consecutive edges; the result has
# `length(edges) - 1` elements. A deprecation warning is emitted on each call.
# `depwarn` needs the name of the deprecated function as its second argument
# so the warning can be attributed to the caller.
function midpoints(r::Range)
    depwarn("midpoints(x) is deprecated. Method now in StatsBase.jl", :midpoints)
    r[1:length(r)-1] + 0.5*step(r)
end
function midpoints(v::AbstractVector)
    depwarn("midpoints(x) is deprecated. Method now in StatsBase.jl", :midpoints)
    [0.5*(v[i] + v[i+1]) for i in 1:length(v)-1]
end

## hist ##
# Sturges' formula: suggested number of histogram bins for `n` observations,
# `ceil(log2(n)) + 1`. Zero observations yield one bin.
function sturges(n)
    if n == 0
        return one(n)
    end
    return ceil(Int, log2(n)) + 1
end

# Deprecated: accumulate a histogram of `v` into the pre-allocated `h`.
#
# `edg` holds the bin edges; bin `i` counts the values `x` with
# `edg[i] < x <= edg[i+1]`. Values outside every bin are ignored.
# When `init` is true (the default) `h` is zeroed first; pass `init=false`
# to add to the existing counts.
#
# Throws `DimensionMismatch` unless `length(h) == length(edg) - 1`.
# Returns the tuple `(edg, h)`.
function hist!{HT}(h::AbstractArray{HT}, v::AbstractVector, edg::AbstractVector; init::Bool=true)
    # `depwarn` needs the deprecated function's name as its second argument.
    depwarn("hist(...) and hist!(...) are deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist!)
    n = length(edg) - 1
    length(h) == n || throw(DimensionMismatch("length(histogram) must equal length(edges) - 1"))
    if init
        fill!(h, zero(HT))
    end
    for x in v
        # Index of the first edge >= x, minus one, is the bin whose upper edge covers x.
        i = searchsortedfirst(edg, x)-1
        if 1 <= i <= n
            h[i] += 1
        end
    end
    edg, h
end

# Histogram of a vector with explicit bin edges: allocates an `Int` count
# vector with one entry per bin and delegates to `hist!`.
function hist(v::AbstractVector, edg::AbstractVector)
    counts = Array(Int, length(edg)-1)
    return hist!(counts, v, edg)
end

# Histogram of a vector with approximately `n` bins; edges come from `histrange`.
function hist(v::AbstractVector, n::Integer)
    edges = histrange(v, n)
    return hist(v, edges)
end

# Histogram of a vector with the bin count chosen by Sturges' formula.
function hist(v::AbstractVector)
    nbins = sturges(length(v))
    return hist(v, nbins)
end

# Deprecated: column-wise histogram. Column `j` of `A` is histogrammed with
# the shared edges `edg` into column `j` of the pre-allocated matrix `H`.
#
# When `init` is true (the default) `H` is zeroed first; pass `init=false`
# to add to the existing counts.
#
# Throws `DimensionMismatch` unless `size(H) == (length(edg)-1, size(A,2))`.
# Returns the tuple `(edg, H)`.
function hist!{HT}(H::AbstractArray{HT,2}, A::AbstractMatrix, edg::AbstractVector; init::Bool=true)
    # `depwarn` needs the deprecated function's name as its second argument.
    depwarn("hist(...) and hist!(...) are deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist!)
    ncols = size(A, 2)
    size(H) == (length(edg)-1, ncols) || throw(DimensionMismatch("incorrect size of histogram"))
    if init
        fill!(H, zero(HT))
    end
    for j = 1:ncols
        # Any requested reset was done above. The per-column call must not
        # zero again, otherwise `init=false` would silently discard counts.
        hist!(sub(H, :, j), sub(A, :, j), edg; init=false)
    end
    edg, H
end

# Column-wise histogram of a matrix with explicit bin edges: allocates an
# `Int` count matrix (bins × columns of `A`) and delegates to `hist!`.
function hist(A::AbstractMatrix, edg::AbstractVector)
    counts = Array(Int, length(edg)-1, size(A,2))
    return hist!(counts, A, edg)
end

# Column-wise histogram with approximately `n` bins; edges come from
# `histrange` applied to the whole matrix.
function hist(A::AbstractMatrix, n::Integer)
    edges = histrange(A, n)
    return hist(A, edges)
end

# Column-wise histogram with the bin count chosen by Sturges' formula from
# the number of rows.
function hist(A::AbstractMatrix)
    nbins = sturges(size(A,1))
    return hist(A, nbins)
end


## hist2d
# Deprecated: accumulate a 2-d histogram into the pre-allocated matrix `H`.
#
# Each row of `v` is one point: column 1 is binned with `edg1`, column 2 with
# `edg2`. A point is counted in `H[x,y]` when both coordinates fall inside a
# bin (`edg[i] < value <= edg[i+1]`); other points are ignored.
# When `init` is true (the default) `H` is zeroed first.
#
# Throws `DimensionMismatch` when `v` does not have exactly two columns or
# when `size(H) != (length(edg1)-1, length(edg2)-1)`.
# Returns the tuple `(edg1, edg2, H)`.
function hist2d!{HT}(H::AbstractArray{HT,2}, v::AbstractMatrix,
                     edg1::AbstractVector, edg2::AbstractVector; init::Bool=true)
    # `depwarn` needs the deprecated function's name as its second argument.
    depwarn("hist2d(...) is deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist2d!)
    size(v,2) == 2 || throw(DimensionMismatch("hist2d requires an Nx2 matrix"))
    n = length(edg1) - 1
    m = length(edg2) - 1
    size(H) == (n, m) || throw(DimensionMismatch("incorrect size of histogram"))
    if init
        fill!(H, zero(HT))
    end
    for i = 1:size(v,1)
        x = searchsortedfirst(edg1, v[i,1]) - 1
        y = searchsortedfirst(edg2, v[i,2]) - 1
        if 1 <= x <= n && 1 <= y <= m
            # The range check above guarantees (x, y) is a valid index of H.
            @inbounds H[x,y] += 1
        end
    end
    edg1, edg2, H
end

# 2-d histogram with explicit edges per dimension: allocates the `Int` count
# matrix and delegates to `hist2d!`.
function hist2d(v::AbstractMatrix, edg1::AbstractVector, edg2::AbstractVector)
    counts = Array(Int, length(edg1)-1, length(edg2)-1)
    return hist2d!(counts, v, edg1, edg2)
end

# 2-d histogram using the same edges for both dimensions.
function hist2d(v::AbstractMatrix, edg::AbstractVector)
    return hist2d(v, edg, edg)
end

# 2-d histogram with approximately `n1` and `n2` bins; edges come from
# `histrange` applied to the first and second column of `v` respectively.
function hist2d(v::AbstractMatrix, n1::Integer, n2::Integer)
    edges1 = histrange(sub(v,:,1), n1)
    edges2 = histrange(sub(v,:,2), n2)
    return hist2d(v, edges1, edges2)
end

# 2-d histogram with approximately `n` bins in both dimensions.
function hist2d(v::AbstractMatrix, n::Integer)
    return hist2d(v, n, n)
end

# 2-d histogram with the bin count chosen by Sturges' formula from the
# number of rows.
function hist2d(v::AbstractMatrix)
    nbins = sturges(size(v,1))
    return hist2d(v, nbins)
end
147 changes: 0 additions & 147 deletions base/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -506,150 +506,3 @@ function bound_quantiles(qs::AbstractVector)
end
[min(1,max(0,q)) for q = qs]
end



##### histogram #####

## nice-valued ranges for histograms

# Compute "nice" bin edges for a histogram of the floating-point array `v`,
# using approximately `n` bins. The bin width is 2, 5 or 10 times a power of
# ten (or 1.0 when all values are equal).
#
# Throws `ArgumentError` when `n < 0` for an empty array, or `n < 1` for a
# non-empty one. An empty array yields the single-edge range `0.0:1.0:0.0`.
function histrange{T<:FloatingPoint,N}(v::AbstractArray{T,N}, n::Integer)
    if isempty(v)
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0.0:1.0:0.0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if lo == hi
        width = 1.0
    else
        raw = (hi - lo) / n                # ideal (un-rounded) bin width
        mag = 10.0^floor(log10(raw))       # largest power of ten not exceeding raw
        ratio = raw / mag
        width = ratio <= 2 ? 2*mag :
                ratio <= 5 ? 5*mag : 10*mag
    end

    # First edge lies strictly below `lo`, on a multiple of `width`.
    left = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - left)/width)
    return left:width:(left + nsteps*width)
end

# Compute "nice" bin edges for a histogram of the integer array `v`, using
# approximately `n` bins. The bin width is 1, 2, 5 or 10 times a
# non-negative power of ten, so it is always a whole number.
#
# Throws `ArgumentError` when `n < 0` for an empty array, or `n < 1` for a
# non-empty one. An empty array yields the single-edge range `0:1:0`.
function histrange{T<:Integer,N}(v::AbstractArray{T,N}, n::Integer)
    if isempty(v)
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0:1:0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if lo == hi
        width = 1
    else
        raw = (hi - lo) / n                          # ideal (un-rounded) bin width
        mag = 10^max(0, floor(Int, log10(raw)))      # power of ten, never below 1
        ratio = raw / mag
        width = ratio <= 1 ? mag :
                ratio <= 2 ? 2*mag :
                ratio <= 5 ? 5*mag : 10*mag
    end

    # First edge lies strictly below `lo`, on a multiple of `width`.
    left = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - left)/width)
    return left:width:(left + nsteps*width)
end

## midpoints of intervals
# Midpoints of the bins of a range of edges: every edge but the last,
# shifted by half the range's step.
function midpoints(r::Range)
    lower_edges = r[1:length(r)-1]
    return lower_edges + 0.5*step(r)
end

# Midpoints of the bins of an arbitrary edge vector: the average of each
# pair of neighbouring edges.
function midpoints(v::AbstractVector)
    return [0.5*(v[i] + v[i+1]) for i in 1:length(v)-1]
end

## hist ##
# Sturges' formula: suggested number of histogram bins for `n` observations,
# `ceil(log2(n)) + 1`. Zero observations yield one bin.
function sturges(n)
    if n == 0
        return one(n)
    end
    return ceil(Int, log2(n)) + 1
end

# Accumulate a histogram of `v` into the pre-allocated `h`.
#
# `edg` holds the bin edges; bin `i` counts the values `x` with
# `edg[i] < x <= edg[i+1]`. Values outside every bin are ignored.
# When `init` is true (the default) `h` is zeroed first.
#
# Throws `DimensionMismatch` unless `length(h) == length(edg) - 1`.
# Returns the tuple `(edg, h)`.
function hist!{HT}(h::AbstractArray{HT}, v::AbstractVector, edg::AbstractVector; init::Bool=true)
    nbins = length(edg) - 1
    if length(h) != nbins
        throw(DimensionMismatch("length(histogram) must equal length(edges) - 1"))
    end
    init && fill!(h, zero(HT))
    for x in v
        # Index of the first edge >= x, minus one, is the bin whose upper edge covers x.
        bin = searchsortedfirst(edg, x) - 1
        (bin < 1 || bin > nbins) && continue
        h[bin] += 1
    end
    return edg, h
end

# Histogram of a vector with explicit bin edges: allocates an `Int` count
# vector with one entry per bin and delegates to `hist!`.
function hist(v::AbstractVector, edg::AbstractVector)
    counts = Array(Int, length(edg)-1)
    return hist!(counts, v, edg)
end

# Histogram of a vector with approximately `n` bins; edges come from `histrange`.
function hist(v::AbstractVector, n::Integer)
    edges = histrange(v, n)
    return hist(v, edges)
end

# Histogram of a vector with the bin count chosen by Sturges' formula.
function hist(v::AbstractVector)
    nbins = sturges(length(v))
    return hist(v, nbins)
end

# Column-wise histogram. Column `j` of `A` is histogrammed with the shared
# edges `edg` into column `j` of the pre-allocated matrix `H`.
#
# When `init` is true (the default) `H` is zeroed first; pass `init=false`
# to add to the existing counts.
#
# Throws `DimensionMismatch` unless `size(H) == (length(edg)-1, size(A,2))`.
# Returns the tuple `(edg, H)`.
function hist!{HT}(H::AbstractArray{HT,2}, A::AbstractMatrix, edg::AbstractVector; init::Bool=true)
    ncols = size(A, 2)
    size(H) == (length(edg)-1, ncols) || throw(DimensionMismatch("incorrect size of histogram"))
    if init
        fill!(H, zero(HT))
    end
    for j = 1:ncols
        # Any requested reset was done above. The per-column call must not
        # zero again, otherwise `init=false` would silently discard counts.
        hist!(sub(H, :, j), sub(A, :, j), edg; init=false)
    end
    edg, H
end

# Column-wise histogram of a matrix with explicit bin edges: allocates an
# `Int` count matrix (bins × columns of `A`) and delegates to `hist!`.
function hist(A::AbstractMatrix, edg::AbstractVector)
    counts = Array(Int, length(edg)-1, size(A,2))
    return hist!(counts, A, edg)
end

# Column-wise histogram with approximately `n` bins; edges come from
# `histrange` applied to the whole matrix.
function hist(A::AbstractMatrix, n::Integer)
    edges = histrange(A, n)
    return hist(A, edges)
end

# Column-wise histogram with the bin count chosen by Sturges' formula from
# the number of rows.
function hist(A::AbstractMatrix)
    nbins = sturges(size(A,1))
    return hist(A, nbins)
end


## hist2d
# Accumulate a 2-d histogram into the pre-allocated matrix `H`.
#
# Each row of `v` is one point: column 1 is binned with `edg1`, column 2 with
# `edg2`. A point is counted only when both coordinates fall inside a bin
# (`edg[i] < value <= edg[i+1]`); other points are ignored.
# When `init` is true (the default) `H` is zeroed first.
#
# Throws `DimensionMismatch` when `v` does not have exactly two columns or
# when `size(H) != (length(edg1)-1, length(edg2)-1)`.
# Returns the tuple `(edg1, edg2, H)`.
function hist2d!{HT}(H::AbstractArray{HT,2}, v::AbstractMatrix,
                     edg1::AbstractVector, edg2::AbstractVector; init::Bool=true)
    if size(v,2) != 2
        throw(DimensionMismatch("hist2d requires an Nx2 matrix"))
    end
    nx = length(edg1) - 1
    ny = length(edg2) - 1
    if size(H) != (nx, ny)
        throw(DimensionMismatch("incorrect size of histogram"))
    end
    init && fill!(H, zero(HT))
    for row = 1:size(v,1)
        bx = searchsortedfirst(edg1, v[row,1]) - 1
        by = searchsortedfirst(edg2, v[row,2]) - 1
        (1 <= bx <= nx && 1 <= by <= ny) || continue
        # The range check above guarantees (bx, by) is a valid index of H.
        @inbounds H[bx,by] += 1
    end
    return edg1, edg2, H
end

# 2-d histogram with explicit edges per dimension: allocates the `Int` count
# matrix and delegates to `hist2d!`.
function hist2d(v::AbstractMatrix, edg1::AbstractVector, edg2::AbstractVector)
    counts = Array(Int, length(edg1)-1, length(edg2)-1)
    return hist2d!(counts, v, edg1, edg2)
end

# 2-d histogram using the same edges for both dimensions.
function hist2d(v::AbstractMatrix, edg::AbstractVector)
    return hist2d(v, edg, edg)
end

# 2-d histogram with approximately `n1` and `n2` bins; edges come from
# `histrange` applied to the first and second column of `v` respectively.
function hist2d(v::AbstractMatrix, n1::Integer, n2::Integer)
    edges1 = histrange(sub(v,:,1), n1)
    edges2 = histrange(sub(v,:,2), n2)
    return hist2d(v, edges1, edges2)
end

# 2-d histogram with approximately `n` bins in both dimensions.
function hist2d(v::AbstractMatrix, n::Integer)
    return hist2d(v, n, n)
end

# 2-d histogram with the bin count chosen by Sturges' formula from the
# number of rows.
function hist2d(v::AbstractMatrix)
    nbins = sturges(size(v,1))
    return hist2d(v, nbins)
end

49 changes: 0 additions & 49 deletions doc/stdlib/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1239,55 +1239,6 @@ Statistics

Like ``median``, but may overwrite the input vector.

.. function:: hist(v[, n]) -> e, counts

Compute the histogram of ``v``, optionally using approximately ``n``
bins. The return values are a range ``e``, which corresponds to the
edges of the bins, and ``counts``, containing the number of elements of
``v`` in each bin.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: hist(v, e) -> e, counts

Compute the histogram of ``v`` using a vector/range ``e`` as the edges for
the bins. The result will be a vector of length ``length(e) - 1``, such that the
element at location ``i`` equals ``sum(e[i] .< v .<= e[i+1])``.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: hist!(counts, v, e) -> e, counts

Compute the histogram of ``v``, using a vector/range ``e`` as the edges for the bins.
This function writes the resultant counts to a pre-allocated array ``counts``.

.. function:: hist2d(M, e1, e2) -> (edge1, edge2, counts)

Compute a "2d histogram" of a set of N points specified by N-by-2 matrix ``M``.
Arguments ``e1`` and ``e2`` are bins for each dimension, specified either as
integer bin counts or vectors of bin edges. The result is a tuple of
``edge1`` (the bin edges used in the first dimension), ``edge2`` (the bin edges
used in the second dimension), and ``counts``, a histogram matrix of size
``(length(edge1)-1, length(edge2)-1)``.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: hist2d!(counts, M, e1, e2) -> (e1, e2, counts)

Compute a "2d histogram" with respect to the bins delimited by the edges given
in ``e1`` and ``e2``. This function writes the results to a pre-allocated
array ``counts``.

.. function:: histrange(v, n)

Compute *nice* bin ranges for the edges of a histogram of ``v``, using
approximately ``n`` bins. The resulting step sizes will be 1, 2 or 5
multiplied by a power of 10.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: midpoints(e)

Compute the midpoints of the bins with edges ``e``. The result is a
vector/range of length ``length(e) - 1``.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: quantile(v, p)

Compute the quantiles of a vector ``v`` at a specified set of probability values ``p``.
Expand Down
4 changes: 0 additions & 4 deletions test/arrayops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -613,13 +613,9 @@ B = cat(3, 1, 2, 3)
begin
local a,h,i
a = rand(5,5)
h = mapslices(v -> hist(v,0:0.1:1)[2], a, 1)
H = mapslices(v -> hist(v,0:0.1:1)[2], a, 2)
s = mapslices(sort, a, [1])
S = mapslices(sort, a, [2])
for i = 1:5
@test h[:,i] == hist(a[:,i],0:0.1:1)[2]
@test vec(H[i,:]) == hist(vec(a[i,:]),0:0.1:1)[2]
@test s[:,i] == sort(a[:,i])
@test vec(S[i,:]) == sort(vec(a[i,:]))
end
Expand Down
3 changes: 2 additions & 1 deletion test/parallel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ map!(x->1, d)

# Test @parallel load balancing - all processors should get either M or M+1
# iterations out of the loop range for some M.
workloads = hist(@parallel((a,b)->[a;b], for i=1:7; myid(); end), nprocs())[2]
ids = @parallel((a,b)->[a;b], for i=1:7; myid(); end)
workloads = Int[sum(ids .== i) for i in 1:nprocs()]
@test maximum(workloads) - minimum(workloads) <= 1

# @parallel reduction should work even with very short ranges
Expand Down
Loading

0 comments on commit 40c824c

Please sign in to comment.