Skip to content

Commit

Permalink
move crc32c to stdlib (#24489)
Browse files Browse the repository at this point in the history
* move crc32c to stdlib

* fix capitalization
  • Loading branch information
KristofferC authored and stevengj committed Nov 7, 2017
1 parent 698ef27 commit 8e96fbc
Show file tree
Hide file tree
Showing 13 changed files with 150 additions and 115 deletions.
2 changes: 2 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,8 @@ export conv, conv2, deconv, filt, filt!, xcorr
@deprecate_moved watch_file "FileWatching" true true
@deprecate_moved FileMonitor "FileWatching" true true

@deprecate_moved crc32c "CRC32c" true true

# PR #21709
@deprecate cov(x::AbstractVector, corrected::Bool) cov(x, corrected=corrected)
@deprecate cov(x::AbstractMatrix, vardim::Int, corrected::Bool) cov(x, vardim, corrected=corrected)
Expand Down
1 change: 0 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,6 @@ export
atexit,
atreplinit,
clipboard,
crc32c,
exit,
ntuple,
quit,
Expand Down
4 changes: 2 additions & 2 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ function readuntil(io::GenericIOBuffer, delim::UInt8)
end

# copy-free crc32c of IOBuffer:
function crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000)
function _crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000)
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
io.readable || throw(ArgumentError("read failed, IOBuffer is not readable"))
n = min(nb, nb_available(io))
Expand All @@ -417,4 +417,4 @@ function crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000)
io.ptr += n
return crc
end
crc32c(io::IOBuffer, crc::UInt32=0x00000000) = crc32c(io, nb_available(io), crc)
_crc32c(io::IOBuffer, crc::UInt32=0x00000000) = _crc32c(io, nb_available(io), crc)
4 changes: 2 additions & 2 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ function compilecache(name::String)
if success(create_expr_cache(path, cachefile, concrete_deps))
# append checksum to the end of the .ji file:
open(cachefile, "a+") do f
write(f, hton(crc32c(seekstart(f))))
write(f, hton(_crc32c(seekstart(f))))
end
else
error("Failed to precompile $name to $cachefile.")
Expand Down Expand Up @@ -790,7 +790,7 @@ function stale_cachefile(modpath::String, cachefile::String)
end

# finally, verify that the cache file has a valid checksum
crc = crc32c(seekstart(io), filesize(io)-4)
crc = _crc32c(seekstart(io), filesize(io)-4)
if crc != ntoh(read(io, UInt32))
DEBUG_LOADING[] && info("JL_DEBUG_LOADING: Rejecting cache file $cachefile because it has an invalid checksum.")
return true
Expand Down
40 changes: 5 additions & 35 deletions base/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -713,44 +713,14 @@ if Sys.iswindows()

end

"""
    crc32c(data, crc::UInt32=0x00000000)

Compute the CRC-32c checksum of the given `data`, which can be
an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass
a starting `crc` integer to be mixed in with the checksum. The `crc` parameter
can be used to compute a checksum on data divided into chunks: performing
`crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
(Technically, a little-endian checksum is computed.)

There is also a method `crc32c(io, nb, crc)` to checksum `nb` bytes from
a stream `io`, or `crc32c(io, crc)` to checksum all the remaining bytes.
Hence you can do [`open(crc32c, filename)`](@ref) to checksum an entire file,
or `crc32c(seekstart(buf))` to checksum an [`IOBuffer`](@ref) without
calling [`take!`](@ref).

For a `String`, note that the result is specific to the UTF-8 encoding
(a different checksum would be obtained from a different Unicode encoding).
To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
but note that the result may be endian-dependent.
"""
function crc32c end

# Raw checksum primitive: hands `n` bytes starting at `a` to the `jl_crc32c`
# C routine, continuing from the running checksum `crc`.  `n` is passed to C
# as given, so keeping it within the bounds of `a` is the caller's job.
function unsafe_crc32c(a, n, crc)
    return ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)
end

crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) =
_crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) =
unsafe_crc32c(a, length(a) % Csize_t, crc)

crc32c(s::String, crc::UInt32=0x00000000) = unsafe_crc32c(s, sizeof(s) % Csize_t, crc)
_crc32c(s::String, crc::UInt32=0x00000000) = unsafe_crc32c(s, sizeof(s) % Csize_t, crc)

"""
crc32c(io::IO, [nb::Integer,] crc::UInt32=0x00000000)
Read up to `nb` bytes from `io` and return the CRC-32c checksum, optionally
mixed with a starting `crc` integer. If `nb` is not supplied, then
`io` will be read until the end of the stream.
"""
function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000)
function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000)
nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
# use block size 24576=8192*3, since that is the threshold for
# 3-way parallel SIMD code in the underlying jl_crc32c C function.
Expand All @@ -762,8 +732,8 @@ function crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000)
end
return unsafe_crc32c(buf, readbytes!(io, buf, min(nb, length(buf))), crc)
end
crc32c(io::IO, crc::UInt32=0x00000000) = crc32c(io, typemax(Int64), crc)
crc32c(io::IOStream, crc::UInt32=0x00000000) = crc32c(io, filesize(io)-position(io), crc)
_crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc)
_crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc)


"""
Expand Down
29 changes: 16 additions & 13 deletions doc/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,22 @@ cp_q(src, dest) = isfile(dest) || cp(src, dest)
# make links for stdlib package docs
if Sys.iswindows()
cp_q("../stdlib/DelimitedFiles/docs/src/index.md", "src/stdlib/delimitedfiles.md")
cp_q("../stdlib/Test/docs/src/index.md", "src/stdlib/test.md")
cp_q("../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md")
cp_q("../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md")
cp_q("../stdlib/Profile/docs/src/index.md", "src/stdlib/profile.md")
cp_q("../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md")
cp_q("../stdlib/FileWatching/docs/src/index.md", "src/stdlib/filewatching.md")
cp_q("../stdlib/Test/docs/src/index.md", "src/stdlib/test.md")
cp_q("../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md")
cp_q("../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md")
cp_q("../stdlib/Profile/docs/src/index.md", "src/stdlib/profile.md")
cp_q("../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md")
cp_q("../stdlib/FileWatching/docs/src/index.md", "src/stdlib/filewatching.md")
cp_q("../stdlib/CRC32c/docs/src/index.md", "src/stdlib/crc32c.md")
else
symlink_q("../../../stdlib/DelimitedFiles/docs/src/index.md", "src/stdlib/delimitedfiles.md")
symlink_q("../../../stdlib/Test/docs/src/index.md", "src/stdlib/test.md")
symlink_q("../../../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md")
symlink_q("../../../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md")
symlink_q("../../../stdlib/Profile/docs/src/index.md", "src/stdlib/profile.md")
symlink_q("../../../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md")
symlink_q("../../../stdlib/FileWatching/docs/src/index.md", "src/stdlib/filewatching.md")
symlink_q("../../../stdlib/Test/docs/src/index.md", "src/stdlib/test.md")
symlink_q("../../../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md")
symlink_q("../../../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md")
symlink_q("../../../stdlib/Profile/docs/src/index.md", "src/stdlib/profile.md")
symlink_q("../../../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md")
symlink_q("../../../stdlib/FileWatching/docs/src/index.md", "src/stdlib/filewatching.md")
symlink_q("../../../stdlib/CRC32c/docs/src/index.md", "src/stdlib/crc32c.md")
end

const PAGES = [
Expand Down Expand Up @@ -109,6 +111,7 @@ const PAGES = [
"stdlib/mmap.md",
"stdlib/sharedarrays.md",
"stdlib/filewatching.md",
"stdlib/crc32c.md",
],
"Developer Documentation" => [
"devdocs/reflection.md",
Expand Down Expand Up @@ -143,7 +146,7 @@ const PAGES = [
],
]

using DelimitedFiles, Test, Mmap, SharedArrays, Profile, Base64, FileWatching
using DelimitedFiles, Test, Mmap, SharedArrays, Profile, Base64, FileWatching, CRC32c

makedocs(
build = joinpath(pwd(), "_build/html/en"),
Expand Down
4 changes: 4 additions & 0 deletions doc/src/stdlib/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ test.md
mmap.md
sharedarrays.md
profile.md
base64.md
filewatching.md
crc32c.md

1 change: 0 additions & 1 deletion doc/src/stdlib/arrays.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ Base.cumprod!
Base.cumsum
Base.cumsum!
Base.cumsum_kbn
Base.crc32c
Base.LinAlg.diff
Base.repeat(::AbstractArray)
Base.rot180
Expand Down
1 change: 0 additions & 1 deletion doc/src/stdlib/io-network.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ Base.take!(::Base.GenericIOBuffer)
Base.fdio
Base.flush
Base.close
Base.crc32c(::IO, ::Integer, ::UInt32)
Base.write
Base.read
Base.read!
Expand Down
6 changes: 6 additions & 0 deletions stdlib/CRC32c/docs/src/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CRC32c

```@docs
CRC32c.crc32c
CRC32c.crc32c(::IO, ::Integer, ::UInt32)
```
50 changes: 50 additions & 0 deletions stdlib/CRC32c/src/CRC32c.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Standard library module for computing the CRC-32c checksum.

See [`CRC32c.crc32c`](@ref) for more information.
"""
module CRC32c

import Base.FastContiguousSubArray

export crc32c

"""
    crc32c(data, crc::UInt32=0x00000000)

Compute the CRC-32c checksum of the given `data`, which can be
an `Array{UInt8}`, a contiguous subarray thereof, or a `String`. Optionally, you can pass
a starting `crc` integer to be mixed in with the checksum. The `crc` parameter
can be used to compute a checksum on data divided into chunks: performing
`crc32c(data2, crc32c(data1))` is equivalent to the checksum of `[data1; data2]`.
(Technically, a little-endian checksum is computed.)

There is also a method `crc32c(io, nb, crc)` to checksum `nb` bytes from
a stream `io`, or `crc32c(io, crc)` to checksum all the remaining bytes.
Hence you can do [`open(crc32c, filename)`](@ref) to checksum an entire file,
or `crc32c(seekstart(buf))` to checksum an [`IOBuffer`](@ref) without
calling [`take!`](@ref).

For a `String`, note that the result is specific to the UTF-8 encoding
(a different checksum would be obtained from a different Unicode encoding).
To checksum an `a::Array` of some other bitstype, you can do `crc32c(reinterpret(UInt8,a))`,
but note that the result may be endian-dependent.
"""
function crc32c end

# Every method below is a thin forwarder: the implementation itself stays in
# Base as `Base._crc32c`, and this module only provides the public name.

# in-memory data: byte arrays, contiguous views of byte arrays, and strings
crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N},
       crc::UInt32=0x00000000) = Base._crc32c(a, crc)
crc32c(s::String, crc::UInt32=0x00000000) = Base._crc32c(s, crc)

"""
    crc32c(io::IO, [nb::Integer,] crc::UInt32=0x00000000)

Read up to `nb` bytes from `io` and return the CRC-32c checksum, optionally
mixed with a starting `crc` integer. If `nb` is not supplied, then
`io` will be read until the end of the stream.
"""
crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) = Base._crc32c(io, nb, crc)
crc32c(io::IO, crc::UInt32=0x00000000) = Base._crc32c(io, crc)
crc32c(io::IOStream, crc::UInt32=0x00000000) = Base._crc32c(io, crc)

end
63 changes: 63 additions & 0 deletions stdlib/CRC32c/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using Test
using CRC32c

# Shared CRC-32c checks.  `crc32c` is the implementation under test; it is
# called on byte arrays, array views, strings, IOBuffers and file streams,
# with and without a starting checksum / byte count.
function test_crc32c(crc32c)
    # known checksums of the byte sequences 1:len
    # (test data generated from @andrewcooke's CRC.jl package)
    for (len, expected) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
        @test crc32c(UInt8[1:len;]) == expected == crc32c(String(UInt8[1:len;]))
    end

    # the checksum of a prefix, passed as the starting crc for the remainder,
    # must equal the checksum of the whole array; the slices are views, so
    # the subarray method is what gets called here
    bytes = UInt8[1:255;]
    whole_crc = crc32c(bytes)
    @views for split in 1:255
        @test crc32c(bytes[split+1:end], crc32c(bytes[1:split])) == whole_crc
    end

    # IOBuffers: checksumming starts at the current position and consumes
    # what it reads
    @test crc32c(IOBuffer(bytes)) == whole_crc
    let iobuf = IOBuffer()
        write(iobuf, bytes[1:3])
        @test crc32c(seekstart(iobuf)) == crc32c(bytes[1:3])
        @test crc32c(iobuf) == 0x00000000
        @test crc32c(seek(iobuf, 1)) == crc32c(bytes[2:3])
        @test crc32c(seek(iobuf, 0), 2) == crc32c(bytes[1:2])
        @test crc32c(iobuf) == crc32c(bytes[3:3])
    end

    # files: whole-file checksum, bounded reads in sequence, reads at EOF
    let path = tempname()
        try
            write(path, bytes)
            @test open(crc32c, path) == whole_crc
            open(path, "r") do stream
                @test crc32c(stream, 16) == crc32c(bytes[1:16])
                @test crc32c(stream, 16) == crc32c(bytes[17:32])
                @test crc32c(stream) == crc32c(bytes[33:end])
                @test crc32c(stream, 1000) == 0x00000000
            end
            # 30000 bytes of fresh content, with and without a byte limit
            # larger than the file
            big = rand(UInt8, 30000)
            write(path, big)
            @test open(crc32c, path) == crc32c(big) == open(io -> crc32c(io, 10^6), path)
        finally
            rm(path, force=true)
        end
    end
end
# Reference wrappers around the `jl_crc32c_sw` C entry point, shaped like the
# public `crc32c` methods so that `test_crc32c` can run against them as well.

# Checksum `n` bytes starting at `a`, continuing from the running value `crc`.
unsafe_crc32c_sw(a, n, crc) =
    ccall(:jl_crc32c_sw, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)

# byte arrays and contiguous views of them
crc32c_sw(a::Union{Array{UInt8},Base.FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N},
          crc::UInt32=0x00000000) = unsafe_crc32c_sw(a, length(a), crc)

# strings are checksummed over their `sizeof(s)` bytes
crc32c_sw(s::String, crc::UInt32=0x00000000) = unsafe_crc32c_sw(s, sizeof(s), crc)

# Checksum up to `nb` bytes read from `io`, starting from `crc`.
# Throws `ArgumentError` when `nb` is negative.
function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000)
    nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
    # Scratch buffer of at most 24576 bytes.  `zeros` is used rather than the
    # bare `Array{UInt8}(n)` constructor because that constructor form is not
    # accepted by later Julia versions; the initial contents never matter,
    # since only the bytes `readbytes!` has just filled in are checksummed.
    buf = zeros(UInt8, min(nb, 24576))
    # whole-buffer reads while more than one buffer's worth is still wanted
    while !eof(io) && nb > 24576
        n = readbytes!(io, buf)
        crc = unsafe_crc32c_sw(buf, n, crc)
        nb -= n
    end
    # final (possibly empty) read of whatever is left of the budget
    return unsafe_crc32c_sw(buf, readbytes!(io, buf, min(nb, length(buf))), crc)
end

# no byte count given: read until the end of the stream
crc32c_sw(io::IO, crc::UInt32=0x00000000) = crc32c_sw(io, typemax(Int64), crc)
# run the shared checks against the `crc32c` function exported by CRC32c ...
test_crc32c(crc32c)
# ... and against the `crc32c_sw` wrappers around the `jl_crc32c_sw` C routine
test_crc32c(crc32c_sw)

60 changes: 0 additions & 60 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -585,66 +585,6 @@ if Sys.iswindows()
end
end

# Shared CRC-32c checks.  `crc32c` is the implementation under test; it is
# called on byte arrays, array views, strings, IOBuffers and file streams,
# with and without a starting checksum / byte count.
function test_crc32c(crc32c)
    # known checksums of the byte sequences 1:len
    # (test data generated from @andrewcooke's CRC.jl package)
    for (len, expected) in [(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)]
        @test crc32c(UInt8[1:len;]) == expected == crc32c(String(UInt8[1:len;]))
    end

    # the checksum of a prefix, passed as the starting crc for the remainder,
    # must equal the checksum of the whole array; the slices are views, so
    # the subarray method is what gets called here
    bytes = UInt8[1:255;]
    whole_crc = crc32c(bytes)
    @views for split in 1:255
        @test crc32c(bytes[split+1:end], crc32c(bytes[1:split])) == whole_crc
    end

    # IOBuffers: checksumming starts at the current position and consumes
    # what it reads
    @test crc32c(IOBuffer(bytes)) == whole_crc
    let iobuf = IOBuffer()
        write(iobuf, bytes[1:3])
        @test crc32c(seekstart(iobuf)) == crc32c(bytes[1:3])
        @test crc32c(iobuf) == 0x00000000
        @test crc32c(seek(iobuf, 1)) == crc32c(bytes[2:3])
        @test crc32c(seek(iobuf, 0), 2) == crc32c(bytes[1:2])
        @test crc32c(iobuf) == crc32c(bytes[3:3])
    end

    # files: whole-file checksum, bounded reads in sequence, reads at EOF
    let path = tempname()
        try
            write(path, bytes)
            @test open(crc32c, path) == whole_crc
            open(path, "r") do stream
                @test crc32c(stream, 16) == crc32c(bytes[1:16])
                @test crc32c(stream, 16) == crc32c(bytes[17:32])
                @test crc32c(stream) == crc32c(bytes[33:end])
                @test crc32c(stream, 1000) == 0x00000000
            end
            # 30000 bytes of fresh content, with and without a byte limit
            # larger than the file
            big = rand(UInt8, 30000)
            write(path, big)
            @test open(crc32c, path) == crc32c(big) == open(io -> crc32c(io, 10^6), path)
        finally
            rm(path, force=true)
        end
    end
end
# Reference wrappers around the `jl_crc32c_sw` C entry point, shaped like the
# `crc32c` methods so that `test_crc32c` can run against them as well.

# Checksum `n` bytes starting at `a`, continuing from the running value `crc`.
unsafe_crc32c_sw(a, n, crc) =
    ccall(:jl_crc32c_sw, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)

# byte arrays and contiguous views of them
crc32c_sw(a::Union{Array{UInt8},Base.FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N},
          crc::UInt32=0x00000000) = unsafe_crc32c_sw(a, length(a), crc)

# strings are checksummed over their `sizeof(s)` bytes
crc32c_sw(s::String, crc::UInt32=0x00000000) = unsafe_crc32c_sw(s, sizeof(s), crc)

# Checksum up to `nb` bytes read from `io`, starting from `crc`.
# Throws `ArgumentError` when `nb` is negative.
function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000)
    nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0"))
    # Scratch buffer of at most 24576 bytes.  `zeros` is used rather than the
    # bare `Array{UInt8}(n)` constructor because that constructor form is not
    # accepted by later Julia versions; the initial contents never matter,
    # since only the bytes `readbytes!` has just filled in are checksummed.
    buf = zeros(UInt8, min(nb, 24576))
    # whole-buffer reads while more than one buffer's worth is still wanted
    while !eof(io) && nb > 24576
        n = readbytes!(io, buf)
        crc = unsafe_crc32c_sw(buf, n, crc)
        nb -= n
    end
    # final (possibly empty) read of whatever is left of the budget
    return unsafe_crc32c_sw(buf, readbytes!(io, buf, min(nb, length(buf))), crc)
end

# no byte count given: read until the end of the stream
crc32c_sw(io::IO, crc::UInt32=0x00000000) = crc32c_sw(io, typemax(Int64), crc)
# run the shared checks against `crc32c` ...
test_crc32c(crc32c)
# ... and against the `crc32c_sw` wrappers around the `jl_crc32c_sw` C routine
test_crc32c(crc32c_sw)

let
old_have_color = Base.have_color
try
Expand Down

0 comments on commit 8e96fbc

Please sign in to comment.