-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
129 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,103 +2,6 @@ | |
|
||
# Sentinel passed as the delimiter when the caller did not specify one; the parsing | ||
# code compares against it to choose default splitting over a single separator. | ||
const invalid_dlm = char(0xfffffffe) | ||
|
||
# Read the next non-blank line from `io` and split it into fields.
#
# A line consisting only of the `eol` character is skipped. When `dlm` is the
# `invalid_dlm` sentinel the line is split with the default `split` behaviour,
# otherwise it is split on `dlm`. A trailing `eol` is removed from the last field.
function dlm_readrow(io::IO, dlm, eol::Char)
    line = readuntil(io, eol)
    while length(line) == 1 && line[1] == eol
        line = readuntil(io, eol)
    end

    fields = (dlm == invalid_dlm) ? split(line) : split(line, dlm, true)

    # readuntil keeps the terminator, so it ends up glued to the final field
    last_field = fields[end]
    if endswith(last_field, eol)
        fields[end] = chop(last_field)
    end
    return fields
end
|
||
# all strings
#
# Copy each row of fields into the matching row of `a`. `row` holds the fields of
# the first line on entry; subsequent lines are pulled from `io` with dlm_readrow.
# Returns `a`.
function readdlm(a, io, dlm, nr, nc, row, eol)
    r = 1
    while r <= nr
        a[r,:] = row
        # only fetch another line while rows remain to be filled
        (r < nr) && (row = dlm_readrow(io, dlm, eol))
        r += 1
    end
    return a
end
|
||
# all numeric, with NaN for invalid data
#
# Fill the preallocated `a` (nr x nc) from the fields in `row` and the lines that
# follow in `io`. For a Char element type every field must be exactly one character
# long. Fields that do not parse as a number become NaN when T is a floating-point
# type and raise an error otherwise.
# Returns `a`, in line with the other readdlm fill methods (the loop used to be the
# last expression, so this method returned nothing).
function readdlm{T<:Number}(a::Array{T}, io, dlm, nr, nc, row, eol)
    tmp = Array(Float64,1)  # on success the parsed value is read back from tmp[1]
    for i=1:nr
        for j=1:nc
            el = row[j]
            if T <: Char
                if length(el) != 1
                    error("file entry \"$(el)\" is not a Char")
                end
                a[i,j] = el[1]
            elseif float64_isvalid(el, tmp)
                a[i,j] = tmp[1]
            elseif T <: FloatingPoint
                a[i,j] = NaN
            else
                error("file entry \"$(el)\" cannot be converted to $T")
            end
        end
        if i < nr
            row = dlm_readrow(io, dlm, eol)
        end
    end
    a
end
|
||
# float64 or string
#
# Each field is stored as a Float64 when float64_isvalid accepts it and as the
# original field text otherwise. The short method starts at the top-left cell;
# the long method resumes at row `i0`, column `j0` with `row` holding the fields
# of line `i0`. Both return `a`.
function readdlm(a::Array{Any}, io, dlm, nr, nc, row, eol)
    return readdlm(a, io, dlm, nr, nc, row, eol, 1, 1)
end
function readdlm(a::Array{Any}, io, dlm, nr, nc, row, eol, i0, j0)
    parsed = Array(Float64,1)
    first_col = j0
    for r = i0:nr
        for c = first_col:nc
            field = row[c]
            a[r,c] = float64_isvalid(field, parsed) ? parsed[1] : field
        end
        # only the resumed row starts mid-way; later rows start at column 1
        first_col = 1
        (r < nr) && (row = dlm_readrow(io, dlm, eol))
    end
    return a
end
|
||
# float64 or cell depending on data
#
# Fill `a` with parsed numbers for as long as every field is numeric. At the first
# field that float64_isvalid rejects, `a` is converted to an Array{Any,2} and the
# remaining cells (starting with the offending one) are filled by the Array{Any}
# readdlm method; that converted array is returned. Otherwise `a` is returned.
function readdlm_auto(a, io, dlm, nr, nc, row, eol)
    parsed = Array(Float64, 1)
    r = 1
    while r <= nr
        for c = 1:nc
            if float64_isvalid(row[c], parsed)
                a[r,c] = parsed[1]
                continue
            end
            mixed = convert(Array{Any,2}, a)
            readdlm(mixed, io, dlm, nr, nc, row, eol, r, c)
            return mixed
        end
        (r < nr) && (row = dlm_readrow(io, dlm, eol))
        r += 1
    end
    return a
end
|
||
countlines(nameorfile) = countlines(nameorfile, '\n') | ||
function countlines(filename::String, eol::Char) | ||
open(filename) do io | ||
|
@@ -129,38 +32,108 @@ function countlines(io::IO, eol::Char) | |
nl | ||
end | ||
|
||
function readdlm_setup(fname::String, dlm, eol) | ||
if length(dlm) == 0 | ||
error("readdlm: no separator characters specified") | ||
end | ||
nr = countlines(fname,eol) | ||
io = open(fname) | ||
row = dlm_readrow(io, dlm, eol) | ||
nc = length(row) | ||
return (io, nr, nc, row) | ||
# Entry points with an explicit element type T: the end-of-line character defaults | ||
# to '\n', and the invalid_dlm sentinel is passed when no delimiter is given. | ||
readdlm(input, T::Type) = readdlm(input, invalid_dlm, T, '\n') | ||
readdlm(input, dlm::Char, T::Type) = readdlm(input, dlm, T, '\n') | ||
|
||
# Entry points without an element type, applying the same defaults. | ||
readdlm(input) = readdlm(input, invalid_dlm, '\n') | ||
readdlm(input, dlm::Char) = readdlm(input, dlm, '\n') | ||
|
||
# Fully specified forms: without T, readdlm_auto is called with Float64 and | ||
# auto=true; with T, it is called with that type and auto=false. | ||
readdlm(input, dlm::Char, eol::Char) = readdlm_auto(input, dlm, Float64, eol, true) | ||
readdlm(input, dlm::Char, T::Type, eol::Char) = readdlm_auto(input, dlm, T, eol, false) | ||
# Read all of `input` into one string buffer and parse it into a matrix.
#
# The buffer is scanned once by dlm_dims to size the result, once by dlm_offsets to
# record where each cell ends, and the cells are then converted by dlm_fill, whose
# result is returned. `auto` is forwarded to dlm_fill unchanged.
function readdlm_auto(input, dlm::Char, T::Type, eol::Char, auto::Bool=false)
    text = readall(input)
    dims = dlm_dims(text, eol, dlm)
    cell_ends = zeros(Int, dims[1], dims[2])
    dlm_offsets(text, dlm, eol, cell_ends)
    return dlm_fill(Array(T, dims[1], dims[2]), cell_ends, text, auto)
end
|
||
readdlm(fname::String, T::Type) = readdlm(fname, invalid_dlm, T, '\n') | ||
# Return the buffer position at which cell (row, col) begins.
#
# `offsets[r, c]` holds the end position of cell (r, c), or 0 when no end was
# recorded for it (for example a row with fewer than `ncols` fields). A cell begins
# two positions after the end of the nearest preceding cell that has a recorded end:
# one position for the one-character separator and one to step onto the data.
# Cells are visited in row-major order, so the cell before column 1 is the last
# column of the previous row. Cell (1, 1) always begins at position 1.
#
# The previous implementation recursed through an undefined variable (`csv`) with
# the wrong number of arguments, so it failed whenever the preceding cell had no
# recorded end. The walk is now an explicit loop with the same result.
function dlm_col_begin(ncols::Int, offsets::Array{Int,2}, row::Int, col::Int)
    while !(row == 1 && col == 1)
        # step back to the preceding cell in row-major order
        if col == 1
            row -= 1
            col = ncols
        else
            col -= 1
        end
        prev_end = offsets[row, col]
        (prev_end != 0) && return prev_end + 2
    end
    return 1
end
|
||
readdlm(fname::String, dlm, T::Type) = readdlm(fname, dlm, T, '\n') | ||
# Convert every cell of the text buffer `sbuff` into an element of `cells`.
#
# The text of cell (row, col) spans from dlm_col_begin(...) to offsets[row, col].
# Conversion depends on the element type T of `cells`:
#   * Char   - the cell must be exactly one character long; that character is stored
#   * Number - the parsed Float64 is stored; an unparsable cell either restarts the
#              whole fill into an Array{Any} (when `auto` is true) or stores NaN
#   * String - the cell text is stored as is
#   * Any    - the parsed Float64 when the cell is numeric, the cell text otherwise
# Any other T raises an error. Returns the filled array, which is a new Array{Any}
# when the `auto` fallback was taken.
#
# The Char test comes before the Number test on purpose.
# NOTE(review): presumably Char is a subtype of Number in this Julia version, which
# would make the order significant - confirm before reordering.
# NOTE(review): storing NaN when T is a non-floating Number type looks like it
# cannot succeed - confirm whether an explicit error is wanted there.
function dlm_fill{T}(cells::Array{T,2}, offsets::Array{Int,2}, sbuff::String, auto::Bool)
    maxrow,maxcol = size(cells)
    tmp64 = Array(Float64,1)  # on success the parsed value is read back from tmp64[1]
    for row in 1:maxrow
        for col in 1:maxcol
            start_pos = dlm_col_begin(maxcol, offsets, row, col)
            end_pos = offsets[row,col]
            sval = SubString(sbuff, start_pos, end_pos)

            if T <: Char
                (length(sval) != 1) && error("file entry \"$(sval)\" is not a Char")
                # store the character itself, not the one-character substring
                cells[row,col] = sval[1]
            elseif T <: Number
                if float64_isvalid(sval, tmp64)
                    cells[row,col] = tmp64[1]
                elseif auto
                    # first non-numeric cell: start over with a heterogeneous array
                    return dlm_fill(Array(Any,maxrow,maxcol), offsets, sbuff, false)
                else
                    cells[row,col] = NaN
                end
            elseif T <: String
                cells[row,col] = sval
            elseif T == Any
                cells[row,col] = float64_isvalid(sval, tmp64) ? tmp64[1] : sval
            else
                error("file entry \"$(sval)\" cannot be converted to $T")
            end
        end
    end
    cells
end
|
||
readdlm(fname::String) = readdlm(fname, invalid_dlm, '\n') | ||
readdlm(fname::String, dlm) = readdlm(fname, dlm, '\n') | ||
|
||
function readdlm(fname::String, dlm, eol::Char) | ||
(io, nr, nc, row) = readdlm_setup(fname, dlm, eol) | ||
a = Array(Float64, nr, nc) | ||
a = readdlm_auto(a, io, dlm, nr, nc, row, eol) | ||
close(io) | ||
return a | ||
# Record in `offsets[row, col]` the position at which each cell of `sbuff` ends.
#
# The buffer is walked character by character. Every delimiter or end-of-line
# character closes the current cell; an end-of-line character also moves on to the
# next row. When `dlm` is the `invalid_dlm` sentinel, membership in
# `_default_delims` decides what counts as a delimiter. Scanning stops once the last
# cell of the last row has been closed.
#
# Fixes relative to the previous version:
#   * the loop now also examines the final byte of the buffer (`<=` instead of `<`),
#     so a trailing end-of-line character closes the last cell;
#   * the end of the last cell is preset to the end of the buffer, so a buffer that
#     does not finish with an end-of-line character still yields its last field
#     (the preset is overwritten when a closing character is found).
#
# NOTE(review): `idx-2` assumes the closing character occupies a single byte -
# confirm before using multi-byte delimiters.
function dlm_offsets(sbuff::UTF8String, dlm, eol, offsets::Array{Int,2})
    col = 0
    row = 1
    maxrow,maxcol = size(offsets)
    nbytes = length(sbuff.data)
    offsets[maxrow,maxcol] = nbytes
    use_default = (dlm == invalid_dlm)
    idx = 1
    while idx <= nbytes
        val,idx = next(sbuff, idx)
        (val != eol) && (use_default ? !contains(_default_delims, val) : (val != dlm)) && continue
        col += 1
        # idx already points past the closing character, so the cell ended at idx-2
        offsets[row,col] = idx-2
        (row >= maxrow) && (col == maxcol) && break
        (val == eol) && (row += 1; col = 0)
    end
end
# Byte-oriented variant of dlm_offsets for ASCII buffers: record in
# `offsets[row, col]` the position at which each cell of `sbuff` ends.
#
# Every delimiter or end-of-line byte closes the current cell; an end-of-line byte
# also moves on to the next row. Scanning stops once the last cell of the last row
# has been closed.
#
# Fixes relative to the previous version:
#   * the `invalid_dlm` sentinel is detected before the delimiter is narrowed to a
#     byte; the old comparison was made against the narrowed byte and so could not
#     select the default delimiters;
#   * the end of the last cell is preset to the end of the buffer, so a buffer that
#     does not finish with an end-of-line byte still yields its last field (the
#     preset is overwritten when a closing byte is found).
#
# NOTE(review): in default mode a byte is looked up in `_default_delims`, which is
# defined elsewhere - confirm its entries compare equal to bytes.
function dlm_offsets(sbuff::ASCIIString, dlmc, eolc, offsets::Array{Int,2})
    dbuff = sbuff.data
    use_default = (dlmc == invalid_dlm)
    dlm = use_default ? 0x00 : uint8(dlmc)  # placeholder in default mode, never compared
    eol = uint8(eolc)
    col = 0
    row = 1
    maxrow,maxcol = size(offsets)
    offsets[maxrow,maxcol] = length(dbuff)
    for idx in 1:length(dbuff)
        val = dbuff[idx]
        (val != eol) && (use_default ? !contains(_default_delims, val) : (val != dlm)) && continue
        col += 1
        # idx is the closing byte itself, so the cell ended one position earlier
        offsets[row,col] = idx-1
        (row >= maxrow) && (col == maxcol) && break
        (val == eol) && (row += 1; col = 0)
    end
end
|
||
# Determine the (rows, columns) of the delimited data in a buffer.
#
# Every delimiter or end-of-line element closes one field. An end-of-line element
# also closes a row, and the widest row seen so far gives the column count. A final
# row that closed at least one field but has no end-of-line element is still counted
# as a row. Both dimensions are at least 1.
#
# The ASCIIString method scans the raw bytes. It narrows `eol` and `dlm` to bytes
# but passes the `invalid_dlm` sentinel through unchanged, so that default-delimiter
# mode is still recognised by the scanning method.
#
# Fixes relative to the previous version:
#   * `nrow += 1` referred to an undefined variable and threw on an unterminated
#     last row; it now increments `nrows`;
#   * the sentinel used to be narrowed to a byte before being compared.
#
# NOTE(review): an unterminated last row contributes only its closed fields to the
# column count, and is not counted at all when it contains no delimiter - confirm
# whether that is intended.
dlm_dims(s::ASCIIString, eol, dlm) =
    dlm_dims(s.data, uint8(eol), (dlm == invalid_dlm) ? dlm : uint8(dlm))
function dlm_dims(dbuff, eol, dlm)
    ncols = nrows = col = 0
    use_default = (dlm == invalid_dlm)
    for val in dbuff
        (val != eol) && (use_default ? !contains(_default_delims, val) : (val != dlm)) && continue
        col += 1
        (val == eol) && (nrows += 1; ncols = max(ncols, col); col = 0)
    end
    (col > 0) && (nrows += 1)
    ncols = max(ncols, col, 1)
    nrows = max(nrows, 1)
    return (nrows, ncols)
end
|
||
# Read comma-separated data by forwarding to readdlm with ',' as the delimiter. | ||
readcsv(io) = readdlm(io, ',') | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@tanmaykm, where does the `csv` variable come from here? Is this ever called? I don't quite get this conditional. I'm trying to dig into this new code for a port to DataFrames. Thanks!