Skip to content

Commit

Permalink
Merge pull request #13 from whatsthecraic/non-text-bytes-in-the-header
Browse files Browse the repository at this point in the history
Non text bytes in the header
  • Loading branch information
Drvi authored Nov 22, 2023
2 parents a7837ec + 1bde0d6 commit 40498f3
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
18 changes: 16 additions & 2 deletions src/init_parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,14 @@ function process_header_and_schema_and_finish_row_skip!(
elseif !Parsers.ok(code)
throw(HeaderParsingError("Error parsing header for column $i at $(lines_skipped_total+1):$(pos) (row:pos)."))
else
push!(parsing_ctx.header, Symbol(strip(Parsers.getstring(row_bytes, val, options.e))))
identifier_s = strip(Parsers.getstring(row_bytes, val, options.e))
try
push!(parsing_ctx.header, Symbol(identifier_s))
catch
# defensively truncate identifier_s to 2k characters in case something is very cursed
throw(HeaderParsingError("Error parsing header for column $i ('$(first(identifier_s, 2000))') at " *
"$(lines_skipped_total+1):$pos (row:pos): presence of invalid non text bytes in the CSV snippet"))
end
end
pos += tlen
end
Expand Down Expand Up @@ -166,7 +173,14 @@ function process_header_and_schema_and_finish_row_skip!(
elseif !Parsers.ok(code)
throw(HeaderParsingError("Error parsing header for column $i at $(lines_skipped_total+1):$pos (row:pos)."))
else
push!(parsing_ctx.header, Symbol(strip(Parsers.getstring(row_bytes, val, options.e))))
identifier_s = strip(Parsers.getstring(row_bytes, val, options.e))
try
push!(parsing_ctx.header, Symbol(identifier_s))
catch
# defensively truncate identifier_s to 2k characters in case something is very cursed
throw(HeaderParsingError("Error parsing header for column $i ('$(first(identifier_s, 2000))') at " *
"$(lines_skipped_total+1):$pos (row:pos): presence of invalid non text bytes in the CSV snippet"))
end
end
pos += tlen
i += 1
Expand Down
8 changes: 8 additions & 0 deletions test/exception_handling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,14 @@ end
header=true,
)

@test_throws ChunkedCSV.HeaderParsingError("Error parsing header for column 1 ('a\0') at 1:1 (row:pos): presence of invalid non text bytes in the CSV snippet") parse_file(IOBuffer("""
a\0,b
1,2
"""),
[Int,Int],
header=true,
)

@test_throws ArgumentError("Provided header and schema names don't match. In schema, not in header: [:q]. In header, not in schema: [:a, :b, :c]") parse_file(IOBuffer("""
a,b,c
1,2,3
Expand Down

5 comments on commit 40498f3

@Drvi
Copy link
Member Author

@Drvi Drvi commented on 40498f3 Nov 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Drvi
Copy link
Member Author

@Drvi Drvi commented on 40498f3 Nov 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error while trying to register: Register Failed
@Drvi, it looks like you are not a publicly listed member/owner in the parent organization (RelationalAI).
If you are a member/owner, you will need to change your membership to public. See GitHub Help

@Drvi
Copy link
Member Author

@Drvi Drvi commented on 40498f3 Nov 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/96030

Tip: Release Notes

Did you know you can add release notes too? Just add Markdown-formatted text underneath the comment, after the text
"Release notes:", and it will be added to the registry PR; if TagBot is installed, it will also be added to the
release that TagBot creates. For example:

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here, just re-invoke the command above and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or it can be done manually through the GitHub interface, or via:

git tag -a v0.1.0 -m "<description of version>" 40498f337ecb1da27d7b4c5b23f43bcbb8a8a273
git push origin v0.1.0

Please sign in to comment.