Skip to content

Commit

Permalink
Column headers only as headers
Browse files Browse the repository at this point in the history
This fixes a bug where column headers were being repeated for each batch
in the CSV output.
  • Loading branch information
edsu committed Jan 11, 2024
1 parent 40a000f commit e10a0af
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions marctable/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def to_csv(
first_batch = True
for df in dataframe_iter(marc_input, rules=rules, batch=batch):
df.to_csv(csv_output, header=first_batch, index=False)
+first_batch = False


def to_jsonl(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "marctable"
-version = "0.3.1"
+version = "0.3.2"
description = "Convert MARC to CSV and Parquet"
authors = ["Ed Summers <ehs@pobox.com>"]
license = "Apache"
Expand Down
2 changes: 1 addition & 1 deletion test_marctable.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_to_csv() -> None:
open("test-data/utf8.marc", "rb"), open("test-data/utf8.csv", "w"), batch=1000
)
df = pandas.read_csv("test-data/utf8.csv")
-assert len(df) == 10622
+assert len(df) == 10612
assert len(df.columns) == 215
assert (
df.iloc[0]["F245"]
Expand Down

0 comments on commit e10a0af

Please sign in to comment.