From e10a0afcedc1daaa371eb189ca22869d69de7265 Mon Sep 17 00:00:00 2001
From: Ed Summers
Date: Wed, 10 Jan 2024 19:20:30 -0500
Subject: [PATCH] Column headers only as headers

This fixes a bug where column headers were being repeated for each batch
in the CSV output.
---
 marctable/utils.py | 1 +
 pyproject.toml     | 2 +-
 test_marctable.py  | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/marctable/utils.py b/marctable/utils.py
index 42a15c9..28d5d7d 100644
--- a/marctable/utils.py
+++ b/marctable/utils.py
@@ -31,6 +31,7 @@ def to_csv(
     first_batch = True
     for df in dataframe_iter(marc_input, rules=rules, batch=batch):
         df.to_csv(csv_output, header=first_batch, index=False)
+        first_batch = False
 
 
 def to_jsonl(
diff --git a/pyproject.toml b/pyproject.toml
index b6eb0fe..95974f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "marctable"
-version = "0.3.1"
+version = "0.3.2"
 description = "Convert MARC to CSV and Parquet"
 authors = ["Ed Summers "]
 license = "Apache"
diff --git a/test_marctable.py b/test_marctable.py
index 016222a..51da3ef 100644
--- a/test_marctable.py
+++ b/test_marctable.py
@@ -133,7 +133,7 @@ def test_to_csv() -> None:
         open("test-data/utf8.marc", "rb"), open("test-data/utf8.csv", "w"), batch=1000
     )
     df = pandas.read_csv("test-data/utf8.csv")
-    assert len(df) == 10622
+    assert len(df) == 10612
     assert len(df.columns) == 215
     assert (
         df.iloc[0]["F245"]