Skip to content

Commit

Permalink
Fixed a bug in CsvReader where lines were mistakenly treated as empty…
Browse files Browse the repository at this point in the history
… and skipped when skipEmptyLines was set (default)
  • Loading branch information
osiegmar committed Sep 23, 2024
1 parent 7824099 commit 567fc19
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 44 deletions.
10 changes: 8 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
- Nothing yet

## [3.3.1] - 2024-09-23
### Fixed
- Fixed a bug in CsvReader where lines were mistakenly treated as empty and skipped when skipEmptyLines was set (default). These affected lines made up solely of field separators, solely empty quoted fields, or fields rendered empty after applying optional field modifiers.

## [3.3.0] - 2024-09-19
### Added
- Implement `Flushable` interface for `CsvWriter` to allow flushing the underlying writer
Expand Down Expand Up @@ -126,7 +131,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Initial release

[Unreleased]: https://github.com/osiegmar/FastCSV/compare/v3.3.0...main
[Unreleased]: https://github.com/osiegmar/FastCSV/compare/v3.3.1...main
[3.3.0]: https://github.com/osiegmar/FastCSV/compare/v3.3.0...v3.3.1
[3.3.0]: https://github.com/osiegmar/FastCSV/compare/v3.2.0...v3.3.0
[3.2.0]: https://github.com/osiegmar/FastCSV/compare/v3.1.0...v3.2.0
[3.1.0]: https://github.com/osiegmar/FastCSV/compare/v3.0.0...v3.1.0
Expand Down
2 changes: 1 addition & 1 deletion lib/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ plugins {
}

group = "de.siegmar"
version = "3.3.0"
version = "3.3.1"

project.base.archivesName = "fastcsv"

Expand Down
28 changes: 0 additions & 28 deletions lib/src/intTest/java/blackbox/reader/CsvReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,34 +95,6 @@ void readerToString() {
+ "ignoreDifferentFieldCount=true]");
}

// skipped record

@Test
void singleRecordNoSkipEmpty() {
crb.skipEmptyLines(false);
assertThat(crb.ofCsvRecord("").iterator()).isExhausted();
}

@Test
void multipleRecordsNoSkipEmpty() {
crb.skipEmptyLines(false);

assertThat(crb.ofCsvRecord("\n\na").iterator()).toIterable()
.satisfiesExactly(
item1 -> CsvRecordAssert.assertThat(item1).isStartingLineNumber(1).fields().containsExactly(""),
item2 -> CsvRecordAssert.assertThat(item2).isStartingLineNumber(2).fields().containsExactly(""),
item3 -> CsvRecordAssert.assertThat(item3).isStartingLineNumber(3).fields().containsExactly("a"));
}

@Test
void skippedRecords() {
assertThat(crb.ofCsvRecord("\n\nfoo\n\nbar\n\n").stream())
.satisfiesExactly(
item1 -> CsvRecordAssert.assertThat(item1).isStartingLineNumber(3).fields().containsExactly("foo"),
item2 -> CsvRecordAssert.assertThat(item2).isStartingLineNumber(5).fields().containsExactly("bar")
);
}

// different field count

@ParameterizedTest
Expand Down
74 changes: 74 additions & 0 deletions lib/src/intTest/java/blackbox/reader/SkipRecordsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package blackbox.reader;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.ArrayList;
import java.util.List;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import de.siegmar.fastcsv.reader.AbstractBaseCsvCallbackHandler;
import de.siegmar.fastcsv.reader.CsvReader;
import de.siegmar.fastcsv.reader.CsvRecordHandler;
import de.siegmar.fastcsv.reader.FieldModifiers;
import de.siegmar.fastcsv.reader.RecordWrapper;
import testutil.CsvRecordAssert;

class SkipRecordsTest {

private final CsvReader.CsvReaderBuilder crb = CsvReader.builder();

@Test
void singleRecordNoSkipEmpty() {
crb.skipEmptyLines(false);
assertThat(crb.ofCsvRecord("").iterator()).isExhausted();
}

@Test
void multipleRecordsNoSkipEmpty() {
crb.skipEmptyLines(false);

assertThat(crb.ofCsvRecord("\n\na").iterator()).toIterable()
.satisfiesExactly(
item1 -> CsvRecordAssert.assertThat(item1).isStartingLineNumber(1).fields().containsExactly(""),
item2 -> CsvRecordAssert.assertThat(item2).isStartingLineNumber(2).fields().containsExactly(""),
item3 -> CsvRecordAssert.assertThat(item3).isStartingLineNumber(3).fields().containsExactly("a"));
}

@ParameterizedTest
@ValueSource(strings = {",\nfoo\n", ",,\nfoo\n", "''\nfoo\n", "' '\nfoo\n"})
void notEmpty(final String input) {
crb.quoteCharacter('\'');
final var cbh = new CsvRecordHandler(FieldModifiers.TRIM);
assertThat(crb.build(cbh, input).stream()).hasSize(2);
}

@ParameterizedTest
@ValueSource(strings = {",\nfoo\n", ",,\nfoo\n", "''\nfoo\n", "' '\nfoo\n"})
void notEmptyCustomCallback(final String input) {
crb.quoteCharacter('\'');
final AbstractBaseCsvCallbackHandler<String[]> cbh = new AbstractBaseCsvCallbackHandler<>() {
private final List<String> fields = new ArrayList<>();

@Override
protected void handleBegin(final long startingLineNumber) {
fields.clear();
}

@Override
protected void handleField(final int fieldIdx, final char[] buf,
final int offset, final int len, final boolean quoted) {
fields.add(new String(buf, offset, len).trim());
}

@Override
protected RecordWrapper<String[]> buildRecord() {
return wrapRecord(fields.toArray(new String[0]));
}
};
assertThat(crb.build(cbh, input).stream()).hasSize(2);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ protected void handleBegin(final long startingLineNumber) {
*/
@Override
protected final void addField(final char[] buf, final int offset, final int len, final boolean quoted) {
emptyLine = emptyLine && len == 0;
emptyLine = emptyLine && fieldCount == 0 && len == 0 && !quoted;
handleField(fieldCount++, buf, offset, len, quoted);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ abstract class AbstractCsvCallbackHandler<T> extends CsvCallbackHandler<T> {
*/
protected boolean comment;

/**
* Whether the line is empty.
*/
protected boolean emptyLine;

/**
* Constructs a new instance with an initial fields array of size {@value #INITIAL_FIELDS_SIZE}.
*/
Expand Down Expand Up @@ -81,6 +86,7 @@ protected void beginRecord(final long startingLineNumber) {
fieldIdx = 0;
recordSize = 0;
comment = false;
emptyLine = true;
}

/**
Expand All @@ -95,6 +101,7 @@ protected void addField(final char[] buf, final int offset, final int len, final
if (recordSize + len > Limits.MAX_RECORD_SIZE) {
throw new CsvParseException(maxRecordSizeExceededMessage(startingLineNumber));
}
emptyLine = emptyLine && fieldIdx == 0 && len == 0 && !quoted;
addField(modifyField(materializeField(buf, offset, len), quoted), quoted);
}

Expand Down Expand Up @@ -175,8 +182,9 @@ protected void setComment(final char[] buf, final int offset, final int len) {
* @throws CsvParseException if the addition exceeds the limit of record size.
*/
protected void setComment(final String value) {
recordSize += value.length();
comment = true;
emptyLine = false;
recordSize += value.length();
fields[fieldIdx++] = value;
}

Expand Down Expand Up @@ -234,7 +242,7 @@ protected String[] compactFields() {
* @throws NullPointerException if {@code rec} is {@code null}
*/
protected RecordWrapper<T> buildWrapper(final T rec) {
return new RecordWrapper<>(comment, recordSize == 0, fieldIdx, rec);
return new RecordWrapper<>(comment, emptyLine, fieldIdx, rec);
}

}
9 changes: 2 additions & 7 deletions lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -380,13 +380,8 @@ public CsvReaderBuilder commentCharacter(final char commentCharacter) {
/**
* Defines whether empty lines should be skipped when reading data.
* <p>
* The default implementation interprets empty lines as lines that do not contain any data.
* This includes lines that consist only of opening and closing quote characters.
* <p>
* A line that only contains whitespace characters is not considered empty.
* However, the determination of empty lines is done after field modifiers have been applied.
* If you use a field trimming modifier (like {@link FieldModifiers#TRIM}), lines that only contain whitespaces
* are considered empty.
* The default implementation interprets empty lines as lines that do not contain any data
* (no whitespace, no quotes, nothing).
* <p>
* Commented lines are not considered empty lines. Use {@link #commentStrategy(CommentStrategy)} for handling
* commented lines.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
/**
* Implementations of this class are used within {@link CsvCallbackHandler} implementations to modify the fields of
* a CSV record before storing them in the resulting object.
* <p>
* Applying field modifiers might affect the behavior of skipping empty lines – see
* {@link CsvReader.CsvReaderBuilder#skipEmptyLines(boolean)}.
*
* @see FieldModifiers
* @see SimpleFieldModifier
Expand Down

0 comments on commit 567fc19

Please sign in to comment.