Skip to content

Commit

Permalink
Handle empty content by treating contentLength of 0 as empty data
Browse files Browse the repository at this point in the history
  • Loading branch information
marevol committed Mar 14, 2024
1 parent a7ac5ab commit cbcf9c5
Showing 1 changed file with 10 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,8 +23,8 @@
 import java.util.Map;

 import org.apache.commons.io.output.DeferredFileOutputStream;
-import org.apache.poi.util.StringUtil;
 import org.codelibs.core.io.CopyUtil;
+import org.codelibs.core.lang.StringUtil;
 import org.codelibs.fess.crawler.container.CrawlerContainer;
 import org.codelibs.fess.crawler.entity.ExtractData;
 import org.codelibs.fess.crawler.exception.CrawlingAccessException;
Expand Down Expand Up @@ -119,10 +119,15 @@ public ExtractData extract() {
 if (contentLength > maxContentLength) {
 throw new MaxLengthExceededException(
 "The content length (" + contentLength + " byte) is over " + maxContentLength + " byte.");
-}
-
-try (InputStream is = getContentInputStream(out)) {
-return extractor.getText(is, params);
+} else if (contentLength == 0) {
+if (logger.isDebugEnabled()) {
+logger.debug("The content length is 0.");
+}
+return new ExtractData(StringUtil.EMPTY);
+} else {
+try (InputStream is = getContentInputStream(out)) {
+return extractor.getText(is, params);
+}
 }
 } catch (final CrawlingAccessException e) {
 throw e;
Expand Down

0 comments on commit cbcf9c5

Please sign in to comment.