Skip to content

Commit 7cf6cf4

Browse files
authored
[fix](index compaction) Skip writing terms with a doc frequency of 0 (#248)
1 parent 9882657 commit 7cf6cf4

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

src/core/CLucene/index/IndexWriter.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -1831,6 +1831,17 @@ void IndexWriter::mergeTerms(bool hasProx, IndexVersion indexVersion) {
18311831
}
18321832

18331833
for (int i = 0; i < numDestIndexes; ++i) {
1834+
if (dfs[i] == 0) {
1835+
if (infoStream != nullptr) {
1836+
std::string name = lucene_wcstoutf8string(smallestTerm->text(), smallestTerm->textLength());
1837+
std::string field = lucene_wcstoutf8string(smallestTerm->field(), wcslen(smallestTerm->field()));
1838+
std::stringstream ss;
1839+
ss << "term: " << name << ", field: " << field << ", doc frequency is zero[" << dfs[i] << "], skip it." << "\n";
1840+
message(ss.str());
1841+
}
1842+
// A doc frequency of 0 means the term has been deleted, so skip writing it for this destination index.
1843+
continue;
1844+
}
18341845
DefaultSkipListWriter *skipListWriter = skipListWriterList[i];
18351846
CL_NS(store)::IndexOutput *freqOutput = freqOutputList[i];
18361847
CL_NS(store)::IndexOutput *proxOutput = proxOutputList[i];

0 commit comments

Comments
 (0)