Skip to content

Commit dd8bcb2

Browse files
committed
fix
1 parent dd08cae commit dd8bcb2

File tree

4 files changed

+312
-0
lines changed

4 files changed

+312
-0
lines changed

src/test/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ SET(test_files ./tests.cpp
8686
./search/spans/TestSpanExplanationsOfNonMatches.cpp
8787
./search/spans/TestSpanExplanationsOfNonMatches.h
8888
./index/TestIndexCompaction.cpp
89+
./index/TestIndexCompress.cpp
8990
./index/TestIndexModifier.cpp
9091
./index/TestIndexWriter.cpp
9192
./index/TestIndexModifier.cpp

src/test/index/TestIndexCompress.cpp

+309
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
#include <CLucene.h> // IWYU pragma: keep
2+
#include <CLucene/index/IndexReader.h>
3+
#include <CLucene/search/query/TermPositionIterator.h>
4+
#include <CLucene/util/stringUtil.h>
5+
6+
#include <ctime>
7+
#include <exception>
8+
#include <stdexcept>
9+
#include <string>
10+
#include <vector>
11+
12+
#include "CLucene/analysis/Analyzers.h"
13+
#include "CLucene/index/IndexVersion.h"
14+
#include "CLucene/index/Term.h"
15+
#include "CLucene/store/FSDirectory.h"
16+
#include "test.h"
17+
18+
CL_NS_USE(search)
19+
CL_NS_USE(store)
20+
CL_NS_USE(index)
21+
CL_NS_USE(util)
22+
23+
// Number of random values (one document each) generated for the full-size
// compress and compaction tests in this file.
static constexpr int32_t doc_count{10000};
24+
25+
// Emulates a `finally` clause: `cleanup` always runs first, then the exception
// stored in `pending` (a std::exception_ptr) is rethrown if one was captured.
// The expansion is a complete braced block, so the call sites in this file
// use it without a trailing semicolon. `cleanup` must not contain top-level
// commas, because the preprocessor would treat them as argument separators.
#define FINALLY(pending, cleanup)            \
    {                                        \
        cleanup;                             \
        if (pending) {                       \
            std::rethrow_exception(pending); \
        }                                    \
    }
32+
33+
// Returns the timestamp of today's local midnight expressed in whole days.
// The tests pass the result to std::srand(), so every run on the same
// calendar day draws the same pseudo-random sequence.
//
// If std::localtime() cannot convert the current time it returns nullptr; in
// that case the raw timestamp is used instead of dereferencing a null pointer.
int32_t getDaySeed() {
    constexpr std::time_t seconds_per_day = 60 * 60 * 24;
    const std::time_t now = std::time(nullptr);

    const std::tm* shared = std::localtime(&now);
    if (shared == nullptr) {
        return static_cast<int32_t>(now / seconds_per_day);
    }

    // Work on a private copy: std::localtime() returns a pointer to storage
    // that is shared with other calendar-time calls, so it is left untouched.
    std::tm midnight = *shared;
    midnight.tm_sec = 0;
    midnight.tm_min = 0;
    midnight.tm_hour = 0;
    return static_cast<int32_t>(std::mktime(&midnight) / seconds_per_day);
}
41+
42+
// Produces a dotted-quad string "a.b.c.d" whose four components are drawn,
// left to right, from rand() % 256.
static std::string generateRandomIP() {
    std::string address;
    for (int octet = 0; octet < 4; ++octet) {
        if (octet != 0) {
            address += ".";
        }
        address += std::to_string(rand() % 256);
    }
    return address;
}
53+
54+
static void write_index(const std::string& name, RAMDirectory* dir, IndexVersion index_version,
55+
const std::vector<std::string>& datas) {
56+
auto* analyzer = _CLNEW lucene::analysis::SimpleAnalyzer<char>;
57+
analyzer->set_stopwords(nullptr);
58+
auto* indexwriter = _CLNEW lucene::index::IndexWriter(dir, analyzer, true);
59+
indexwriter->setRAMBufferSizeMB(512);
60+
indexwriter->setMaxBufferedDocs(-1);
61+
indexwriter->setMaxFieldLength(0x7FFFFFFFL);
62+
indexwriter->setMergeFactor(1000000000);
63+
indexwriter->setUseCompoundFile(false);
64+
65+
auto* char_string_reader = _CLNEW lucene::util::SStringReader<char>;
66+
67+
auto* doc = _CLNEW lucene::document::Document();
68+
int32_t field_config = lucene::document::Field::STORE_NO;
69+
field_config |= lucene::document::Field::INDEX_NONORMS;
70+
field_config |= lucene::document::Field::INDEX_TOKENIZED;
71+
auto field_name = std::wstring(name.begin(), name.end());
72+
auto* field = _CLNEW lucene::document::Field(field_name.c_str(), field_config);
73+
field->setOmitTermFreqAndPositions(false);
74+
field->setIndexVersion(index_version);
75+
doc->add(*field);
76+
77+
for (const auto& data : datas) {
78+
char_string_reader->init(data.data(), data.size(), false);
79+
auto* stream = analyzer->reusableTokenStream(field->name(), char_string_reader);
80+
field->setValue(stream);
81+
indexwriter->addDocument(doc);
82+
}
83+
84+
indexwriter->close();
85+
86+
_CLLDELETE(indexwriter);
87+
_CLLDELETE(doc);
88+
_CLLDELETE(analyzer);
89+
_CLLDELETE(char_string_reader);
90+
}
91+
92+
// Opens the index stored in `dir` and validates its contents.
//
// Checks that the reader reports exactly `doc_count` documents, then walks
// every term and every posting of that term and verifies that each recorded
// position lies in [0, 3]. Any violation is raised as a
// CL_ERR_IllegalArgument error. The reader, the term enumerator and the
// per-term position cursors are released before an exception leaves this
// function.
static void read_index(RAMDirectory* dir, int32_t doc_count) {
    auto* reader = IndexReader::open(dir);

    std::exception_ptr eptr;
    try {
        const auto actual_docs = reader->numDocs();
        if (doc_count != actual_docs) {
            std::string msg = "doc_count: " + std::to_string(doc_count) +
                              ", numDocs: " + std::to_string(actual_docs);
            _CLTHROWA(CL_ERR_IllegalArgument, msg.c_str());
        }

        Term* term = nullptr;
        TermEnum* enumerator = nullptr;
        try {
            enumerator = reader->terms();
            while (enumerator->next()) {
                term = enumerator->term();

                auto* term_pos = reader->termPositions(term);

                // Distinct name so the outer `eptr` is not shadowed.
                std::exception_ptr term_eptr;
                try {
                    TermPositionIterator iter(term_pos);
                    while (iter.nextDoc() != INT32_MAX) {
                        for (int32_t i = 0; i < iter.freq(); i++) {
                            int32_t pos = iter.nextPosition();
                            // NOTE(review): the bound of 3 presumably reflects
                            // the four dot-separated parts of each generated
                            // value - confirm against the analyzer.
                            if (pos < 0 || pos > 3) {
                                std::string msg = "pos: " + std::to_string(pos);
                                _CLTHROWA(CL_ERR_IllegalArgument, msg.c_str());
                            }
                        }
                    }
                } catch (...) {
                    term_eptr = std::current_exception();
                }
                FINALLY(term_eptr, { _CLDELETE(term_pos); })

                _CLDECDELETE(term);
            }
        }
        _CLFINALLY({
            _CLDECDELETE(term);
            // `enumerator` is still null when reader->terms() itself threw.
            if (enumerator != nullptr) {
                enumerator->close();
                _CLDELETE(enumerator);
            }
        })

    } catch (...) {
        eptr = std::current_exception();
    }
    FINALLY(eptr, {
        reader->close();
        _CLLDELETE(reader);
    })
}
147+
148+
// Runs IndexWriter::indexCompaction from `srcDirs` into `destDirs`, using a
// scratch writer opened on `tmp_dir`.
//
// `count` is the number of documents assumed to live in every source index.
// Documents are visited position by position across all sources, and each is
// mapped to a (destination index, id within destination) pair; a destination
// is considered full after count * destDirs.size() documents.
//
// NOTE(review): that quota only lines up with the real totals when
// srcDirs.size() == destDirs.size() squared (e.g. 9 sources and 3
// destinations); for other shapes the destination index can run past
// destDirs. Confirm whether that is intended for the exception test.
static void index_compaction(RAMDirectory* tmp_dir, std::vector<lucene::store::Directory*> srcDirs,
                             std::vector<lucene::store::Directory*> destDirs, int32_t count) {
    auto* analyzer = _CLNEW lucene::analysis::SimpleAnalyzer<char>;
    auto* indexwriter = _CLNEW lucene::index::IndexWriter(tmp_dir, analyzer, true);

    using DocTarget = std::pair<uint32_t, uint32_t>;
    const size_t source_total = srcDirs.size();
    const size_t docs_per_dest = count * destDirs.size();

    // trans_vec[source][doc] -> (destination index, new doc id).
    std::vector<std::vector<DocTarget>> trans_vec(source_total, std::vector<DocTarget>(count));
    uint32_t dest_slot = 0;
    uint32_t next_id = 0;
    for (int32_t doc = 0; doc < count; ++doc) {
        for (size_t src = 0; src < source_total; ++src) {
            if (next_id == docs_per_dest) {
                ++dest_slot;
                next_id = 0;
            }
            trans_vec[src][doc] = DocTarget(dest_slot, next_id);
            ++next_id;
        }
    }

    // Every destination is declared to receive the same number of documents.
    std::vector<uint32_t> dest_index_docs(destDirs.size(), static_cast<uint32_t>(docs_per_dest));

    std::exception_ptr eptr;
    try {
        indexwriter->indexCompaction(srcDirs, destDirs, trans_vec, dest_index_docs);
    } catch (...) {
        eptr = std::current_exception();
    }
    FINALLY(eptr, {
        indexwriter->close();
        _CLDELETE(indexwriter);
        _CLDELETE(analyzer);
    })
}
184+
185+
// Writes `doc_count` random dotted-quad strings into a V2-format in-memory
// index and asserts that reading it back raises no exception.
void TestIndexCompressV2(CuTest* tc) {
    std::srand(getDaySeed());

    const std::string field = "v2_field_name";
    std::vector<std::string> values;
    for (int32_t n = 0; n < doc_count; ++n) {
        values.push_back(generateRandomIP());
    }

    RAMDirectory ram_dir;
    write_index(field, &ram_dir, IndexVersion::kV2, values);

    // Any exception thrown by the verification pass fails the test.
    try {
        read_index(&ram_dir, doc_count);
    } catch (...) {
        assertTrue(false);
    }

    std::cout << "\nTestIndexCompressV2 sucess" << std::endl;
}
206+
207+
// Builds one V2-format source index with `doc_count` documents, then compacts
// nine references to it into three destination directories and asserts that
// the compaction raises no exception.
void TestIndexCompactionV2(CuTest* tc) {
    std::srand(getDaySeed());
    const std::string field = "field_name";

    // Source index, written in the V2 format.
    RAMDirectory in_dir;
    {
        std::vector<std::string> values;
        for (int32_t n = 0; n < doc_count; ++n) {
            values.push_back(generateRandomIP());
        }
        write_index(field, &in_dir, IndexVersion::kV2, values);
    }

    // Compaction of the V2 source into three outputs.
    RAMDirectory outdir1;
    RAMDirectory outdir2;
    RAMDirectory outdir3;
    {
        // The same source directory is listed nine times.
        lucene::store::Directory* const source = &in_dir;
        std::vector<lucene::store::Directory*> srcDirs(9, source);
        std::vector<lucene::store::Directory*> destDirs {&outdir1, &outdir2, &outdir3};

        try {
            RAMDirectory empty_dir;
            index_compaction(&empty_dir, srcDirs, destDirs, doc_count);
        } catch (...) {
            assertTrue(false);
        }
    }

    std::cout << "TestIndexCompactionV2 sucess" << std::endl;
}
252+
253+
// Builds one V1-format and one V2-format source index (ten documents each)
// and asserts that compacting the two together raises an exception.
void TestIndexCompactionException(CuTest* tc) {
    std::srand(getDaySeed());
    const std::string field = "field_name";

    // Source index in the V1 format.
    RAMDirectory in_dir_v1;
    {
        std::vector<std::string> values;
        for (int32_t n = 0; n < 10; ++n) {
            values.push_back(generateRandomIP());
        }
        write_index(field, &in_dir_v1, IndexVersion::kV1, values);
    }

    // Source index in the V2 format.
    RAMDirectory in_dir_v2;
    {
        std::vector<std::string> values;
        for (int32_t n = 0; n < 10; ++n) {
            values.push_back(generateRandomIP());
        }
        write_index(field, &in_dir_v2, IndexVersion::kV2, values);
    }

    // Compacting both sources into one destination is expected to throw.
    RAMDirectory out_dir;
    {
        std::vector<lucene::store::Directory*> srcDirs {&in_dir_v1, &in_dir_v2};
        std::vector<lucene::store::Directory*> destDirs {&out_dir};

        bool threw = false;
        try {
            RAMDirectory empty_dir;
            index_compaction(&empty_dir, srcDirs, destDirs, 10);
        } catch (...) {
            threw = true;
        }
        assertTrue(threw);
    }

    std::cout << "TestIndexCompactionException sucess" << std::endl;
}
300+
301+
// Assembles the "CLucene Index Compress Test" suite from the three test cases
// defined in this file and returns it to the test runner.
CuSuite* testIndexCompress() {
    CuSuite* compress_suite = CuSuiteNew(_T("CLucene Index Compress Test"));

    SUITE_ADD_TEST(compress_suite, TestIndexCompressV2);
    SUITE_ADD_TEST(compress_suite, TestIndexCompactionV2);
    SUITE_ADD_TEST(compress_suite, TestIndexCompactionException);

    return compress_suite;
}

src/test/test.h

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ CuSuite *testSearchRange(void);
8585
CuSuite *testMultiPhraseQuery(void);
8686
CuSuite *testIndexCompaction(void);
8787
CuSuite *testStringReader(void);
88+
CuSuite *testIndexCompress(void);
8889

8990
#ifdef TEST_CONTRIB_LIBS
9091
//CuSuite *testGermanAnalyzer(void);

src/test/tests.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ unittest tests[] = {
1919
{"MultiPhraseQuery", testMultiPhraseQuery},
2020
{"IndexCompaction", testIndexCompaction},
2121
{"testStringReader", testStringReader},
22+
{"IndexCompress", testIndexCompress},
2223
#ifdef TEST_CONTRIB_LIBS
2324
{"chinese", testchinese},
2425
#endif

0 commit comments

Comments
 (0)