From 2749564424200ae93355099062ba842ffa05349e Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 5 Mar 2024 17:57:22 +0400 Subject: [PATCH 1/7] Made entries in test library point to "different" files Two entries in test library.xml used to point to the same file path. Note that though the third entry pointed to a different file name it is a symbolic link to the same file. Now all three entries point to pseudo-different files (having the same content behind them). This change is needed so that tests don't break when detection of conflicting book names is made stricter. --- test/data/library.xml | 8 ++--- test/data/zimfile_raycharles.zim | 1 + .../data/zimfile_raycharles_uncategorized.zim | 1 + test/library_server.cpp | 30 +++++++++---------- test/meson.build | 2 ++ 5 files changed, 23 insertions(+), 19 deletions(-) create mode 120000 test/data/zimfile_raycharles.zim create mode 120000 test/data/zimfile_raycharles_uncategorized.zim diff --git a/test/data/library.xml b/test/data/library.xml index 7a2fcdf9c..1c1fe4886 100644 --- a/test/data/library.xml +++ b/test/data/library.xml @@ -1,8 +1,8 @@ \n ", \ CONTENT_NAME, \ - "zimfile", \ + "zimfile_raycharles", \ "569344"\ ) -#define RAY_CHARLES_CATALOG_ENTRY _RAY_CHARLES_CATALOG_ENTRY("zimfile") +#define RAY_CHARLES_CATALOG_ENTRY _RAY_CHARLES_CATALOG_ENTRY("zimfile_raycharles") #define RAY_CHARLES_CATALOG_ENTRY_NO_MAPPER _RAY_CHARLES_CATALOG_ENTRY("raycharles") #define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\ @@ -145,8 +145,8 @@ std::string maskVariableOPDSFeedData(std::string s) "",\ "public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\ "",\ - "zimfile", \ - "zimfile", \ + "zimfile_raycharles_uncategorized", \ + "zimfile_raycharles_uncategorized", \ "125952"\ ) @@ -1110,10 +1110,10 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access) " \n" \ @@ -1130,10 +1130,10 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access) " \n" \ @@ -1224,16 +1224,16 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access) "
\n" \ " Download links for Ray (uncategorized) Charles\n" \ "
\n" \ - " \n" \ + " \n" \ "
Direct
\n" \ "
\n" \ - " \n" \ + " \n" \ "
Sha256 hash
\n" \ "
\n" \ - " \n" \ + " \n" \ "
Magnet link
\n" \ "
\n" \ - " \n" \ + " \n" \ "
Torrent file
\n" \ "
\n" \ "\n" \ @@ -1273,7 +1273,7 @@ TEST_F(LibraryServerTest, noJS) { FINAL_HTML_TEXT); // no_js_download - r = zfs1_->GET("/ROOT%23%3F/nojs/download/zimfile"); + r = zfs1_->GET("/ROOT%23%3F/nojs/download/zimfile_raycharles_uncategorized"); EXPECT_EQ(r->status, 200); EXPECT_EQ(r->body, RAY_CHARLES_UNCTZ_DOWNLOAD); } diff --git a/test/meson.build b/test/meson.build index 78446f473..aa55e1d5f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -38,6 +38,8 @@ if gtest_dep.found() and not meson.is_cross_build() 'example.zim', 'zimfile.zim', 'zimfile&other.zim', + 'zimfile_raycharles.zim', + 'zimfile_raycharles_uncategorized.zim', 'corner_cases#&.zim', 'poor.zim', 'library.xml', From 5e669cd65cab52cf3efe3243d952f2f3cdc68a2d Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 5 Mar 2024 18:18:54 +0400 Subject: [PATCH 2/7] Deduplicated test output data --- test/name_mapper.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index 4396356e8..b427ec5e6 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -55,6 +55,12 @@ class CapturedStderr operator std::string() const { return buffer.str(); } }; + +const std::string ZERO_FOUR_NAME_CONFLICT_MSG = + "Path collision: /data/zero_four_2021-10.zim and" + " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." + " Therefore, only /data/zero_four_2021-10.zim will be served.\n"; + } // unnamed namespace void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm) @@ -91,12 +97,7 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases) { CapturedStderr stderror; kiwix::HumanReadableNameMapper nm(*lib, true); - EXPECT_EQ( - "Path collision: /data/zero_four_2021-10.zim and" - " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." - " Therefore, only /data/zero_four_2021-10.zim will be served.\n" - , std::string(stderror) - ); + EXPECT_EQ(ZERO_FOUR_NAME_CONFLICT_MSG, std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); @@ -127,12 +128,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases) { CapturedStderr stderror; kiwix::UpdatableNameMapper nm(lib, true); - EXPECT_EQ( - "Path collision: /data/zero_four_2021-10.zim and" - " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." - " Therefore, only /data/zero_four_2021-10.zim will be served.\n" - , std::string(stderror) - ); + EXPECT_EQ(ZERO_FOUR_NAME_CONFLICT_MSG, std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); From 4e64d26edeedb5290e258cd10504d81767573c48 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 5 Mar 2024 18:21:37 +0400 Subject: [PATCH 3/7] Added more name conflicts to NameMapper unit test The extended setup of the NameMapper unit test demonstrates (by the fact that this change doesn't break the tests that check the stderr) that certain naming conflicts escape NameMapper's attention. --- test/name_mapper.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index b427ec5e6..d972bfbae 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -14,6 +14,12 @@ const char libraryXML[] = R"( + + + + + +
)"; @@ -70,12 +76,30 @@ void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm) EXPECT_EQ("zero_three", nm.getNameForId("03")); EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11")); + EXPECT_EQ("zero_five-a", nm.getNameForId("05-a")); + EXPECT_EQ("zero_five-b", nm.getNameForId("05-b")); + + // unreported conflict + EXPECT_EQ("zero_plus_six", nm.getNameForId("06+")); + EXPECT_EQ("zero_plus_six", nm.getNameForId("06plus")); + + // unreported conflict + EXPECT_EQ("zero_seven", nm.getNameForId("07-super")); + EXPECT_EQ("zero_seven", nm.getNameForId("07-sub")); EXPECT_EQ("01", nm.getIdForName("zero_one")); EXPECT_EQ("02", nm.getIdForName("zero_two")); EXPECT_EQ("03", nm.getIdForName("zero_three")); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11")); + + // book name doesn't participate in name mapping + EXPECT_THROW(nm.getIdForName("zero_five"), std::out_of_range); + EXPECT_EQ("05-a", nm.getIdForName("zero_five-a")); + EXPECT_EQ("05-b", nm.getIdForName("zero_five-b")); + + EXPECT_EQ("06plus", nm.getIdForName("zero_plus_six")); + EXPECT_EQ("07-super", nm.getIdForName("zero_seven")); } TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) From 5b9daf0d9d656e84c13b415fbea9347b4fa79900 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 4 Mar 2024 17:30:13 +0400 Subject: [PATCH 4/7] Extracted HumanReadableNameMapper::mapName() No cleanup of the new function was performed to keep the diff minimal. --- include/name_mapper.h | 3 +++ src/name_mapper.cpp | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/name_mapper.h b/include/name_mapper.h index 63333515d..28706350e 100644 --- a/include/name_mapper.h +++ b/include/name_mapper.h @@ -54,6 +54,9 @@ class HumanReadableNameMapper : public NameMapper { virtual ~HumanReadableNameMapper() = default; virtual std::string getNameForId(const std::string& id) const; virtual std::string getIdForName(const std::string& name) const; + + private: + void mapName(const kiwix::Library& lib, std::string name, std::string id); }; class UpdatableNameMapper : public NameMapper { diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index bf2a2e4dd..56844cc04 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -38,9 +38,16 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w if (aliasName == bookName) { continue; } + + mapName(library, aliasName, bookId); + } +} + +void HumanReadableNameMapper::mapName(const Library& library, std::string aliasName, std::string bookId) { if (m_nameToId.find(aliasName) == m_nameToId.end()) { m_nameToId[aliasName] = bookId; } else { + const auto& currentBook = library.getBookById(bookId); auto alreadyPresentPath = library.getBookById(m_nameToId[aliasName]).getPath(); std::cerr << "Path collision: " << alreadyPresentPath << " and " << currentBook.getPath() @@ -48,7 +55,6 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w << " Therefore, only " << alreadyPresentPath << " will be served." << std::endl; } - } } std::string HumanReadableNameMapper::getNameForId(const std::string& id) const { From 181893d31a7c4057557f420154dd95930f62ce77 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 4 Mar 2024 17:33:21 +0400 Subject: [PATCH 5/7] Cleanup after previous change - Got rid of the continue statement - Renamed the function parameter - Fixed indentation --- src/name_mapper.cpp | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index 56844cc04..88daed5b0 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -35,26 +35,24 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w continue; auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$"); - if (aliasName == bookName) { - continue; + if (aliasName != bookName) { + mapName(library, aliasName, bookId); } - - mapName(library, aliasName, bookId); } } -void HumanReadableNameMapper::mapName(const Library& library, std::string aliasName, std::string bookId) { - if (m_nameToId.find(aliasName) == m_nameToId.end()) { - m_nameToId[aliasName] = bookId; - } else { - const auto& currentBook = library.getBookById(bookId); - auto alreadyPresentPath = library.getBookById(m_nameToId[aliasName]).getPath(); - std::cerr << "Path collision: " << alreadyPresentPath - << " and " << currentBook.getPath() - << " can't share the same URL path '" << aliasName << "'." - << " Therefore, only " << alreadyPresentPath - << " will be served." << std::endl; - } +void HumanReadableNameMapper::mapName(const Library& library, std::string name, std::string bookId) { + if (m_nameToId.find(name) == m_nameToId.end()) { + m_nameToId[name] = bookId; + } else { + const auto& currentBook = library.getBookById(bookId); + auto alreadyPresentPath = library.getBookById(m_nameToId[name]).getPath(); + std::cerr << "Path collision: " << alreadyPresentPath + << " and " << currentBook.getPath() + << " can't share the same URL path '" << name << "'." + << " Therefore, only " << alreadyPresentPath + << " will be served." << std::endl; + } } std::string HumanReadableNameMapper::getNameForId(const std::string& id) const { From 0168764f4c02d2ea9c8fbb70a5780dd1daf9db64 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 5 Mar 2024 19:20:24 +0400 Subject: [PATCH 6/7] NameMapper detects all naming conflicts Also this change leads to the change in the mapping (since conflicts that previously went undetected and just overwrote the existing entry are now rejected). --- src/name_mapper.cpp | 2 +- test/name_mapper.cpp | 33 ++++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index 88daed5b0..645c18147 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -29,7 +29,7 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w auto& currentBook = library.getBookById(bookId); auto bookName = currentBook.getHumanReadableIdFromPath(); m_idToName[bookId] = bookName; - m_nameToId[bookName] = bookId; + mapName(library, bookName, bookId); if (!withAlias) continue; diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index d972bfbae..6956ff8bd 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -67,6 +67,25 @@ const std::string ZERO_FOUR_NAME_CONFLICT_MSG = " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." " Therefore, only /data/zero_four_2021-10.zim will be served.\n"; +const std::string ZERO_SIX_NAME_CONFLICT_MSG = + "Path collision: /data/zërô + SIX.zim and " + "/data/zero_plus_six.zim can't share the same URL path 'zero_plus_six'." + " Therefore, only /data/zërô + SIX.zim will be served.\n"; + +const std::string ZERO_SEVEN_NAME_CONFLICT_MSG = + "Path collision: /data/subdir/zero_seven.zim and" + " /data/zero_seven.zim can't share the same URL path 'zero_seven'." + " Therefore, only /data/subdir/zero_seven.zim will be served.\n"; + +// Name conflicts in the default mode (without the --nodatealiases is off +const std::string DEFAULT_NAME_CONFLICTS = ZERO_SIX_NAME_CONFLICT_MSG + + ZERO_SEVEN_NAME_CONFLICT_MSG; + +// Name conflicts in --nodatealiases mode +const std::string ALL_NAME_CONFLICTS = ZERO_FOUR_NAME_CONFLICT_MSG + + ZERO_SIX_NAME_CONFLICT_MSG + + ZERO_SEVEN_NAME_CONFLICT_MSG; + } // unnamed namespace void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm) @@ -98,15 +117,15 @@ void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm) EXPECT_EQ("05-a", nm.getIdForName("zero_five-a")); EXPECT_EQ("05-b", nm.getIdForName("zero_five-b")); - EXPECT_EQ("06plus", nm.getIdForName("zero_plus_six")); - EXPECT_EQ("07-super", nm.getIdForName("zero_seven")); + EXPECT_EQ("06+", nm.getIdForName("zero_plus_six")); + EXPECT_EQ("07-sub", nm.getIdForName("zero_seven")); } TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) { CapturedStderr stderror; kiwix::HumanReadableNameMapper nm(*lib, false); - EXPECT_EQ("", std::string(stderror)); + EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); @@ -121,7 +140,7 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases) { CapturedStderr stderror; kiwix::HumanReadableNameMapper nm(*lib, true); - EXPECT_EQ(ZERO_FOUR_NAME_CONFLICT_MSG, std::string(stderror)); + EXPECT_EQ(ALL_NAME_CONFLICTS, std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); @@ -136,7 +155,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithoutAliases) { CapturedStderr stderror; kiwix::UpdatableNameMapper nm(lib, false); - EXPECT_EQ("", std::string(stderror)); + EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); @@ -152,7 +171,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases) { CapturedStderr stderror; kiwix::UpdatableNameMapper nm(lib, true); - EXPECT_EQ(ZERO_FOUR_NAME_CONFLICT_MSG, std::string(stderror)); + EXPECT_EQ(ALL_NAME_CONFLICTS, std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); @@ -161,7 +180,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases) CapturedStderr nmUpdateStderror; lib->removeBookById("04-2021-10"); nm.update(); - EXPECT_EQ("", std::string(nmUpdateStderror)); + EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(nmUpdateStderror)); } EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four")); EXPECT_THROW(nm.getNameForId("04-2021-10"), std::out_of_range); From 068555de38749c01b021a2d760ed7843c0704d3f Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 5 Mar 2024 19:40:34 +0400 Subject: [PATCH 7/7] Paths in the error are put in single quotes --- src/name_mapper.cpp | 10 +++++----- test/name_mapper.cpp | 18 +++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index 645c18147..3b3f789d7 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -47,11 +47,11 @@ void HumanReadableNameMapper::mapName(const Library& library, std::string name, } else { const auto& currentBook = library.getBookById(bookId); auto alreadyPresentPath = library.getBookById(m_nameToId[name]).getPath(); - std::cerr << "Path collision: " << alreadyPresentPath - << " and " << currentBook.getPath() - << " can't share the same URL path '" << name << "'." - << " Therefore, only " << alreadyPresentPath - << " will be served." << std::endl; + std::cerr << "Path collision: '" << alreadyPresentPath + << "' and '" << currentBook.getPath() + << "' can't share the same URL path '" << name << "'." + << " Therefore, only '" << alreadyPresentPath + << "' will be served." << std::endl; } } diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index 6956ff8bd..74fecf481 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -63,19 +63,19 @@ class CapturedStderr const std::string ZERO_FOUR_NAME_CONFLICT_MSG = - "Path collision: /data/zero_four_2021-10.zim and" - " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." - " Therefore, only /data/zero_four_2021-10.zim will be served.\n"; + "Path collision: '/data/zero_four_2021-10.zim' and" + " '/data/zero_four_2021-11.zim' can't share the same URL path 'zero_four'." + " Therefore, only '/data/zero_four_2021-10.zim' will be served.\n"; const std::string ZERO_SIX_NAME_CONFLICT_MSG = - "Path collision: /data/zërô + SIX.zim and " - "/data/zero_plus_six.zim can't share the same URL path 'zero_plus_six'." - " Therefore, only /data/zërô + SIX.zim will be served.\n"; + "Path collision: '/data/zërô + SIX.zim' and " + "'/data/zero_plus_six.zim' can't share the same URL path 'zero_plus_six'." + " Therefore, only '/data/zërô + SIX.zim' will be served.\n"; const std::string ZERO_SEVEN_NAME_CONFLICT_MSG = - "Path collision: /data/subdir/zero_seven.zim and" - " /data/zero_seven.zim can't share the same URL path 'zero_seven'." - " Therefore, only /data/subdir/zero_seven.zim will be served.\n"; + "Path collision: '/data/subdir/zero_seven.zim' and" + " '/data/zero_seven.zim' can't share the same URL path 'zero_seven'." + " Therefore, only '/data/subdir/zero_seven.zim' will be served.\n"; // Name conflicts in the default mode (without the --nodatealiases is off const std::string DEFAULT_NAME_CONFLICTS = ZERO_SIX_NAME_CONFLICT_MSG