Skip to content

Commit

Permalink
Merge pull request #1067 from kiwix/stricter_namemapper
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Mar 6, 2024
2 parents ddde6db + 068555d commit a8368b3
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 47 deletions.
3 changes: 3 additions & 0 deletions include/name_mapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class HumanReadableNameMapper : public NameMapper {
virtual ~HumanReadableNameMapper() = default;
virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const;

private:
void mapName(const kiwix::Library& lib, std::string name, std::string id);
};

class UpdatableNameMapper : public NameMapper {
Expand Down
30 changes: 17 additions & 13 deletions src/name_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,32 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w
auto& currentBook = library.getBookById(bookId);
auto bookName = currentBook.getHumanReadableIdFromPath();
m_idToName[bookId] = bookName;
m_nameToId[bookName] = bookId;
mapName(library, bookName, bookId);

if (!withAlias)
continue;

auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$");
if (aliasName == bookName) {
continue;
}
if (m_nameToId.find(aliasName) == m_nameToId.end()) {
m_nameToId[aliasName] = bookId;
} else {
auto alreadyPresentPath = library.getBookById(m_nameToId[aliasName]).getPath();
std::cerr << "Path collision: " << alreadyPresentPath
<< " and " << currentBook.getPath()
<< " can't share the same URL path '" << aliasName << "'."
<< " Therefore, only " << alreadyPresentPath
<< " will be served." << std::endl;
if (aliasName != bookName) {
mapName(library, aliasName, bookId);
}
}
}

void HumanReadableNameMapper::mapName(const Library& library, std::string name, std::string bookId) {
if (m_nameToId.find(name) == m_nameToId.end()) {
m_nameToId[name] = bookId;
} else {
const auto& currentBook = library.getBookById(bookId);
auto alreadyPresentPath = library.getBookById(m_nameToId[name]).getPath();
std::cerr << "Path collision: '" << alreadyPresentPath
<< "' and '" << currentBook.getPath()
<< "' can't share the same URL path '" << name << "'."
<< " Therefore, only '" << alreadyPresentPath
<< "' will be served." << std::endl;
}
}

std::string HumanReadableNameMapper::getNameForId(const std::string& id) const {
return m_idToName.at(id);
}
Expand Down
8 changes: 4 additions & 4 deletions test/data/library.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<library version="1.0">
<book
id="raycharles"
path="./zimfile.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
path="./zimfile_raycharles.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles.zim"
title="Ray Charles"
description="Wikipedia articles about Ray Charles"
language="eng"
Expand All @@ -19,8 +19,8 @@
></book>
<book
id="raycharles_uncategorized"
path="./zimfile.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
path="./zimfile_raycharles_uncategorized.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim"
title="Ray (uncategorized) Charles"
description="No category is assigned to this library entry."
language="rus,eng"
Expand Down
1 change: 1 addition & 0 deletions test/data/zimfile_raycharles.zim
1 change: 1 addition & 0 deletions test/data/zimfile_raycharles_uncategorized.zim
30 changes: 15 additions & 15 deletions test/library_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ std::string maskVariableOPDSFeedData(std::string s)
" href=\"/ROOT%23%3F/catalog/v2/illustration/raycharles/?size=48\"\n" \
" type=\"image/png;width=48;height=48;scale=1\"/>\n ", \
CONTENT_NAME, \
"zimfile", \
"zimfile_raycharles", \
"569344"\
)

#define RAY_CHARLES_CATALOG_ENTRY _RAY_CHARLES_CATALOG_ENTRY("zimfile")
#define RAY_CHARLES_CATALOG_ENTRY _RAY_CHARLES_CATALOG_ENTRY("zimfile_raycharles")
#define RAY_CHARLES_CATALOG_ENTRY_NO_MAPPER _RAY_CHARLES_CATALOG_ENTRY("raycharles")

#define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\
Expand All @@ -145,8 +145,8 @@ std::string maskVariableOPDSFeedData(std::string s)
"",\
"public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\
"",\
"zimfile", \
"zimfile", \
"zimfile_raycharles_uncategorized", \
"zimfile_raycharles_uncategorized", \
"125952"\
)

Expand Down Expand Up @@ -1110,10 +1110,10 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__link__wrapper\">\n" \
" <div class=\"book__icon\" style=background-image:url(/ROOT%23%3F/catalog/v2/illustration/raycharles/?size=48)></div>\n" \
" <div class=\"book__header\">\n" \
" <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile\">Ray Charles</a></div>\n" \
" <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile\">Download</a></span></div>\n" \
" <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile_raycharles\">Ray Charles</a></div>\n" \
" <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile_raycharles\">Download</a></span></div>\n" \
" </div>\n" \
" <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile\" title=\"Preview\" aria-label=\"Preview\">\n" \
" <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile_raycharles\" title=\"Preview\" aria-label=\"Preview\">\n" \
" <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles\">Wikipedia articles about Ray Charles</div>\n" \
" </a>\n" \
" </div>\n" \
Expand All @@ -1130,10 +1130,10 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__link__wrapper\">\n" \
" <div class=\"book__icon\" style=background-image:url(/ROOT%23%3F/catalog/v2/illustration/raycharles_uncategorized/?size=48)></div>\n" \
" <div class=\"book__header\">\n" \
" <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile\">Ray (uncategorized) Charles</a></div>\n" \
" <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile\">Download</a></span></div>\n" \
" <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile_raycharles_uncategorized\">Ray (uncategorized) Charles</a></div>\n" \
" <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile_raycharles_uncategorized\">Download</a></span></div>\n" \
" </div>\n" \
" <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile\" title=\"Preview\" aria-label=\"Preview\">\n" \
" <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile_raycharles_uncategorized\" title=\"Preview\" aria-label=\"Preview\">\n" \
" <div class=\"book__description\" title=\"No category is assigned to this library entry.\">No category is assigned to this library entry.</div>\n" \
" </a>\n" \
" </div>\n" \
Expand Down Expand Up @@ -1224,16 +1224,16 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"downloadLinksTitle\">\n" \
" Download links for <b><i>Ray (uncategorized) Charles</i></b>\n" \
" </div>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim\" download>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim\" download>\n" \
" <div>Direct</div>\n" \
" </a>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim.sha256\" download>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim.sha256\" download>\n" \
" <div>Sha256 hash</div>\n" \
" </a>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim.magnet\" target=\"_blank\">\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim.magnet\" target=\"_blank\">\n" \
" <div>Magnet link</div>\n" \
" </a>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim.torrent\" download>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim.torrent\" download>\n" \
" <div>Torrent file</div>\n" \
" </a>\n" \
"</body>\n" \
Expand Down Expand Up @@ -1273,7 +1273,7 @@ TEST_F(LibraryServerTest, noJS) {
FINAL_HTML_TEXT);

// no_js_download
r = zfs1_->GET("/ROOT%23%3F/nojs/download/zimfile");
r = zfs1_->GET("/ROOT%23%3F/nojs/download/zimfile_raycharles_uncategorized");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(r->body, RAY_CHARLES_UNCTZ_DOWNLOAD);
}
Expand Down
2 changes: 2 additions & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ if gtest_dep.found() and not meson.is_cross_build()
'example.zim',
'zimfile.zim',
'zimfile&other.zim',
'zimfile_raycharles.zim',
'zimfile_raycharles_uncategorized.zim',
'corner_cases#&.zim',
'poor.zim',
'library.xml',
Expand Down
69 changes: 54 additions & 15 deletions test/name_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ const char libraryXML[] = R"(
<book id="03" path="/data/ZERO thrêë.zim"> </book>
<book id="04-2021-10" path="/data/zero_four_2021-10.zim"></book>
<book id="04-2021-11" path="/data/zero_four_2021-11.zim"></book>
<book id="05-a" path="/data/zero_five-a.zim" name="zero_five"></book>
<book id="05-b" path="/data/zero_five-b.zim" name="zero_five"></book>
<book id="06+" path="/data/zërô + SIX.zim"></book>
<book id="06plus" path="/data/zero_plus_six.zim"></book>
<book id="07-super" path="/data/zero_seven.zim"></book>
<book id="07-sub" path="/data/subdir/zero_seven.zim"></book>
</library>
)";

Expand Down Expand Up @@ -55,6 +61,31 @@ class CapturedStderr
operator std::string() const { return buffer.str(); }
};


const std::string ZERO_FOUR_NAME_CONFLICT_MSG =
"Path collision: '/data/zero_four_2021-10.zim' and"
" '/data/zero_four_2021-11.zim' can't share the same URL path 'zero_four'."
" Therefore, only '/data/zero_four_2021-10.zim' will be served.\n";

const std::string ZERO_SIX_NAME_CONFLICT_MSG =
"Path collision: '/data/zërô + SIX.zim' and "
"'/data/zero_plus_six.zim' can't share the same URL path 'zero_plus_six'."
" Therefore, only '/data/zërô + SIX.zim' will be served.\n";

const std::string ZERO_SEVEN_NAME_CONFLICT_MSG =
"Path collision: '/data/subdir/zero_seven.zim' and"
" '/data/zero_seven.zim' can't share the same URL path 'zero_seven'."
" Therefore, only '/data/subdir/zero_seven.zim' will be served.\n";

// Name conflicts in the default mode (without the --nodatealiases is off
const std::string DEFAULT_NAME_CONFLICTS = ZERO_SIX_NAME_CONFLICT_MSG
+ ZERO_SEVEN_NAME_CONFLICT_MSG;

// Name conflicts in --nodatealiases mode
const std::string ALL_NAME_CONFLICTS = ZERO_FOUR_NAME_CONFLICT_MSG
+ ZERO_SIX_NAME_CONFLICT_MSG
+ ZERO_SEVEN_NAME_CONFLICT_MSG;

} // unnamed namespace

void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm)
Expand All @@ -64,19 +95,37 @@ void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm)
EXPECT_EQ("zero_three", nm.getNameForId("03"));
EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10"));
EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11"));
EXPECT_EQ("zero_five-a", nm.getNameForId("05-a"));
EXPECT_EQ("zero_five-b", nm.getNameForId("05-b"));

// unreported conflict
EXPECT_EQ("zero_plus_six", nm.getNameForId("06+"));
EXPECT_EQ("zero_plus_six", nm.getNameForId("06plus"));

// unreported conflict
EXPECT_EQ("zero_seven", nm.getNameForId("07-super"));
EXPECT_EQ("zero_seven", nm.getNameForId("07-sub"));

EXPECT_EQ("01", nm.getIdForName("zero_one"));
EXPECT_EQ("02", nm.getIdForName("zero_two"));
EXPECT_EQ("03", nm.getIdForName("zero_three"));
EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10"));
EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11"));

// book name doesn't participate in name mapping
EXPECT_THROW(nm.getIdForName("zero_five"), std::out_of_range);
EXPECT_EQ("05-a", nm.getIdForName("zero_five-a"));
EXPECT_EQ("05-b", nm.getIdForName("zero_five-b"));

EXPECT_EQ("06+", nm.getIdForName("zero_plus_six"));
EXPECT_EQ("07-sub", nm.getIdForName("zero_seven"));
}

TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases)
{
CapturedStderr stderror;
kiwix::HumanReadableNameMapper nm(*lib, false);
EXPECT_EQ("", std::string(stderror));
EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(stderror));

checkUnaliasedEntriesInNameMapper(nm);
EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range);
Expand All @@ -91,12 +140,7 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases)
{
CapturedStderr stderror;
kiwix::HumanReadableNameMapper nm(*lib, true);
EXPECT_EQ(
"Path collision: /data/zero_four_2021-10.zim and"
" /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'."
" Therefore, only /data/zero_four_2021-10.zim will be served.\n"
, std::string(stderror)
);
EXPECT_EQ(ALL_NAME_CONFLICTS, std::string(stderror));

checkUnaliasedEntriesInNameMapper(nm);
EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four"));
Expand All @@ -111,7 +155,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithoutAliases)
{
CapturedStderr stderror;
kiwix::UpdatableNameMapper nm(lib, false);
EXPECT_EQ("", std::string(stderror));
EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(stderror));

checkUnaliasedEntriesInNameMapper(nm);
EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range);
Expand All @@ -127,12 +171,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases)
{
CapturedStderr stderror;
kiwix::UpdatableNameMapper nm(lib, true);
EXPECT_EQ(
"Path collision: /data/zero_four_2021-10.zim and"
" /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'."
" Therefore, only /data/zero_four_2021-10.zim will be served.\n"
, std::string(stderror)
);
EXPECT_EQ(ALL_NAME_CONFLICTS, std::string(stderror));

checkUnaliasedEntriesInNameMapper(nm);
EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four"));
Expand All @@ -141,7 +180,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases)
CapturedStderr nmUpdateStderror;
lib->removeBookById("04-2021-10");
nm.update();
EXPECT_EQ("", std::string(nmUpdateStderror));
EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(nmUpdateStderror));
}
EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four"));
EXPECT_THROW(nm.getNameForId("04-2021-10"), std::out_of_range);
Expand Down

0 comments on commit a8368b3

Please sign in to comment.