Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quick-fix of issues caused by URL encoding #870

Merged
merged 16 commits into from
Jan 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/opds_dumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ std::string fullEntryXML(const Book& book, const std::string& rootLocation, cons
{"title", book.getTitle()},
{"description", book.getDescription()},
{"language", book.getLanguage()},
{"content_id", urlEncode(contentId, true)},
{"content_id", urlEncode(contentId)},
{"updated", bookDate}, // XXX: this should be the entry update datetime
{"book_date", bookDate},
{"category", book.getCategory()},
Expand Down Expand Up @@ -216,7 +216,7 @@ string OPDSDumper::dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const
{"endpoint_root", endpointRoot},
{"feed_id", gen_uuid(libraryId + endpoint + "?" + query)},
{"filter", onlyAsNonEmptyMustacheValue(query)},
{"query", query.empty() ? "" : "?" + urlEncode(query)},
{"query", query.empty() ? "" : "?" + query},
{"totalResults", to_string(m_totalResults)},
{"startIndex", to_string(m_startIndex)},
{"itemsPerPage", to_string(m_count)},
Expand Down
7 changes: 4 additions & 3 deletions src/search_renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ kainjow::mustache::data buildQueryData
kainjow::mustache::data query;
query.set("pattern", kiwix::encodeDiples(pattern));
std::ostringstream ss;
ss << searchProtocolPrefix << "?pattern=" << urlEncode(pattern, true);
ss << searchProtocolPrefix << "?pattern=" << urlEncode(pattern);
ss << "&" << bookQuery;
query.set("unpaginatedQuery", ss.str());
auto lang = extractValueFromQuery(bookQuery, "books.filter.lang");
Expand Down Expand Up @@ -171,9 +171,10 @@ std::string SearchRenderer::renderTemplate(const std::string& tmpl_str)
kainjow::mustache::data items{kainjow::mustache::data::type::list};
for (auto it = m_srs.begin(); it != m_srs.end(); it++) {
kainjow::mustache::data result;
std::string zim_id(it.getZimId());
const std::string zim_id(it.getZimId());
const auto path = mp_nameMapper->getNameForId(zim_id) + "/" + it.getPath();
result.set("title", it.getTitle());
result.set("absolutePath", absPathPrefix + urlEncode(mp_nameMapper->getNameForId(zim_id), true) + "/" + urlEncode(it.getPath()));
result.set("absolutePath", absPathPrefix + urlEncode(path));
result.set("snippet", it.getSnippet());
if (mp_library) {
result.set("bookTitle", mp_library->getBookById(zim_id).getTitle());
Expand Down
6 changes: 3 additions & 3 deletions src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1030,7 +1030,7 @@ ParameterizedMessage suggestSearchMsg(const std::string& searchURL, const std::s
std::unique_ptr<Response>
InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const
{
const auto path = kiwix::urlEncode(item.getPath(), true);
const auto path = kiwix::urlEncode(item.getPath());
const auto redirectUrl = m_root + "/content/" + bookName + "/" + path;
return Response::build_redirect(*this, redirectUrl);
}
Expand All @@ -1055,7 +1055,7 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
} catch (const std::out_of_range& e) {}

if (archive == nullptr) {
const std::string searchURL = m_root + "/search?pattern=" + kiwix::urlEncode(pattern, true);
const std::string searchURL = m_root + "/search?pattern=" + kiwix::urlEncode(pattern);
return HTTP404Response(*this, request)
+ urlNotFoundMsg
+ suggestSearchMsg(searchURL, kiwix::urlDecode(pattern));
Expand Down Expand Up @@ -1096,7 +1096,7 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
if (m_verbose.load())
printf("Failed to find %s\n", urlStr.c_str());

std::string searchURL = m_root + "/search?content=" + bookName + "&pattern=" + kiwix::urlEncode(pattern, true);
std::string searchURL = m_root + "/search?content=" + bookName + "&pattern=" + kiwix::urlEncode(pattern);
return HTTP404Response(*this, request)
+ urlNotFoundMsg
+ suggestSearchMsg(searchURL, kiwix::urlDecode(pattern));
Expand Down
4 changes: 2 additions & 2 deletions src/server/request_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
if ( ! _this->queryString.empty() ) {
_this->queryString += "&";
}
_this->queryString += key;
_this->queryString += urlEncode(key);
if ( value ) {
_this->queryString += "=";
_this->queryString += value;
_this->queryString += urlEncode(value);
}
return MHD_YES;
}
Expand Down
2 changes: 1 addition & 1 deletion src/server/request_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class RequestContext {
std::string get_query(F filter, bool mustEncode) const {
std::string q;
const char* sep = "";
auto encode = [=](const std::string& value) { return mustEncode?urlEncode(value, true):value; };
auto encode = [=](const std::string& value) { return mustEncode?urlEncode(value):value; };
for ( const auto& a : arguments ) {
if (!filter(a.first)) {
continue;
Expand Down
3 changes: 0 additions & 3 deletions src/tools/otherTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,6 @@ kainjow::mustache::data kiwix::onlyAsNonEmptyMustacheValue(const std::string& s)
std::string kiwix::render_template(const std::string& template_str, kainjow::mustache::data data)
{
kainjow::mustache::mustache tmpl(template_str);
kainjow::mustache::data urlencode{kainjow::mustache::lambda2{
[](const std::string& str,const kainjow::mustache::renderer& r) { return urlEncode(r(str), true); }}};
data.set("urlencoded", urlencode);
std::stringstream ss;
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
Expand Down
48 changes: 24 additions & 24 deletions src/tools/stringTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,38 +161,37 @@ std::string kiwix::encodeDiples(const std::string& str)
return result;
}

/* urlEncode() based on javascript encodeURI() &
encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */
namespace
{

bool isReservedUrlChar(char c)
{
switch (c) {
case ';':
case ',':
case '/':
case '?':
case ':':
case '@':
case '&':
case '=':
case '+':
case '$':
case '#':
return true;
default:
return false;
}
}

bool needsEscape(char c, bool encodeReserved)
bool isHarmlessUriChar(char c)
{
if (c >= 'a' && c <= 'z')
return false;
return true;
if (c >= 'A' && c <= 'Z')
return false;
return true;
if (c >= '0' && c <= '9')
return false;
if (isReservedUrlChar(c))
return encodeReserved;
return true;

switch (c) {
case '-':
case '_':
Expand All @@ -203,9 +202,10 @@ bool needsEscape(char c, bool encodeReserved)
case '\'':
case '(':
case ')':
return false;
case '/':
return true;
}
return true;
return false;
}

int hexToInt(char c) {
Expand All @@ -230,18 +230,18 @@ int hexToInt(char c) {
}
}

std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
} // unnamed namespace

std::string kiwix::urlEncode(const std::string& value)
{
std::ostringstream os;
os << std::hex << std::uppercase;
for (std::string::const_iterator it = value.begin();
it != value.end();
it++) {

if (!needsEscape(*it, encodeReserved)) {
os << *it;
for (const char c : value) {
if (isHarmlessUriChar(c)) {
os << c;
} else {
os << '%' << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(*it));
const unsigned int charVal = static_cast<unsigned char>(c);
os << '%' << std::setw(2) << std::setfill('0') << charVal;
}
}
return os.str();
Expand All @@ -267,15 +267,15 @@ std::string kiwix::urlDecode(const std::string& value, bool component)
int iHi = hexToInt(hi);
int iLo = hexToInt(lo);
if (iHi < 0 || iLo < 0) {
// Invalid escape sequence
os << '%' << hi << lo;
continue;
// Invalid escape sequence
os << '%' << hi << lo;
continue;
}
char c = (char)(iHi << 4 | iLo);
if (!component && isReservedUrlChar(c)) {
os << '%' << hi << lo;
os << '%' << hi << lo;
} else {
os << c;
os << c;
}
} else {
os << *it;
Expand Down
4 changes: 3 additions & 1 deletion src/tools/stringTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ class ICULanguageInfo
};


std::string urlEncode(const std::string& value, bool encodeReserved = false);
/* urlEncode() is the equivalent of JS encodeURIComponent(), with the only
* difference that the slash (/) symbol is NOT encoded. */
std::string urlEncode(const std::string& value);
std::string urlDecode(const std::string& value, bool component = false);

std::string join(const std::vector<std::string>& list, const std::string& sep);
Expand Down
22 changes: 11 additions & 11 deletions test/library_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ TEST_F(LibraryServerTest, catalog_search_by_phrase)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (q=&quot;ray charles&quot;)</title>\n"
" <title>Filtered zims (q=%22ray%20charles%22)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand All @@ -212,7 +212,7 @@ TEST_F(LibraryServerTest, catalog_search_by_words)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (q=ray charles)</title>\n"
" <title>Filtered zims (q=ray%20charles)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>3</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand All @@ -233,7 +233,7 @@ TEST_F(LibraryServerTest, catalog_prefix_search)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (q=description:ray description:charles)</title>\n"
" <title>Filtered zims (q=description%3Aray%20description%3Acharles)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand All @@ -250,7 +250,7 @@ TEST_F(LibraryServerTest, catalog_prefix_search)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (q=title:&quot;ray charles&quot;)</title>\n"
" <title>Filtered zims (q=title%3A%22ray%20charles%22)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand All @@ -269,7 +269,7 @@ TEST_F(LibraryServerTest, catalog_search_with_word_exclusion)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (q=ray -uncategorized)</title>\n"
" <title>Filtered zims (q=ray%20-uncategorized)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand All @@ -288,7 +288,7 @@ TEST_F(LibraryServerTest, catalog_search_by_tag)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (tag=_category:jazz)</title>\n"
" <title>Filtered zims (tag=_category%3Ajazz)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand Down Expand Up @@ -342,7 +342,7 @@ TEST_F(LibraryServerTest, catalog_search_by_language)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (lang=eng,fra)</title>\n"
" <title>Filtered zims (lang=eng%2Cfra)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand Down Expand Up @@ -694,7 +694,7 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_search_terms)
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?q=%22ray%20charles%22")
" <title>Filtered Entries (q=&quot;ray charles&quot;)</title>\n"
" <title>Filtered Entries (q=%22ray%20charles%22)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand Down Expand Up @@ -726,8 +726,8 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language)
const auto r = zfs1_->GET("/ROOT/catalog/v2/entries?lang=eng,fra");
EXPECT_EQ(r->status, 200);
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng,fra")
" <title>Filtered Entries (lang=eng,fra)</title>\n"
CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng%2Cfra")
" <title>Filtered Entries (lang=eng%2Cfra)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand Down Expand Up @@ -865,7 +865,7 @@ TEST_F(LibraryServerTest, no_name_mapper_returned_catalog_use_uuid_in_link)
EXPECT_EQ(maskVariableOPDSFeedData(r->body),
OPDS_FEED_TAG
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (tag=_category:jazz)</title>\n"
" <title>Filtered zims (tag=_category%3Ajazz)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n"
" <startIndex>0</startIndex>\n"
Expand Down
28 changes: 14 additions & 14 deletions test/opds_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,34 +37,34 @@ TEST(OpdsCatalog, getSearchUrl)
}
{
Filter f;
f.query("abc def");
EXPECT_SEARCH_URL("/catalog/v2/entries?q=abc%20def");
f.query("abc def#xyz");
EXPECT_SEARCH_URL("/catalog/v2/entries?q=abc%20def%23xyz");
}
{
Filter f;
f.category("ted");
EXPECT_SEARCH_URL("/catalog/v2/entries?category=ted");
f.category("ted&bob");
EXPECT_SEARCH_URL("/catalog/v2/entries?category=ted%26bob");
}
{
Filter f;
f.lang("eng");
EXPECT_SEARCH_URL("/catalog/v2/entries?lang=eng");
f.lang("eng,fra");
EXPECT_SEARCH_URL("/catalog/v2/entries?lang=eng%2Cfra");
}
{
Filter f;
f.name("second");
EXPECT_SEARCH_URL("/catalog/v2/entries?name=second");
f.name("second?");
EXPECT_SEARCH_URL("/catalog/v2/entries?name=second%3F");
}
{
Filter f;
f.acceptTags({"paper", "plastic"});
EXPECT_SEARCH_URL("/catalog/v2/entries?tag=paper;plastic");
f.acceptTags({"#paper", "#plastic"});
EXPECT_SEARCH_URL("/catalog/v2/entries?tag=%23paper%3B%23plastic");
}
{
Filter f;
f.query("abc");
f.category("ted");
EXPECT_SEARCH_URL("/catalog/v2/entries?q=abc&category=ted");
f.query("abc=123");
f.category("@ted");
EXPECT_SEARCH_URL("/catalog/v2/entries?q=abc%3D123&category=%40ted");
}
{
Filter f;
Expand All @@ -79,7 +79,7 @@ TEST(OpdsCatalog, getSearchUrl)
f.lang("html");
f.name("edsonarantesdonascimento");
f.acceptTags({"body", "script"});
EXPECT_SEARCH_URL("/catalog/v2/entries?q=peru&category=scifi&lang=html&name=edsonarantesdonascimento&tag=body;script");
EXPECT_SEARCH_URL("/catalog/v2/entries?q=peru&category=scifi&lang=html&name=edsonarantesdonascimento&tag=body%3Bscript");
}
#undef EXPECT_SEARCH_URL
}
Loading