From 160a74f5f8be70f9b63a3c18071d71f385fb9222 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 8 Dec 2021 16:09:35 +0100 Subject: [PATCH 1/2] Extend ItemResponse and ContentResponse to return raw content. --- src/server/internalServer.h | 4 ++-- src/server/response.cpp | 41 +++++++++++++++++++++++++++++-------- src/server/response.h | 27 ++++++++++++++++++++---- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 7af45769e..b51915e7e 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -117,8 +117,8 @@ class InternalServer { std::string m_library_id; friend std::unique_ptr Response::build(const InternalServer& server); - friend std::unique_ptr ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage); - friend std::unique_ptr ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item); + friend std::unique_ptr ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage, bool raw); + friend std::unique_ptr ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item, bool raw); friend std::unique_ptr Response::build_500(const InternalServer& server, const std::string& msg); }; diff --git a/src/server/response.cpp b/src/server/response.cpp index 36f9339f8..398c5ee88 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -117,7 +117,16 @@ std::unique_ptr Response::build_500(const InternalServer& server, cons data.set("error", msg); auto content = render_template(RESOURCE::templates::_500_html, data); std::unique_ptr response ( - new ContentResponse(server.m_root, true, false, false, false, content, "text/html")); + new ContentResponse( + server.m_root, //root + true, //verbose + true, //raw + false, //withTaskbar + false, 
//withLibraryButton + false, //blockExternalLinks + content, //content + "text/html" //mimetype + )); response->set_code(MHD_HTTP_INTERNAL_SERVER_ERROR); return response; } @@ -238,8 +247,11 @@ ContentResponse::can_compress(const RequestContext& request) const bool ContentResponse::contentDecorationAllowed() const { - return (startsWith(m_mimeType, "text/html") - && m_mimeType.find(";raw=true") == std::string::npos); + if (m_raw) { + return false; + } + return (startsWith(m_mimeType, "text/html") + && m_mimeType.find(";raw=true") == std::string::npos); } MHD_Response* @@ -327,11 +339,12 @@ void ContentResponse::set_taskbar(const std::string& bookName, const std::string } -ContentResponse::ContentResponse(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype) : +ContentResponse::ContentResponse(const std::string& root, bool verbose, bool raw, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype) : Response(verbose), m_root(root), m_content(content), m_mimeType(mimetype), + m_raw(raw), m_withTaskbar(withTaskbar), m_withLibraryButton(withLibraryButton), m_blockExternalLinks(blockExternalLinks), @@ -341,11 +354,17 @@ ContentResponse::ContentResponse(const std::string& root, bool verbose, bool wit add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType); } -std::unique_ptr ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype, bool isHomePage) +std::unique_ptr ContentResponse::build( + const InternalServer& server, + const std::string& content, + const std::string& mimetype, + bool isHomePage, + bool raw) { return std::unique_ptr(new ContentResponse( server.m_root, server.m_verbose.load(), + raw, server.m_withTaskbar && !isHomePage, server.m_withLibraryButton, server.m_blockExternalLinks, @@ -353,7 +372,13 @@ std::unique_ptr 
ContentResponse::build(const InternalServer& se mimetype)); } -std::unique_ptr ContentResponse::build(const InternalServer& server, const std::string& template_str, kainjow::mustache::data data, const std::string& mimetype, bool isHomePage) { +std::unique_ptr ContentResponse::build( + const InternalServer& server, + const std::string& template_str, + kainjow::mustache::data data, + const std::string& mimetype, + bool isHomePage) +{ auto content = render_template(template_str, data); return ContentResponse::build(server, content, mimetype, isHomePage); } @@ -368,14 +393,14 @@ ItemResponse::ItemResponse(bool verbose, const zim::Item& item, const std::strin add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType); } -std::unique_ptr ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item) +std::unique_ptr ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item, bool raw) { const std::string mimetype = get_mime_type(item); auto byteRange = request.get_range().resolve(item.getSize()); const bool noRange = byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT; if (noRange && is_compressible_mime_type(mimetype)) { // Return a contentResponse - auto response = ContentResponse::build(server, item.getData(), mimetype); + auto response = ContentResponse::build(server, item.getData(), mimetype, /*isHomePage=*/false, raw); response->set_cacheable(); response->m_byteRange = byteRange; return std::move(response); diff --git a/src/server/response.h b/src/server/response.h index 2a8f6de4a..1a74aaf00 100644 --- a/src/server/response.h +++ b/src/server/response.h @@ -78,9 +78,27 @@ class Response { class ContentResponse : public Response { public: - ContentResponse(const std::string& root, bool verbose, bool withTaskbar, bool withLibraryButton, bool blockExternalLinks, const std::string& content, const std::string& mimetype); - static std::unique_ptr build(const InternalServer& server, const 
std::string& content, const std::string& mimetype, bool isHomePage = false); - static std::unique_ptr build(const InternalServer& server, const std::string& template_str, kainjow::mustache::data data, const std::string& mimetype, bool isHomePage = false); + ContentResponse( + const std::string& root, + bool verbose, + bool raw, + bool withTaskbar, + bool withLibraryButton, + bool blockExternalLinks, + const std::string& content, + const std::string& mimetype); + static std::unique_ptr build( + const InternalServer& server, + const std::string& content, + const std::string& mimetype, + bool isHomePage = false, + bool raw = false); + static std::unique_ptr build( + const InternalServer& server, + const std::string& template_str, + kainjow::mustache::data data, + const std::string& mimetype, + bool isHomePage = false); void set_taskbar(const std::string& bookName, const std::string& bookTitle); @@ -98,6 +116,7 @@ class ContentResponse : public Response { std::string m_root; std::string m_content; std::string m_mimeType; + bool m_raw; bool m_withTaskbar; bool m_withLibraryButton; bool m_blockExternalLinks; @@ -108,7 +127,7 @@ class ContentResponse : public Response { class ItemResponse : public Response { public: ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange); - static std::unique_ptr build(const InternalServer& server, const RequestContext& request, const zim::Item& item); + static std::unique_ptr build(const InternalServer& server, const RequestContext& request, const zim::Item& item, bool raw = false); private: MHD_Response* create_mhd_response(const RequestContext& request); From dc15a9a824963abd289aa39cb3683be7959e5821 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 8 Dec 2021 16:15:39 +0100 Subject: [PATCH 2/2] Add `raw` endpoint. As the name suggests, this endpoint is not smart: it returns the content as it is, and only if it is present (no compatibility handling or anything of the sort). 
The only "smart" thing is to return a redirect if the entry is a redirect. --- src/server/internalServer.cpp | 61 +++++++++++++++++++++++++++++++++++ src/server/internalServer.h | 1 + test/server.cpp | 33 +++++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 14d03e1ef..7f5bc624e 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -285,6 +285,9 @@ std::unique_ptr InternalServer::handle_request(const RequestContext& r if (startsWith(request.get_url(), "/catalog/")) return handle_catalog(request); + if (startsWith(request.get_url(), "/raw/")) + return handle_raw(request); + if (request.get_url() == "/meta") return handle_meta(request); @@ -878,4 +881,62 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r } } + +std::unique_ptr InternalServer::handle_raw(const RequestContext& request) +{ + if (m_verbose.load()) { + printf("** running handle_raw\n"); + } + + std::string bookName; + std::string kind; + try { + bookName = request.get_url_part(1); + kind = request.get_url_part(2); + } catch (const std::out_of_range& e) { + return Response::build_404(*this, request.get_full_url(), bookName, "", ""); + } + + if (kind != "meta" && kind!= "content") { + const std::string error_details = kind + " is not a valid request for raw content."; + return Response::build_404(*this, request.get_full_url(), bookName, "", error_details); + } + + std::shared_ptr archive; + try { + const std::string bookId = mp_nameMapper->getIdForName(bookName); + archive = mp_library->getArchiveById(bookId); + } catch (const std::out_of_range& e) {} + + if (archive == nullptr) { + const std::string error_details = "No such book: " + bookName; + return Response::build_404(*this, request.get_full_url(), bookName, "", error_details); + } + + // Remove the beginning of the path: + // /raw/<bookName>/<kind>/foo + // ^^^^^ ^ ^ + // 5 + 1 + 1 = 7 + auto itemPath = 
request.get_url().substr(bookName.size()+kind.size()+7); + + try { + if (kind == "meta") { + auto item = archive->getMetadataItem(itemPath); + return ItemResponse::build(*this, request, item, /*raw=*/true); + } else { + auto entry = archive->getEntryByPath(itemPath); + if (entry.isRedirect()) { + return build_redirect(bookName, entry.getItem(true)); + } + return ItemResponse::build(*this, request, entry.getItem(), /*raw=*/true); + } + } catch (zim::EntryNotFound& e ) { + if (m_verbose.load()) { + printf("Failed to find %s\n", itemPath.c_str()); + } + const std::string error_details = "Cannot find " + kind + " entry " + itemPath; + return Response::build_404(*this, request.get_full_url(), bookName, getArchiveTitle(*archive), error_details); + } +} + } diff --git a/src/server/internalServer.h b/src/server/internalServer.h index b51915e7e..ddd33f7e8 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -89,6 +89,7 @@ class InternalServer { std::unique_ptr handle_random(const RequestContext& request); std::unique_ptr handle_captured_external(const RequestContext& request); std::unique_ptr handle_content(const RequestContext& request); + std::unique_ptr handle_raw(const RequestContext& request); std::vector search_catalog(const RequestContext& request, kiwix::OPDSDumper& opdsDumper); diff --git a/test/server.cpp b/test/server.cpp index 68a7ac07e..5f79ad153 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -185,6 +185,9 @@ const ResourceCollection resources200Compressible{ { WITH_ETAG, "/ROOT/zimfile/A/index" }, { WITH_ETAG, "/ROOT/zimfile/A/Ray_Charles" }, + + { WITH_ETAG, "/ROOT/raw/zimfile/content/A/index" }, + { WITH_ETAG, "/ROOT/raw/zimfile/content/A/Ray_Charles" }, }; const ResourceCollection resources200Uncompressible{ @@ -208,6 +211,10 @@ const ResourceCollection resources200Uncompressible{ { WITH_ETAG, "/ROOT/corner_cases/A/empty.html" }, { WITH_ETAG, "/ROOT/corner_cases/-/empty.css" }, { WITH_ETAG, "/ROOT/corner_cases/-/empty.js" }, 
+ + // The title and creator are too small to be compressed + { WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" }, + { WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" }, }; ResourceCollection all200Resources() @@ -313,6 +320,32 @@ TEST_F(ServerTest, BookMainPageIsRedirectedToArticleIndex) ASSERT_EQ("/ROOT/zimfile/A/index", g->get_header_value("Location")); } + +TEST_F(ServerTest, RawEntry) +{ + auto p = zfs1_->GET("/ROOT/raw/zimfile/meta/Title"); + EXPECT_EQ(200, p->status); + EXPECT_EQ(p->body, std::string("Ray Charles")); + + p = zfs1_->GET("/ROOT/raw/zimfile/meta/Creator"); + EXPECT_EQ(200, p->status); + EXPECT_EQ(p->body, std::string("Wikipedia")); + + // The raw content of Ray_Charles returned by the server is + // the same as the one in the zim file. + auto archive = zim::Archive("./test/zimfile.zim"); + auto entry = archive.getEntryByPath("A/Ray_Charles"); + p = zfs1_->GET("/ROOT/raw/zimfile/content/A/Ray_Charles"); + EXPECT_EQ(200, p->status); + EXPECT_EQ(std::string(p->body), std::string(entry.getItem(true).getData())); + + // ... but the "normal" content is not + p = zfs1_->GET("/ROOT/zimfile/A/Ray_Charles"); + EXPECT_EQ(200, p->status); + EXPECT_NE(std::string(p->body), std::string(entry.getItem(true).getData())); + EXPECT_TRUE(p->body.find("taskbar") != std::string::npos); +} + TEST_F(ServerTest, HeadMethodIsSupported) { for ( const Resource& res : all200Resources() )