Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend libkiwix structures to be constructed/updated from libzim structures #576

Merged
merged 3 commits into from
Aug 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/book.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ namespace pugi {
class xml_node;
}

// Forward declaration only: this header names zim::Archive solely as a
// reference parameter of Book::update(), so the full <zim/archive.h> include
// is left to the implementation file.
namespace zim {
class Archive;
}

namespace kiwix
{

Expand All @@ -43,6 +47,7 @@ class Book

bool update(const Book& other);
void update(const Reader& reader);
void update(const zim::Archive& archive);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
std::string getHumanReadableIdFromPath() const;
Expand Down
10 changes: 0 additions & 10 deletions include/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,16 +292,6 @@ class Reader
*/
string getScraper() const;

/**
* Get the origId of the zim file.
*
* The origId is only used in the case of a patch zim file and is the Id
* of the original zim file.
*
* @return The origId of the zim file as specified in the zim metadata.
*/
string getOrigId() const;

/**
* Get the favicon of the zim file.
*
Expand Down
39 changes: 22 additions & 17 deletions src/book.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
#include "tools/otherTools.h"
#include "tools/stringTools.h"
#include "tools/pathTools.h"
#include "tools/archiveTools.h"

#include <zim/archive.h>

#include <pugixml.hpp>

Expand Down Expand Up @@ -80,26 +83,28 @@ bool Book::update(const kiwix::Book& other)

void Book::update(const kiwix::Reader& reader)
{
m_path = reader.getZimFilePath();
update(*reader.getZimArchive());
}

void Book::update(const zim::Archive& archive) {
m_path = archive.getFilename();
m_pathValid = true;
m_id = reader.getId();
m_title = reader.getTitle();
m_description = reader.getDescription();
m_language = reader.getLanguage();
m_creator = reader.getCreator();
m_publisher = reader.getPublisher();
m_date = reader.getDate();
m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags();
m_id = getArchiveId(archive);
m_title = getArchiveTitle(archive);
m_description = getMetaDescription(archive);
m_language = getMetaLanguage(archive);
m_creator = getMetaCreator(archive);
m_publisher = getMetaPublisher(archive);
m_date = getMetaDate(archive);
m_name = getMetaName(archive);
m_flavour = getMetaFlavour(archive);
m_tags = getMetaTags(archive);
m_category = getCategoryFromTags();
m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount();
m_mediaCount = reader.getMediaCount();
m_size = static_cast<uint64_t>(reader.getFileSize()) << 10;
m_pathValid = true;
m_articleCount = archive.getArticleCount();
m_mediaCount = getArchiveMediaCount(archive);
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;

reader.getFavicon(m_favicon, m_faviconMimeType);
getArchiveFavicon(archive, m_favicon, m_faviconMimeType);
}

#define ATTR(name) node.attribute(name).value()
Expand Down
4 changes: 2 additions & 2 deletions src/manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
tmp_path = computeAbsolutePath(getCurrentDirectory(), path);
}
try {
kiwix::Reader reader(tmp_path);
book->update(reader);
zim::Archive archive(tmp_path);
book->update(archive);
book->setPathValid(true);
} catch (const std::exception& e) {
book->setPathValid(false);
Expand Down
92 changes: 5 additions & 87 deletions src/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,6 @@
#include "tools/otherTools.h"
#include "tools/archiveTools.h"

/* Lowercase hexadecimal digit for the upper four bits of v. */
inline char hi(char v)
{
  const int upperNibble = (v >> 4) & 0xf;
  return "0123456789abcdef"[upperNibble];
}

/* Lowercase hexadecimal digit for the lower four bits of v. */
inline char lo(char v)
{
  const int lowerNibble = v & 0xf;
  return "0123456789abcdef"[lowerNibble];
}

/**
 * Format raw UUID bytes as lowercase hexadecimal text grouped 8-4-4-4-12
 * (e.g. "01234567-89ab-cdef-0123-456789abcdef").
 *
 * Only the first 16 bytes of `in` are used. If `in` holds fewer than 16
 * bytes the missing ones are treated as zero, so the function never reads
 * past the end of the string.
 *
 * @param in The raw bytes of the UUID.
 * @return The 36-character textual form of the UUID.
 */
std::string hexUUID(std::string in)
{
  static const char digits[] = "0123456789abcdef";
  const unsigned uuidSize = 16;

  // Callers may hand over a short string (for instance one built from a
  // C string, which stops at the first zero byte); pad it so that every
  // index below stays in bounds.
  if (in.size() < uuidSize) {
    in.resize(uuidSize, '\0');
  }

  std::string out;
  out.reserve(36);
  for (unsigned n = 0; n < uuidSize; ++n) {
    // A dash precedes bytes 4, 6, 8 and 10.
    if (n == 4 || n == 6 || n == 8 || n == 10) {
      out += '-';
    }
    const unsigned char byte = static_cast<unsigned char>(in[n]);
    out += digits[byte >> 4];
    out += digits[byte & 0xf];
  }
  return out;
}

namespace kiwix
{
/* Constructor */
Expand Down Expand Up @@ -119,12 +81,7 @@ zim::Archive* Reader::getZimArchive() const

MimeCounterType Reader::parseCounterMetadata() const
{
try {
auto counterContent = zimArchive->getMetadata("Counter");
return parseMimetypeCounter(counterContent);
} catch (zim::EntryNotFound& e) {
return {};
}
return kiwix::parseArchiveCounter(*zimArchive);
}

/* Get the count of articles which can be indexed/displayed */
Expand All @@ -146,19 +103,7 @@ unsigned int Reader::getArticleCount() const
/* Get the count of media content (image, video and audio entries) in the ZIM file */
unsigned int Reader::getMediaCount() const
{
std::map<const std::string, unsigned int> counterMap
= this->parseCounterMetadata();
unsigned int counter = 0;

for (auto &pair:counterMap) {
if (startsWith(pair.first, "image/") ||
startsWith(pair.first, "video/") ||
startsWith(pair.first, "audio/")) {
counter += pair.second;
}
}

return counter;
return kiwix::getArchiveMediaCount(*zimArchive);
}

/* Get the total of all items of a ZIM file, redirects included */
Expand All @@ -170,9 +115,7 @@ unsigned int Reader::getGlobalCount() const
/* Return the UID of the ZIM file */
string Reader::getId() const
{
std::ostringstream s;
s << zimArchive->getUuid();
return s.str();
return kiwix::getArchiveId(*zimArchive);
}

Entry Reader::getRandomPage() const
Expand Down Expand Up @@ -281,7 +224,7 @@ string Reader::getRelation() const

string Reader::getFlavour() const
{
METADATA("Flavour")
return kiwix::getMetaFlavour(*zimArchive);
}

string Reader::getSource() const
Expand All @@ -295,31 +238,6 @@ string Reader::getScraper() const
}
#undef METADATA

/**
 * Get the origId of the zim file.
 *
 * Reads the "startfileuid" metadata, which is parsed here as a list of
 * decimal byte values, each terminated by a newline. The bytes are then
 * formatted with hexUUID().
 *
 * @return The formatted origId, or an empty string when the metadata is
 *         empty.
 */
string Reader::getOrigId() const
{
  string value;
  this->getMetadata("startfileuid", value);
  if (value.empty()) {
    return "";
  }

  // Zero-initialised so that bytes missing from the metadata read as 0.
  char rawUuid[16] = {};
  unsigned int filled = 0;
  std::string token;
  for (const char c : value) {
    if (c != '\n') {
      token += c;
      continue;
    }
    // Ignore any value beyond the 16 bytes of a UUID instead of writing
    // past the end of the buffer.
    if (filled == sizeof(rawUuid)) {
      break;
    }
    rawUuid[filled++] = atoi(token.c_str());
    token.clear();
  }

  // Pass the length explicitly: the buffer may legitimately contain zero
  // bytes, which would cut a C-string conversion short.
  return hexUUID(std::string(rawUuid, sizeof(rawUuid)));
}

Entry Reader::getEntryFromPath(const std::string& path) const
{
try {
Expand Down Expand Up @@ -546,7 +464,7 @@ bool Reader::isCorrupted() const
/* Return the file size in KiB; also works for split files */
unsigned int Reader::getFileSize() const
{
return zimArchive->getFilesize() / 1024;
return kiwix::getArchiveFileSize(*zimArchive);
}

}
61 changes: 49 additions & 12 deletions src/tools/archiveTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,6 @@ std::string getMetaTags(const zim::Archive& archive, bool original) {
return join(tags, ";");
}

/**
 * Copy the archive's illustration item into the output parameters.
 *
 * @param archive  The archive to read from.
 * @param content  Receives the data of the illustration item.
 * @param mimeType Receives the mimetype of the illustration item.
 * @return true on success, false if zim::EntryNotFound was thrown.
 */
bool getArchiveFavicon(const zim::Archive& archive,
                       std::string& content, std::string& mimeType)
{
  bool found = false;
  try {
    auto illustration = archive.getIllustrationItem();
    content = illustration.getData();
    mimeType = illustration.getMimetype();
    found = true;
  } catch (const zim::EntryNotFound&) {
    // Nothing to report beyond the `false` return value.
  }
  return found;
}

/* Value of the archive's "Language" metadata, as returned by getMetadata(). */
std::string getMetaLanguage(const zim::Archive& archive)
{
  std::string language = getMetadata(archive, "Language");
  return language;
}
Expand All @@ -101,6 +89,46 @@ std::string getMetaPublisher(const zim::Archive& archive) {
return getMetadata(archive, "Publisher");
}

/* Value of the archive's "Flavour" metadata, as returned by getMetadata(). */
std::string getMetaFlavour(const zim::Archive& archive)
{
  std::string flavour = getMetadata(archive, "Flavour");
  return flavour;
}

/**
 * Get the identifier of the archive.
 *
 * @param archive The archive to query.
 * @return The archive's UUID converted to a std::string.
 */
std::string getArchiveId(const zim::Archive& archive)
{
  // Named cast instead of a C-style cast: same conversion, but greppable
  // and unable to silently degrade into a reinterpret/const cast.
  return static_cast<std::string>(archive.getUuid());
}

/**
 * Fetch the archive's illustration item and hand back its data and mimetype.
 *
 * @param archive  The archive to read from.
 * @param content  Output: data of the illustration item.
 * @param mimeType Output: mimetype of the illustration item.
 * @return true when the item was read, false when zim::EntryNotFound
 *         was thrown while reading it.
 */
bool getArchiveFavicon(const zim::Archive& archive,
                       std::string& content, std::string& mimeType)
{
  try {
    auto illustration = archive.getIllustrationItem();
    content = illustration.getData();
    mimeType = illustration.getMimetype();
  } catch (const zim::EntryNotFound&) {
    return false;
  }
  return true;
}

/**
 * Count the media entries of an archive.
 *
 * Sums the counters returned by parseArchiveCounter() whose mimetype starts
 * with "image/", "video/" or "audio/".
 *
 * Open question: should this be in libzim?
 */
unsigned int getArchiveMediaCount(const zim::Archive& archive)
{
  const std::map<const std::string, unsigned int> mimeCounts = parseArchiveCounter(archive);

  const auto isMedia = [](const std::string& mimeType) {
    return startsWith(mimeType, "image/")
        || startsWith(mimeType, "video/")
        || startsWith(mimeType, "audio/");
  };

  unsigned int total = 0;
  for (const auto& entry : mimeCounts) {
    if (isMedia(entry.first)) {
      total += entry.second;
    }
  }
  return total;
}

/**
 * Size of the archive in units of 1024 bytes.
 *
 * The byte count reported by the archive is divided by 1024 (integer
 * division) and converted to the unsigned int return type.
 */
unsigned int getArchiveFileSize(const zim::Archive& archive)
{
  const auto sizeInBytes = archive.getFilesize();
  return sizeInBytes / 1024;
}

zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
{
return entry.getItem(true);
Expand All @@ -118,4 +146,13 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path
throw zim::EntryNotFound("Cannot find entry for non empty path");
}

/**
 * Read the archive's "Counter" metadata and parse it with
 * parseMimetypeCounter().
 *
 * @return The parsed counters, or an empty MimeCounterType when
 *         zim::EntryNotFound is thrown.
 */
MimeCounterType parseArchiveCounter(const zim::Archive& archive)
{
  try {
    auto rawCounter = archive.getMetadata("Counter");
    return parseMimetypeCounter(rawCounter);
  } catch (const zim::EntryNotFound&) {
    return {};
  }
}

} // kiwix
16 changes: 14 additions & 2 deletions src/tools/archiveTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define KIWIX_ARCHIVETOOLS_H

#include <zim/archive.h>
#include <tools/otherTools.h>

/**
* This file contains all the functions that would make handling data related to
Expand All @@ -33,15 +34,26 @@ namespace kiwix
std::string getArchiveTitle(const zim::Archive& archive);
std::string getMetaDescription(const zim::Archive& archive);
std::string getMetaTags(const zim::Archive& archive, bool original = false);
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType);
std::string getMetaLanguage(const zim::Archive& archive);
std::string getMetaName(const zim::Archive& archive);
std::string getMetaDate(const zim::Archive& archive);
std::string getMetaCreator(const zim::Archive& archive);
std::string getMetaPublisher(const zim::Archive& archive);
// Value of the "Flavour" metadata of the archive.
std::string getMetaFlavour(const zim::Archive& archive);
// UUID of the archive converted to a string.
std::string getArchiveId(const zim::Archive& archive);

// Copies the data and mimetype of the archive's illustration item into
// `content` and `mimeType`. Returns false if zim::EntryNotFound was thrown.
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType);

// Sum of the archive counters whose mimetype starts with "image/", "video/"
// or "audio/".
unsigned int getArchiveMediaCount(const zim::Archive& archive);
// File size of the archive divided by 1024.
unsigned int getArchiveFileSize(const zim::Archive& archive);

// Returns entry.getItem(true); presumably this resolves redirects to the
// final item — confirm against the libzim documentation.
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);

// Looks up an entry by path. NOTE(review): the implementation throws
// zim::EntryNotFound when no entry is found for a non-empty path; the rest
// of the lookup rules are not visible here.
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);

// Parses the "Counter" metadata of the archive; returns an empty result when
// zim::EntryNotFound is thrown.
MimeCounterType parseArchiveCounter(const zim::Archive& archive);

}

#endif