diff --git a/include/zim/zim.h b/include/zim/zim.h
index 91ae3471..910b4a4c 100644
--- a/include/zim/zim.h
+++ b/include/zim/zim.h
@@ -114,6 +114,11 @@ namespace zim
      */
     CLUSTER_PTRS,
 
+    /**
+     * Checks that offsets inside each cluster are valid.
+     */
+    CLUSTERS_OFFSETS,
+
     /**
      * Checks that mime-type values in dirents are valid.
      */
diff --git a/src/cluster.cpp b/src/cluster.cpp
index 9c19433f..214af0e3 100644
--- a/src/cluster.cpp
+++ b/src/cluster.cpp
@@ -118,7 +118,9 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression*
     while (--n_offset)
     {
       OFFSET_TYPE new_offset = seqReader.read();
-      ASSERT(new_offset, >=, offset);
+      if (new_offset < offset) {
+        throw zim::ZimFileFormatError("Error parsing cluster. Offsets are not ordered.");
+      }
 
       m_blobOffsets.push_back(offset_t(new_offset));
       offset = new_offset;
diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp
index 91132f89..0dbfff79 100644
--- a/src/fileimpl.cpp
+++ b/src/fileimpl.cpp
@@ -625,6 +625,7 @@ class Grouping
       case IntegrityCheck::DIRENT_ORDER: return FileImpl::checkDirentOrder();
       case IntegrityCheck::TITLE_INDEX: return FileImpl::checkTitleIndex();
       case IntegrityCheck::CLUSTER_PTRS: return FileImpl::checkClusterPtrs();
+      case IntegrityCheck::CLUSTERS_OFFSETS: return FileImpl::checkClusters();
       case IntegrityCheck::DIRENT_MIMETYPES: return FileImpl::checkDirentMimeTypes();
       case IntegrityCheck::COUNT: ASSERT("shouldn't have reached here", ==, "");
     }
@@ -676,6 +677,27 @@ class Grouping
     return true;
   }
 
+  /**
+   * Checks every cluster of the archive by forcing it to be read.
+   *
+   * Returns false, after printing the error on stderr, as soon as reading
+   * a cluster throws ZimFileFormatError; returns true otherwise.
+   */
+  bool FileImpl::checkClusters() {
+    const cluster_index_type clusterCount = getCountClusters().v;
+    for ( cluster_index_type i = 0; i < clusterCount; ++i )
+    {
+      // Force a read of each cluster (which will throw ZimFileFormatError in case of error)
+      try {
+        readCluster(cluster_index_t(i));
+      } catch (const ZimFileFormatError& e) {
+        std::cerr << e.what() << std::endl;
+        return false;
+      }
+    }
+    return true;
+  }
+
   bool FileImpl::checkClusterPtrs() {
     const cluster_index_type clusterCount = getCountClusters().v;
     const offset_t validClusterRangeStart(80); // XXX: really???
diff --git a/src/fileimpl.h b/src/fileimpl.h
index d7f2678c..74550204 100644
--- a/src/fileimpl.h
+++ b/src/fileimpl.h
@@ -168,6 +168,7 @@ namespace zim
       bool checkDirentOrder();
       bool checkTitleIndex();
       bool checkClusterPtrs();
+      bool checkClusters();
       bool checkDirentMimeTypes();
   };
 
diff --git a/test/archive.cpp b/test/archive.cpp
index 6a87b9af..b5382ced 100644
--- a/test/archive.cpp
+++ b/test/archive.cpp
@@ -520,6 +520,11 @@ TEST(ZimArchive, validate)
     "Invalid cluster pointer\n"
   );
 
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.offset_in_cluster.zim",
+    "Error parsing cluster. Offsets are not ordered.\n"
+  );
+
   for(auto& testfile: getDataFilePath("invalid.nonsorted_dirent_table.zim"))
   {
     std::string expected;