From e0480800d5174da5f0ae44c1aceccd65cffbd901 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 30 Jan 2019 17:47:44 +0100 Subject: [PATCH 1/3] cache_sync: compute size/count stats, borg info: consider part files fixes #3522 --- src/borg/archive.py | 10 ++++-- src/borg/cache_sync/cache_sync.c | 41 +++++++++++++++++++++++-- src/borg/cache_sync/unpack.h | 52 +++++++++++++++++++++++++++++--- src/borg/hashindex.pyx | 31 +++++++++++++++++-- 4 files changed, 121 insertions(+), 13 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index ae611179da..f9624a5b86 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -500,10 +500,14 @@ def add(id): add(id) data = self.key.decrypt(id, chunk) sync.feed(data) - stats = Statistics() - stats.osize, stats.csize, unique_size, stats.usize, unique_chunks, chunks = archive_index.stats_against(cache.chunks) - stats.nfiles = sync.num_files + size, csize, unique_size, unique_csize, unique_chunks, chunks = archive_index.stats_against(cache.chunks) pi.finish() + stats = Statistics() + stats.nfiles = sync.num_files_totals if self.consider_part_files \ + else sync.num_files_totals - sync.num_files_parts + stats.osize = size if self.consider_part_files else size - sync.size_parts + stats.csize = csize if self.consider_part_files else csize - sync.csize_parts + stats.usize = unique_csize # the part files use same chunks as the full file return stats @contextmanager diff --git a/src/borg/cache_sync/cache_sync.c b/src/borg/cache_sync/cache_sync.c index 53b6155223..1a2cfb0f2b 100644 --- a/src/borg/cache_sync/cache_sync.c +++ b/src/borg/cache_sync/cache_sync.c @@ -38,7 +38,12 @@ cache_sync_init(HashIndex *chunks) unpack_init(&ctx->ctx); /* needs to be set only once */ ctx->ctx.user.chunks = chunks; - ctx->ctx.user.num_files = 0; + ctx->ctx.user.parts.size = 0; + ctx->ctx.user.parts.csize = 0; + ctx->ctx.user.parts.num_files = 0; + ctx->ctx.user.totals.size = 0; + ctx->ctx.user.totals.csize = 0; + ctx->ctx.user.totals.num_files = 0; ctx->buf = NULL; ctx->head = 0; ctx->tail = 0; @@ -63,9 +68,39 @@ cache_sync_error(const CacheSyncCtx *ctx) } static uint64_t -cache_sync_num_files(const CacheSyncCtx *ctx) +cache_sync_num_files_totals(const CacheSyncCtx *ctx) { - return ctx->ctx.user.num_files; + return ctx->ctx.user.totals.num_files; +} + +static uint64_t +cache_sync_num_files_parts(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.parts.num_files; +} + +static uint64_t +cache_sync_size_totals(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.totals.size; +} + +static uint64_t +cache_sync_size_parts(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.parts.size; +} + +static uint64_t +cache_sync_csize_totals(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.totals.csize; +} + +static uint64_t +cache_sync_csize_parts(const CacheSyncCtx *ctx) +{ + return ctx->ctx.user.parts.csize; } /** diff --git a/src/borg/cache_sync/unpack.h b/src/borg/cache_sync/unpack.h index 8332fcff9e..cb6a9b4594 100644 --- a/src/borg/cache_sync/unpack.h +++ b/src/borg/cache_sync/unpack.h @@ -40,7 +40,7 @@ #endif typedef struct unpack_user { - /* Item.chunks is at the top level; we don't care about anything else, + /* Item.chunks and Item.part are at the top level; we don't care about anything else, * only need to track the current level to navigate arbitrary and unknown structure. * To discern keys from everything else on the top level we use expect_map_item_end. */ @@ -50,8 +50,6 @@ typedef struct unpack_user { HashIndex *chunks; - uint64_t num_files; - /* * We don't care about most stuff. This flag tells us whether we're at the chunks structure, * meaning: @@ -59,6 +57,13 @@ typedef struct unpack_user { * ^-HERE-^ */ int inside_chunks; + + /* is this item a .part file (created for checkpointing inside files)? */ + int part; + + /* does this item have a chunks list in it? */ + int has_chunks; + enum { /* the next thing is a map key at the Item root level, * and it might be the "chunks" key we're looking for */ @@ -95,11 +100,28 @@ typedef struct unpack_user { expect_item_begin } expect; + /* collect values here for current chunklist entry */ struct { char key[32]; uint32_t csize; uint32_t size; } current; + + /* summing up chunks sizes here within a single item */ + struct { + uint64_t size, csize; + } item; + + /* total sizes and files count coming from all files */ + struct { + uint64_t size, csize, num_files; + } totals; + + /* total sizes and files count coming from part files */ + struct { + uint64_t size, csize, num_files; + } parts; + } unpack_user; struct unpack_context; @@ -270,6 +292,8 @@ static inline int unpack_callback_array_end(unpack_user* u) return -1; } } + u->item.size += u->current.size; + u->item.csize += u->current.csize; u->expect = expect_entry_begin_or_chunks_end; break; @@ -303,6 +327,10 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n) } /* This begins a new Item */ u->expect = expect_chunks_map_key; + u->part = 0; + u->has_chunks = 0; + u->item.size = 0; + u->item.csize = 0; } if(u->inside_chunks) { @@ -338,6 +366,19 @@ static inline int unpack_callback_map_end(unpack_user* u) SET_LAST_ERROR("Unexpected map end"); return -1; } + if(u->level == 0) { + /* This ends processing of an Item */ + if(u->has_chunks) { + if(u->part) { + u->parts.num_files += 1; + u->parts.size += u->item.size; + u->parts.csize += u->item.csize; + } + u->totals.num_files += 1; + u->totals.size += u->item.size; + u->totals.csize += u->item.csize; + } + } return 0; } @@ -360,7 +401,10 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* if(length == 6 && !memcmp("chunks", p, 6)) { u->expect = expect_chunks_begin; u->inside_chunks = 1; - u->num_files++; + u->has_chunks = 1; + } else if(length == 4 && !memcmp("part", p, 4)) { + u->expect = expect_map_item_end; + u->part = 1; } else { u->expect = expect_map_item_end; } diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 57d13240d9..e40c426ebc 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -46,7 +46,12 @@ cdef extern from "cache_sync/cache_sync.c": CacheSyncCtx *cache_sync_init(HashIndex *chunks) const char *cache_sync_error(const CacheSyncCtx *ctx) - uint64_t cache_sync_num_files(const CacheSyncCtx *ctx) + uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx) + uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx) + uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx) + uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx) + uint64_t cache_sync_csize_totals(const CacheSyncCtx *ctx) + uint64_t cache_sync_csize_parts(const CacheSyncCtx *ctx) int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length) void cache_sync_free(CacheSyncCtx *ctx) @@ -525,5 +530,25 @@ cdef class CacheSynchronizer: raise ValueError('cache_sync_feed failed: ' + error.decode('ascii')) @property - def num_files(self): - return cache_sync_num_files(self.sync) + def num_files_totals(self): + return cache_sync_num_files_totals(self.sync) + + @property + def num_files_parts(self): + return cache_sync_num_files_parts(self.sync) + + @property + def size_totals(self): + return cache_sync_size_totals(self.sync) + + @property + def size_parts(self): + return cache_sync_size_parts(self.sync) + + @property + def csize_totals(self): + return cache_sync_csize_totals(self.sync) + + @property + def csize_parts(self): + return cache_sync_csize_parts(self.sync) From f8feaa7dd5136542d956b77e4e7b43528e8a6bd1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 31 Jan 2019 06:42:54 +0100 Subject: [PATCH 2/3] fixup: rename expect_chunks_map_key state to expect_map_key --- src/borg/cache_sync/unpack.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/borg/cache_sync/unpack.h b/src/borg/cache_sync/unpack.h index cb6a9b4594..f340556560 100644 --- a/src/borg/cache_sync/unpack.h +++ b/src/borg/cache_sync/unpack.h @@ -66,14 +66,14 @@ typedef struct unpack_user { enum { /* the next thing is a map key at the Item root level, - * and it might be the "chunks" key we're looking for */ - expect_chunks_map_key, + * and it might be the "chunks" or "part" key we're looking for */ + expect_map_key, - /* blocking state to expect_chunks_map_key + /* blocking state to expect_map_key * { 'stuff': , 'chunks': [ - * ecmk -> emie -> -> -> -> ecmk ecb eeboce + * emk -> emie -> -> -> -> emk ecb eeboce * (nested containers are tracked via level) - * ecmk=expect_chunks_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin, + * emk=expect_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin, * eeboce=expect_entry_begin_or_chunks_end */ expect_map_item_end, @@ -129,7 +129,7 @@ typedef struct unpack_context unpack_context; typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); #define UNEXPECTED(what) \ - if(u->inside_chunks || u->expect == expect_chunks_map_key) { \ + if(u->inside_chunks || u->expect == expect_map_key) { \ SET_LAST_ERROR("Unexpected object: " what); \ return -1; \ } @@ -326,7 +326,7 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n) return -1; } /* This begins a new Item */ - u->expect = expect_chunks_map_key; + u->expect = expect_map_key; u->part = 0; u->has_chunks = 0; u->item.size = 0; @@ -349,7 +349,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current) if(u->level == 1) { switch(u->expect) { case expect_map_item_end: - u->expect = expect_chunks_map_key; + u->expect = expect_map_key; break; default: SET_LAST_ERROR("Unexpected map item"); @@ -397,7 +397,7 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* memcpy(u->current.key, p, 32); u->expect = expect_size; break; - case expect_chunks_map_key: + case expect_map_key: if(length == 6 && !memcmp("chunks", p, 6)) { u->expect = expect_chunks_begin; u->inside_chunks = 1; From b407b36320cac8170ccec2a7b12cbbaab160cade Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 2 Feb 2019 14:39:04 +0100 Subject: [PATCH 3/3] fixup: compute all from sync.* --- src/borg/archive.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index f9624a5b86..6ec168e265 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -500,13 +500,15 @@ def add(id): add(id) data = self.key.decrypt(id, chunk) sync.feed(data) - size, csize, unique_size, unique_csize, unique_chunks, chunks = archive_index.stats_against(cache.chunks) + unique_csize = archive_index.stats_against(cache.chunks)[3] pi.finish() stats = Statistics() stats.nfiles = sync.num_files_totals if self.consider_part_files \ else sync.num_files_totals - sync.num_files_parts - stats.osize = size if self.consider_part_files else size - sync.size_parts - stats.csize = csize if self.consider_part_files else csize - sync.csize_parts + stats.osize = sync.size_totals if self.consider_part_files \ + else sync.size_totals - sync.size_parts + stats.csize = sync.csize_totals if self.consider_part_files \ + else sync.csize_totals - sync.csize_parts stats.usize = unique_csize # the part files use same chunks as the full file return stats