Skip to content

Commit

Permalink
Merge pull request #4539 from ThomasWaldmann/fix-extract-hardlinks
Browse files (browse the repository at this point in the history)
slave hardlinks extraction issue, see #4350 (master)
  • Loading branch information
ThomasWaldmann authored May 11, 2019
2 parents 70c049e + f33f318 commit 839e92f
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 16 deletions.
42 changes: 36 additions & 6 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def __init__(self, repository, key):
self.repository = repository
self.key = key

def unpack_many(self, ids, filter=None, preload=False):
def unpack_many(self, ids, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
"""
Return iterator of items.
Expand All @@ -265,12 +265,40 @@ def unpack_many(self, ids, filter=None, preload=False):
for item in items:
if 'chunks' in item:
item.chunks = [ChunkListEntry(*e) for e in item.chunks]

def preload(chunks):
self.repository.preload([c.id for c in chunks])

if filter:
items = [item for item in items if filter(item)]

if preload:
for item in items:
if 'chunks' in item:
self.repository.preload([c.id for c in item.chunks])
if filter and partial_extract:
# if we do only a partial extraction, computing the items to preload gets a bit
# complicated: if a hardlink master item is not selected (== not extracted),
# we still need to preload its chunks if a corresponding hardlink slave
# is selected (== is extracted).
# due to a side effect of the filter() call, the hardlink_masters dict is already populated at this point.
masters_preloaded = set()
for item in items:
if 'chunks' in item: # regular file, maybe a hardlink master
preload(item.chunks)
# if this is a hardlink master, remember that we already preloaded it:
if 'source' not in item and hardlinkable(item.mode) and item.get('hardlink_master', True):
masters_preloaded.add(item.path)
elif 'source' in item and hardlinkable(item.mode): # hardlink slave
source = item.source
if source not in masters_preloaded:
# we only need to preload *once* (for the 1st selected slave)
chunks, _ = hardlink_masters[source]
preload(chunks)
masters_preloaded.add(source)
else:
# easy: no filter or no partial extraction, so every item still in `items` is selected and we preload all of their chunks.
for item in items:
if 'chunks' in item:
preload(item.chunks)

for item in items:
yield item

Expand Down Expand Up @@ -486,8 +514,10 @@ def item_filter(self, item, filter=None):
return False
return filter(item) if filter else True

def iter_items(self, filter=None, preload=False):
for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
assert not (filter and partial_extract and preload) or hardlink_masters is not None
for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
preload=preload, hardlink_masters=hardlink_masters,
filter=lambda item: self.item_filter(item, filter)):
yield item

Expand Down
5 changes: 3 additions & 2 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,8 @@ def peek_and_store_hardlink_masters(item, matched):
else:
pi = None

for item in archive.iter_items(filter, preload=True):
for item in archive.iter_items(filter, partial_extract=partial_extract,
preload=True, hardlink_masters=hardlink_masters):
orig_path = item.path
if strip_components:
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
Expand Down Expand Up @@ -1011,7 +1012,7 @@ def item_to_tarinfo(item, original_path):
return None, stream
return tarinfo, stream

for item in archive.iter_items(filter, preload=True):
for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
orig_path = item.path
if strip_components:
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
Expand Down
20 changes: 12 additions & 8 deletions src/borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,18 @@ def test_mount_hardlinks(self):
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'

@requires_hardlinks
def test_extract_hardlinks(self):
def test_extract_hardlinks1(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'

@requires_hardlinks
def test_extract_hardlinks2(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
Expand All @@ -840,13 +851,6 @@ def test_extract_hardlinks(self):
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'

def test_extract_include_exclude(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
Expand Down

0 comments on commit 839e92f

Please sign in to comment.