Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use --json-lines mode for list archive results for better parsing of unusual filenames #885

Merged
merged 2 commits into from
Mar 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions src/vorta/borg/list_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ def finished_event(self, result):
self.result.emit(result)

@classmethod
def prepare(cls, profile):
def prepare(cls, profile, archive_name):
ret = super().prepare(profile)
if not ret['ok']:
return ret
else:
ret['ok'] = False # Set back to false, so we can do our own checks here.

cmd = ['borg', 'list', '--info', '--log-json', '--format', "{size:8d}{TAB}{mtime}{TAB}{path}{NL}"]
cmd.append(f'{profile.repo.url}')

ret['archive_name'] = archive_name
ret['cmd'] = [
'borg', 'list', '--info', '--log-json', '--json-lines',
'--format', "{size:8d}{TAB}{mtime}{TAB}{path}{NL}",
f'{profile.repo.url}::{archive_name}']
ret['ok'] = True
m3nu marked this conversation as resolved.
Show resolved Hide resolved
ret['cmd'] = cmd

return ret
4 changes: 1 addition & 3 deletions src/vorta/views/archive_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,13 +376,11 @@ def list_archive_action(self):
archive_cell = self.archiveTable.item(row_selected[0].row(), 4)
if archive_cell:
archive_name = archive_cell.text()
params = BorgListArchiveThread.prepare(profile)
params = BorgListArchiveThread.prepare(profile, archive_name)

if not params['ok']:
self._set_status(params['message'])
return
params['cmd'][-1] += f'::{archive_name}'
params['archive_name'] = archive_name
self._set_status('')
self._toggle_all_buttons(False)

Expand Down
25 changes: 16 additions & 9 deletions src/vorta/views/extract_dialog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import os
from datetime import datetime

from PyQt5 import uic
from PyQt5.QtCore import Qt
Expand All @@ -20,21 +22,26 @@ def __init__(self, fs_data, archive):
nested_file_list = nested_dict()
self.selected = set()

def parse_line(line):
size, modified, full_path = line.split("\t")
size = int(size)
dir, name = os.path.split(full_path)

def parse_json_line(line):
data = json.loads(line)
size = data["size"]
# python >= 3.7
# modified = datetime.fromisoformat(data["mtime"]).ctime()
# python < 3.7
try:
modified = datetime.strptime(data["mtime"], "%Y-%m-%dT%H:%M:%S.%f").ctime()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about using the date string as-is? Otherwise this could get slow for many entries (hundreds of thousands of files are common). Back when I added the feature, I tried hard to keep it fast.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it's still fast even when parsing the date. Tried with 40k files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't recall now, but I think I was trying to get it into a datetime object so that it would be easier to change how it is displayed in the GUI, if ever needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The time string from the JSON output is a bit ugly with the fractional seconds included. So let's just use ctime() for now. Maybe in the future there could be a setting for a default time format or something.

except ValueError:
modified = datetime.strptime(data["mtime"], "%Y-%m-%dT%H:%M:%S").ctime()
dirpath, name = os.path.split(data["path"])
# add to nested dict of folders to find nested dirs.
d = get_dict_from_list(nested_file_list, dir.split("/"))
d = get_dict_from_list(nested_file_list, dirpath.split("/"))
if name not in d:
d[name] = {}

return size, modified, name, dir
return size, modified, name, dirpath

for line in fs_data.split("\n"):
try:
files_with_attributes.append(parse_line(line))
files_with_attributes.append(parse_json_line(line))
except ValueError:
pass

Expand Down
Loading