Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show backup size with excludes applied #961

Merged
merged 6 commits into from
Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 92 additions & 10 deletions src/vorta/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,97 @@
_network_status_monitor = None


# copied from https://github.com/borgbackup/borg/blob/master/src/borg/shellpattern.py
def pattern_to_regex(pat, match_end=r"\Z"):
    """Translate a shell-style pattern into a regular expression string.

    Supported syntax:

    * ``**<sep>`` (``<sep>`` is the platform-specific path separator, "/" on
      POSIX systems) matches zero or more complete directory levels.
    * ``*`` matches zero or more characters, never a path separator.
    * ``?`` matches exactly one character, never a path separator.
    * ``[seq]`` / ``[!seq]`` become a regex character class / negated class.
      Wrap a meta-character in brackets for a literal match (``[?]`` matches
      the literal "?"). A ``[`` without a closing ``]`` is taken literally.

    ``match_end`` is a regular expression appended after the translated
    pattern; the default anchors the match at the end of the string.

    This function is derived from the "fnmatch" module distributed with the
    Python standard library.
    Copyright (C) 2001-2016 Python Software Foundation. All rights reserved.

    TODO: support {alt1,alt2} shell-style alternatives
    """
    sep = os.path.sep
    # Separator-dependent fragments. The separator is always backslash-escaped
    # so that a backslash separator is handled the same way as "/".
    name_chars = r"[^\%s]*" % sep
    name_char = r"[^\%s]" % sep
    dir_levels = r"(?:[^\%s]*\%s)*" % (sep, sep)

    end = len(pat)
    pieces = []
    pos = 0

    while pos < end:
        char = pat[pos]
        pos += 1

        if char == "*":
            if pat.startswith("*" + sep, pos):
                # "**<sep>": zero or more full directory names, each with its
                # trailing separator.
                pieces.append(dir_levels)
                pos += 2
            else:
                # Single "*": stays within one path component.
                pieces.append(name_chars)
            continue

        if char == "?":
            pieces.append(name_char)
            continue

        if char != "[":
            # Ordinary character: match it literally.
            pieces.append(re.escape(char))
            continue

        # Bracket expression: locate the closing "]". A leading "!" (negation)
        # and a "]" directly after the opening are part of the set, so the
        # search starts behind them.
        close = pos
        if close < end and pat[close] == "!":
            close += 1
        if close < end and pat[close] == "]":
            close += 1
        close = pat.find("]", close)

        if close < 0:
            # Unterminated "[": emit a literal bracket and keep scanning from
            # the character right after it.
            pieces.append("\\[")
            continue

        members = pat[pos:close].replace("\\", "\\\\")
        pos = close + 1
        if members.startswith("!"):
            # Shell negation becomes regex negation.
            members = "^" + members[1:]
        elif members.startswith("^"):
            # A leading "^" is literal in shell syntax; escape it for the regex.
            members = "\\" + members
        pieces.append("[%s]" % members)

    return "(?ms)" + "".join(pieces) + match_end


class FilePathInfoAsync(QThread):
    """Thread that computes size and file count of a path, honouring excludes.

    The result is delivered through ``signal`` once ``run`` has finished.
    """

    # Emitted with (path, size, files count); size and count are sent as str.
    signal = pyqtSignal(str, str, str)

    def __init__(self, path, exclude_patterns_str):
        """Prepare the calculation for ``path``.

        :param path: path to measure.
        :param exclude_patterns_str: exclude patterns, one per line. ``None``
            or an empty string means that nothing is excluded.
        """
        # Initialise the Qt base object before touching the instance.
        QThread.__init__(self)
        self.path = path
        self.exiting = False

        # One pattern per line; surrounding whitespace is stripped and blank
        # lines are dropped.
        self.exclude_patterns = []
        for raw_line in (exclude_patterns_str or '').splitlines():
            line = raw_line.strip()
            if line != '':
                self.exclude_patterns.append(line)

        # Translate exclude patterns to regular expressions. The empty
        # match_end leaves the expressions unanchored at the end.
        self.exclude_patterns_re = [
            pattern_to_regex(pattern, '')
            for pattern in self.exclude_patterns
        ]

    def run(self):
        """Compute size and file count for ``self.path`` and emit ``signal``."""
        self.files_count = 0
        self.size, self.files_count = get_path_datasize(
            self.path,
            self.exclude_patterns_re
        )
        self.signal.emit(self.path, str(self.size), str(self.files_count))


def get_directory_size(dir_path):
def get_directory_size(dir_path, exclude_patterns_re):
''' Get number of files only and total size in bytes from a path.
Based off https://stackoverflow.com/a/17936789 '''
data_size = 0
data_size_filtered = 0
seen = set()
seen_filtered = set()

for curr_path, _, file_names in os.walk(dir_path):
for file_name in file_names:
Expand All @@ -59,17 +130,25 @@ def get_directory_size(dir_path):
if os.path.islink(file_path):
continue

is_excluded = False
for pattern in exclude_patterns_re:
if re.match(pattern, file_path) is not None:
is_excluded = True
break

try:
stat = os.stat(file_path)
if stat.st_ino not in seen: # Visit each file only once
seen.add(stat.st_ino)
data_size += stat.st_size
if not is_excluded:
data_size_filtered += stat.st_size
seen_filtered.add(stat.st_ino)
except (FileNotFoundError, PermissionError):
continue

files_count = len(seen)
files_count_filtered = len(seen_filtered)

return data_size, files_count
return data_size_filtered, files_count_filtered


def get_network_status_monitor():
Expand All @@ -80,12 +159,15 @@ def get_network_status_monitor():
return _network_status_monitor


def get_path_datasize(path):
def get_path_datasize(path, exclude_patterns_re):
file_info = QFileInfo(path)
data_size = 0

if file_info.isDir():
data_size, files_count = get_directory_size(file_info.absoluteFilePath())
data_size, files_count = get_directory_size(
file_info.absoluteFilePath(),
exclude_patterns_re
)
# logger.info("path (folder) %s %u elements size now=%u (%s)",
# file_info.absoluteFilePath(), files_count, data_size, pretty_bytes(data_size))
else:
Expand Down
2 changes: 1 addition & 1 deletion src/vorta/views/source_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def update_path_info(self, index_row):
self.sourceFilesWidget.item(index_row, SourceColumn.Type).setText(self.tr("Calculating..."))
self.sourceFilesWidget.item(index_row, SourceColumn.Size).setText(self.tr("Calculating..."))
self.sourceFilesWidget.item(index_row, SourceColumn.FilesCount).setText(self.tr("Calculating..."))
getDir = FilePathInfoAsync(path)
getDir = FilePathInfoAsync(path, self.profile().exclude_patterns)
getDir.signal.connect(self.set_path_info)
getDir.setObjectName(path)
self.updateThreads.append(getDir) # this is ugly, is there a better way to keep the thread object?
Expand Down