-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfiletracker.py
executable file
·126 lines (98 loc) · 2.99 KB
/
filetracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
#
# Copyright (C) 2018 Jiun Y. Yen
#
# This is a free program. You can redistribute and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation
#
# This is distributed with hope of being useful. No warranty of any kind whatsoever.
#
# Have fun!
#
# Instruction:
#   1. Copy this to wherever you want
# 2. Enter $ python3 filetracker.py <path of directory containing files to track> <filename to keep track of files>
# i.e.
# $ python3 filetracker.py foo/ bar.txt
#
# Imports
import os
import sys
# Functions
def compile_files(p_dir):
    """
    Walk p_dir recursively and collect every file it contains.
    :param p_dir: String, path of directory containing new files
    :return: A list, dir_paths, of directory paths, and a list, file_names, of file names with matching indices to
             dir_paths
    """
    dir_paths = []
    file_names = []
    # os.walk visits every subdirectory; record the containing directory once
    # per file so the two lists stay index-aligned.
    for root, _, names in os.walk(p_dir):
        for name in names:
            dir_paths.append(root)
            file_names.append(name)
    return (dir_paths, file_names)
def show_duplicated_files(dir_paths, file_names, p_dir):
    """
    Display duplicated file names (case-insensitive) and return the unique ones.
    :param dir_paths: List, paths of directories that contains the files
    :param file_names: List, file names
    :param p_dir: String, path of the directory currently investigating
    :return: A list, dir_paths, of directory paths, and a list, file_names, of file names with matching indices to
             dir_paths with no duplicated file names
    """
    # Pair each name with its directory in one sortable string.
    # NOTE(review): a file name containing '>>>' would break the split below.
    data = ['%s>>>%s' % (f, d) for f, d in zip(file_names, dir_paths)]
    # Fix: sort case-insensitively so names differing only in case end up
    # adjacent. A plain sort could place 'B.txt' between 'A.txt' and 'a.txt',
    # hiding the duplicate from the adjacent-pair comparison below.
    data.sort(key=str.upper)
    file_names = []
    dir_paths = []
    # Fix: an empty input used to raise IndexError on the unguarded pop(0).
    if not data:
        return (dir_paths, file_names)
    f0, d0 = data.pop(0).split('>>>')
    file_names.append(f0)
    dir_paths.append(d0)
    while data:
        f1, d1 = data.pop(0).split('>>>')
        if f0.upper() == f1.upper():
            # Same name as the previous kept entry (ignoring case): report it
            # and drop it; only the first occurrence is kept.
            print('Duplicated: %s @ %s' % (f0, p_dir))
        else:
            file_names.append(f1)
            dir_paths.append(d1)
            f0 = f1
    return (dir_paths, file_names)
def write_files(dir_paths, file_names, p_out):
    """
    Overwrite the file-tracking text file with one '<file>\\t<dir>' line per entry.

    Fix: the old docstring said "Append", but mode 'w+' truncates; overwrite is
    the intended behavior because __main__ merges the previous list into the
    input before calling this. Also dropped 'return ()', which contradicted the
    documented "returns nothing".

    :param dir_paths: List, paths of directories that contains the files
    :param file_names: List, file names
    :param p_out: String, path of the text file to keep track of all the files
    :return: Nothing
    """
    with open(p_out, 'w') as fh:
        # Batch the writes; file name first, then its directory.
        fh.writelines('%s\t%s\n' % (f, d) for d, f in zip(dir_paths, file_names))
def read_list(p_in):
    """
    Read the file-tracking text file written by write_files.

    Fix: blank lines or lines without a tab used to raise IndexError on tmp[1];
    they are now skipped. str.partition keeps any further tabs inside the
    directory path intact, where the old full split('\\t') silently dropped them.

    :param p_in: String, path of the text file to keep track of all the files
    :return: A list, dir_paths, of directory paths, and a list, file_names, of file names with matching indices to
             dir_paths
    """
    file_names = []
    dir_paths = []
    with open(p_in, 'r') as fh:
        for line in fh:
            line = line.strip()
            # Skip blank or malformed (tab-less) lines instead of crashing.
            if '\t' not in line:
                continue
            name, _, path = line.partition('\t')
            file_names.append(name)
            dir_paths.append(path)
    return (dir_paths, file_names)
# Main
if __name__ == "__main__":
    # Fix: validate the command line; without this, running the script with
    # missing arguments died with a raw IndexError on sys.argv.
    if len(sys.argv) != 3:
        sys.exit('Usage: python3 filetracker.py <dir of files to track> <tracking list file>')
    p_source = sys.argv[1]   # directory to scan for new files
    p_list = sys.argv[2]     # text file that persists the tracked list
    ds, fs = compile_files(p_source)
    # Merge the previously tracked entries (if any) before de-duplicating,
    # so duplicates against past runs are also reported.
    if os.path.isfile(p_list):
        ds0, fs0 = read_list(p_list)
        ds += ds0
        fs += fs0
    ds, fs = show_duplicated_files(ds, fs, p_source)
    write_files(ds, fs, p_list)
    print('File count: %d' % len(fs))