@@ -84,13 +84,6 @@ def rearrange_archive(root):
84
84
for fn in fnmatch .filter (os .listdir (root ), 'cppreference-export*.xml' ):
85
85
os .remove (os .path .join (root , fn ))
86
86
87
- def add_file_to_rename_map (rename_map , dir , fn , new_fn ):
88
- path = os .path .join (dir , fn )
89
- if not os .path .isfile (path ):
90
- print ("ERROR: Not renaming '{0}' because path does not exist" .format (path ))
91
- return
92
- rename_map .append ((dir , fn , new_fn ))
93
-
94
87
# Converts the complex URL of a resource supplied by the MediaWiki loader to a simplified name
95
88
def convert_loader_name (fn ):
96
89
if "modules=site&only=scripts" in fn :
@@ -106,56 +99,53 @@ def convert_loader_name(fn):
106
99
else :
107
100
raise Exception ('Loader file {0} does not match any known files' .format (fn ))
108
101
109
- def find_files_to_be_renamed (root ):
110
- # Returns a rename map: array of tuples each of which contain three strings:
111
- # the directory the file resides in, the source and destination filenames.
102
def build_rename_map(root):
    """Build the map describing which files below *root* must be renamed.

    The returned dict contains two kinds of keys:

    * a bare file name -> new file name.  The entry applies to every file
      with that name, in whatever directory it is found.  It is created for
      ``load.php?...`` files (new name chosen by ``convert_loader_name``)
      and for names containing ``?``, ``*`` or ``"``.
    * ``os.path.join(directory, name)`` -> new file name.  The entry applies
      to that single file only.  ``name`` is the name the file carries
      *after* the bare-name rule above has been applied.  It is created for
      names that differ only by case from another name in the same
      directory, so that they do not clash on case-insensitive filesystems.

    Raises whatever ``convert_loader_name`` raises for a loader file it
    does not recognise.
    """
    loader = re.compile(r'load\.php\?.*')
    query = re.compile(r'\?.*')
    result = {}

    # Pass 1: find files with invalid names -> rename all occurrences.
    all_names = {name for _, _, filenames in os.walk(root) for name in filenames}
    for fn in all_names:
        if loader.match(fn):
            result[fn] = convert_loader_name(fn)
        elif any(c in fn for c in '?*"'):
            # Drop everything from the first '?' on, then spell out the
            # remaining characters that are not allowed in file names.
            new_fn = query.sub('', fn)
            new_fn = new_fn.replace('"', '_q_')
            new_fn = new_fn.replace('*', '_star_')
            result[fn] = new_fn

    # Pass 2: find files that conflict on case-insensitive filesystems.
    for dirpath, _, filenames in os.walk(root):
        # os.walk reports names in arbitrary order, which would make the
        # choice of the file that receives the numeric suffix differ from
        # machine to machine.  Process the post-rename names in a fixed
        # order instead.  Reverse order puts 'nan.html' before 'NAN.html',
        # so the lower-case page keeps its name and 'NAN.html' becomes
        # 'NAN.2.html', as the formerly hard-coded renames did.
        final_names = sorted((result.get(s, s) for s in filenames), reverse=True)

        # lower-cased name -> number of files seen so far with that name
        seen = {}
        for fn in final_names:
            low = fn.lower()
            num = seen.get(low, 0)
            if num > 0:
                name, ext = os.path.splitext(fn)
                # add file with its path -> only rename that occurrence
                # NOTE(review): two files in one directory whose post-rename
                # names are byte-identical (not merely equal ignoring case)
                # share this key and cannot be told apart by it.
                result[os.path.join(dirpath, fn)] = "{}.{}{}".format(name, num + 1, ext)
            seen[low] = num + 1

    return result
132
+
133
def rename_files(root, rename_map):
    """Rename every file below *root* that *rename_map* has an entry for.

    *rename_map* is consulted in two steps for each file:

    1. by bare file name; a hit gives the new name for the file;
    2. by full path (``os.path.join(directory, name)``, using the name from
       step 1 if there was a hit, the current name otherwise); a hit gives
       the name to use for this particular file.

    Files with no applicable entry are left alone.  Each rename is reported
    on standard output.
    """
    for folder, _, names in os.walk(root):
        for current in names:
            source = os.path.join(folder, current)

            renamed = rename_map.get(current)
            if renamed:
                # the renamed file may itself be involved in a case conflict
                renamed = rename_map.get(os.path.join(folder, renamed), renamed)
            else:
                # name is kept as is unless it is involved in a case conflict
                renamed = rename_map.get(source)

            if not renamed:
                continue

            target = os.path.join(folder, renamed)
            print("Renaming {0}\n to {1}".format(source, target))
            shutil.move(source, target)
159
149
160
150
def find_html_files (root ):
161
151
# find files that need to be preprocessed
@@ -172,7 +162,7 @@ def is_loader_link(target):
172
162
173
163
def transform_loader_link(target, file, root):
    """Turn an absolute load.php link into a link relative to *file*.

    The loader resource is addressed as ``<root>/common/<simplified name>``,
    where the simplified name comes from ``convert_loader_name(target)``.
    The result is that location expressed relative to the directory
    containing *file*.
    """
    simplified = convert_loader_name(target)
    link_dir = os.path.dirname(file)
    return os.path.relpath(os.path.join(root, "common", simplified), link_dir)
177
167
178
168
def is_ranges_placeholder (target ):
@@ -201,20 +191,33 @@ def transform_ranges_placeholder(target, file, root):
201
191
return os .path .relpath (abstarget , os .path .dirname (file ))
202
192
203
193
def is_external_link(target):
    """Tell whether *target* points outside the local file tree.

    A link counts as external when parsing it as a URL yields a scheme
    (``http:``, ``mailto:``, ...) or a network location (``//host/...``).
    """
    parts = urllib.parse.urlparse(target)
    has_neither = parts.scheme == '' and parts.netloc == ''
    return not has_neither
196
+
197
def trasform_relative_link(rename_map, target, file):
    """Rewrite the relative link *target* found in *file* per *rename_map*.

    rename_map -- dict with bare file names and/or full paths as keys and
                  new file names as values.
    target     -- the link as written in the document (percent-encoded).
    file       -- path of the document containing the link; the link is
                  resolved against its directory for full-path lookups.

    Returns the rewritten link: percent-encoded path plus the original
    fragment.  Any query string of *target* is dropped.
    """
    # urlsplit returns (scheme, host, path, query, fragment).  Unlike
    # urlparse it does not cut ';params' off the last path segment, so a
    # file name containing ';' stays intact and goes through the rename
    # lookup like any other name.
    _, _, path, _, fragment = urllib.parse.urlsplit(target)

    path = urllib.parse.unquote(path)
    path = path.replace('../../upload.cppreference.com/mwiki/', '../common/')
    path = path.replace('../mwiki/', '../common/')

    dir, fn = os.path.split(path)
    base_dir = os.path.dirname(file)

    new_fn = rename_map.get(fn)
    if new_fn:
        # look for case conflict of the renamed file
        abstarget = os.path.normpath(os.path.join(base_dir, dir, new_fn))
        new_fn = rename_map.get(abstarget, new_fn)
    else:
        # original filename unchanged, look for case conflict
        abstarget = os.path.normpath(os.path.join(base_dir, path))
        new_fn = rename_map.get(abstarget)
    if new_fn:
        path = os.path.join(dir, new_fn)

    path = urllib.parse.quote(path)
    return urllib.parse.urlunsplit(('', '', path, '', fragment))
218
221
219
222
# Transforms a link in the given file according to rename map.
220
223
# target is the link to transform.
@@ -230,7 +233,7 @@ def transform_link(rename_map, target, file, root):
230
233
if is_external_link (target ):
231
234
return target
232
235
233
- return trasform_relative_link (rename_map , target )
236
+ return trasform_relative_link (rename_map , target , file )
234
237
235
238
def has_class (el , * classes_to_check ):
236
239
value = el .get ('class' )
0 commit comments