Skip to content

Commit f334b06

Browse files
committed
Merge remote-tracking branch 'upstream/master'
* upstream/master: Gadgets/Coliru: Acquire Ace editor resources via https; Headers: Fix definition of basic_ifstream; Add Doxygen web tag file to HTML book archive; Preprocess: rename fix_relative_link() to transform_link(); Preprocess: split fix_relative_link(); Preprocess: fix edge case handling in has_class(); Tests: add tests for has_class(); Tests: add tests for convert_loader_name(); Add script to run tests; Preprocess: move reusable components to separate folder; Transform: Rename incorrectly named class; Transform: Allow XML data to be transformed directly; Transform: Move reusable components to separate folder; Transform: use argparse for argument parsing; Transform: Move code in transform scripts to main()
2 parents a43b378 + 64caa54 commit f334b06

21 files changed

+1009
-709
lines changed

commands/preprocess.py

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (C) 2011, 2012 Povilas Kanapickas <povilas@radix.lt>
4+
#
5+
# This file is part of cppreference-doc
6+
#
7+
# This program is free software: you can redistribute it and/or modify
8+
# it under the terms of the GNU General Public License as published by
9+
# the Free Software Foundation, either version 3 of the License, or
10+
# (at your option) any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this program. If not, see http://www.gnu.org/licenses/.
19+
20+
from datetime import datetime
21+
import fnmatch
22+
from lxml import etree
23+
import re
24+
import os
25+
import sys
26+
import shutil
27+
import urllib.parse
28+
from xml_utils import xml_escape, xml_unescape
29+
30+
def rmtree_if_exists(dir):
    """Recursively delete *dir* when it is an existing directory.

    Nothing happens if the path is missing or is not a directory.
    """
    if not os.path.isdir(dir):
        return
    shutil.rmtree(dir)
33+
34+
def move_dir_contents_to_dir(srcdir, dstdir):
    """Move every entry found directly inside *srcdir* into *dstdir*.

    Each entry keeps its own name; *srcdir* itself is left in place.
    """
    for entry in os.listdir(srcdir):
        source = os.path.join(srcdir, entry)
        destination = os.path.join(dstdir, entry)
        shutil.move(source, destination)
38+
39+
def rearrange_archive(root):
    """Rearrange the downloaded archive; *root* here is output/reference.

    Layout before:
        {root}/en.cppreference.com/w/          : html
        {root}/en.cppreference.com/mwiki/      : data
        {root}/en.cppreference.com/            : data
        ... (other languages)
        {root}/upload.cppreference.com/mwiki/  : data

    Layout after:
        {root}/common/  : all common data
        {root}/en/      : html for en
        ... (other languages)

    The exported ``cppreference-export*.xml`` source files found directly
    in *root* are deleted at the end.
    """
    common_dir = os.path.join(root, 'common')
    rmtree_if_exists(common_dir)
    shutil.move(os.path.join(root, 'upload.cppreference.com/mwiki'), common_dir)
    shutil.rmtree(os.path.join(root, 'upload.cppreference.com'))

    # custom fonts and the favicon live next to the wiki folders
    loose_files = (
        'DejaVuSansMonoCondensed60.ttf',
        'DejaVuSansMonoCondensed75.ttf',
        'favicon.ico',
    )

    for lang in ["en"]:
        site_dir = os.path.join(root, lang + ".cppreference.com/")
        wiki_html_dir = site_dir + "w/"
        wiki_data_dir = site_dir + "mwiki/"

        if os.path.isdir(wiki_html_dir):
            shutil.move(wiki_html_dir, os.path.join(root, lang))

        if os.path.isdir(wiki_data_dir):
            # the skin files should be the same for all languages thus we
            # can merge everything
            move_dir_contents_to_dir(wiki_data_dir, common_dir)

        for name in loose_files:
            shutil.copy(os.path.join(site_dir, name), common_dir)

        # remove what's left
        shutil.rmtree(site_dir)

    # remove the XML source file
    exported = fnmatch.filter(os.listdir(root), 'cppreference-export*.xml')
    for name in exported:
        os.remove(os.path.join(root, name))
85+
86+
def add_file_to_rename_map(rename_map, dir, fn, new_fn):
    """Append ``(dir, fn, new_fn)`` to *rename_map* if ``dir/fn`` is a file.

    When the file does not exist an error line is printed and *rename_map*
    is left untouched.
    """
    candidate = os.path.join(dir, fn)
    if os.path.isfile(candidate):
        rename_map.append((dir, fn, new_fn))
    else:
        print("ERROR: Not renaming '{0}' because path does not exist".format(candidate))
92+
93+
# Converts complex URL to resources supplied by MediaWiki loader to a simplified name
94+
def convert_loader_name(fn):
    """Return the simplified file name for a MediaWiki loader resource.

    *fn* is searched against a fixed list of regular expressions, in order;
    the name paired with the first matching expression is returned.

    Raises:
        Exception: if *fn* matches none of the known loader resources.
    """
    known_resources = (
        ("modules=site&only=scripts", "site_scripts.js"),
        ("modules=site&only=styles", "site_modules.css"),
        ("modules=skins.*&only=scripts", "skin_scripts.js"),
        ("modules=startup&only=scripts", "startup_scripts.js"),
        ("modules=.*ext.*&only=styles", "ext.css"),
    )
    for pattern, short_name in known_resources:
        if re.search(pattern, fn):
            return short_name
    raise Exception('Loader file {0} does not match any known files'.format(fn))
107+
108+
def find_files_to_be_renamed(root):
    """Build the rename map for the archive rooted at *root*.

    Returns a list of ``(directory, source filename, destination filename)``
    tuples. The rename map specifies files to be renamed in order to support
    them on Windows filesystems, which don't support certain characters in
    file names.

    Entries are only added for files that exist (see
    ``add_file_to_rename_map``). Within each directory the candidates are
    processed in sorted order so the resulting map is reproducible between
    runs instead of depending on set iteration order.
    """
    rename_map = []

    files_rename = []   # general files to be renamed
    files_loader = []   # files served by load.php. These should map to
                        # consistent and short file names because we
                        # modify some of them later in the pipeline

    for dirpath, _dirnames, filenames in os.walk(root):
        filenames_loader = set(fnmatch.filter(filenames, 'load.php[?]*'))
        # match any filenames with '?"*' characters
        filenames_rename = set(fnmatch.filter(filenames, '*[?"*]*'))

        # don't process load.php files in general rename handler
        filenames_rename -= filenames_loader

        files_loader.extend((dirpath, fn) for fn in sorted(filenames_loader))
        files_rename.extend((dirpath, fn) for fn in sorted(filenames_rename))

    for dirpath, orig_fn in files_rename:
        # drop the query string, then replace the remaining forbidden chars
        fn = re.sub(r'\?.*', '', orig_fn)
        fn = fn.replace('"', '_q_')
        fn = fn.replace('*', '_star_')
        add_file_to_rename_map(rename_map, dirpath, orig_fn, fn)

    # map loader names to more recognizable names
    for dirpath, fn in files_loader:
        new_fn = convert_loader_name(fn)
        add_file_to_rename_map(rename_map, dirpath, fn, new_fn)

    # rename filenames that conflict on case-insensitive filesystems
    # TODO: perform this automatically
    add_file_to_rename_map(rename_map, os.path.join(root, 'en/cpp/numeric/math'), 'NAN.html', 'NAN.2.html')
    add_file_to_rename_map(rename_map, os.path.join(root, 'en/c/numeric/math'), 'NAN.html', 'NAN.2.html')
    return rename_map
151+
152+
def rename_files(rename_map):
    """Perform the renames described by *rename_map* on disk.

    Each entry is a ``(directory, old filename, new filename)`` tuple; the
    file is moved within its directory and the operation is printed.
    """
    for folder, current_name, target_name in rename_map:
        source = os.path.join(folder, current_name)
        destination = os.path.join(folder, target_name)
        print("Renaming '{0}' to \n '{1}'".format(source, destination))
        shutil.move(source, destination)
158+
159+
def find_html_files(root):
    """Return the paths of all ``*.html`` files found anywhere under *root*.

    These are the files that need to be preprocessed. Paths are built by
    joining the walked directory with the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root)
        for name in fnmatch.filter(names, '*.html')
    ]
166+
167+
def is_loader_link(target):
    """Return True if *target* is an absolute link to a cppreference load.php.

    The link must start with ``http://`` or ``https://``, followed by a
    lowercase-letter subdomain of ``cppreference.com`` and the path
    ``/mwiki/load.php``. Anything after that prefix is ignored.
    """
    # raw string: '\.' is not a valid escape in a normal string literal
    pattern = r'https?://[a-z]+\.cppreference\.com/mwiki/load\.php'
    return re.match(pattern, target) is not None
171+
172+
def transform_loader_link(target, file, root):
    """Turn an absolute load.php link into a path relative to *file*.

    The loader URL is mapped to its simplified name inside ``{root}/common``
    and the result is expressed relative to the directory containing *file*.
    """
    local_name = "common/" + convert_loader_name(target)
    absolute_path = os.path.join(root, local_name)
    containing_dir = os.path.dirname(file)
    return os.path.relpath(absolute_path, containing_dir)
176+
177+
def is_external_link(target):
    """Return True if *target* starts with an http, https, ftp or ftps scheme."""
    return bool(re.match('(ht|f)tps?://', target))
181+
182+
def trasform_relative_link(rename_map, target):
    """Rewrite a relative link so it points into the rearranged archive.

    Steps, in order:
      1. percent-decode *target*;
      2. replace every old file name from *rename_map* with its new name;
      3. redirect ``mwiki`` data locations to ``../common/``;
      4. strip the query string following ``.php`` or ``.css``;
      5. percent-encode the result again, keeping ``#`` literal so that
         fragment identifiers continue to work.

    *rename_map* is an iterable of ``(dir, fn, new_fn)`` tuples; only the
    two file names are used here.
    """
    target = urllib.parse.unquote(target)
    for _dir, fn, new_fn in rename_map:
        target = target.replace(fn, new_fn)
    target = target.replace('../../upload.cppreference.com/mwiki/', '../common/')
    target = target.replace('../mwiki/', '../common/')
    # raw strings: '\.' and '\?' are not valid escapes in normal literals
    target = re.sub(r'(\.php|\.css)\?.*', r'\1', target)
    target = urllib.parse.quote(target)
    target = target.replace('%23', '#')
    return target
192+
193+
# Transforms a link in the given file according to rename map.
194+
# target is the link to transform.
195+
# file is the path of the file the link came from.
196+
# root is the path to the root of the archive.
197+
def transform_link(rename_map, target, file, root):
    """Return the rewritten form of link *target* found in *file*.

    Loader links are made relative to *file*, other external links are
    returned unchanged, and everything else is treated as a relative link
    and rewritten according to *rename_map*. *root* is the archive root.
    """
    if is_loader_link(target):
        return transform_loader_link(target, file, root)
    if not is_external_link(target):
        return trasform_relative_link(rename_map, target)
    return target
205+
206+
def has_class(el, classes_to_check):
    """Return True if element *el* carries any class in *classes_to_check*.

    *el* only needs a ``get(name)`` method returning the attribute value or
    None. The ``class`` attribute is split on any run of whitespace (spaces,
    tabs, newlines), so irregular separators still yield the individual
    class names. Empty strings in *classes_to_check* never match.
    """
    value = el.get('class')
    if value is None:
        return False
    present = set(value.split())
    return any(cl != '' and cl in present for cl in classes_to_check)
215+
216+
def preprocess_html_file(root, fn, rename_map):
    """Clean up the downloaded HTML page *fn* in place for offline use.

    The page is parsed with lxml's HTML parser and then:
      * elements with class ``noprint`` or ``editsection`` (except the one
        with id ``cpp-footer-base``) and the element with id ``toc`` are
        removed;
      * ``<link>`` elements for alternate/search/edit resources are removed
        and the shortcut icon is pointed into a ``common`` subdirectory;
      * Google Analytics scripts directly under ``<body>`` are removed;
      * the footer is reduced to a link to the online version, the
        retrieval time (taken from the file's mtime) and ``footer-info``;
      * every ``src``/``href`` attribute is rewritten via ``transform_link``.

    Parser warnings are printed and the result is written back to *fn*
    as UTF-8 HTML. *root* is the archive root used to compute relative
    paths; *rename_map* is the list produced for the file renames.
    """
    parser = etree.HTMLParser()
    html = etree.parse(fn, parser)

    # remove non-printable elements
    # NOTE(review): lxml's remove() appears to drop the removed element's
    # tail text as well - confirm this is acceptable for these pages.
    for el in html.xpath('//*'):
        if has_class(el, ['noprint', 'editsection']) and el.get('id') != 'cpp-footer-base':
            el.getparent().remove(el)
        elif el.get('id') == 'toc':
            # elif, not a second if: an element matching both conditions is
            # already detached, so getparent() would return None
            el.getparent().remove(el)

    # remove external links to unused resources
    for el in html.xpath('/html/head/link'):
        rel = el.get('rel')
        if rel in ['alternate', 'search', 'edit', 'EditURI']:
            el.getparent().remove(el)
        elif rel == 'shortcut icon':
            (head, tail) = os.path.split(el.get('href'))
            el.set('href', os.path.join(head, 'common', tail))

    # remove Google Analytics scripts
    for el in html.xpath('/html/body/script'):
        src = el.get('src')
        if src is not None and 'google-analytics.com/ga.js' in src:
            el.getparent().remove(el)
        elif el.text is not None and ('google-analytics.com/ga.js' in el.text or 'pageTracker' in el.text):
            el.getparent().remove(el)

    # make custom footer
    footer = html.xpath('//*[@id=\'footer\']')[0]
    # iterate over a snapshot because children are removed inside the loop
    for child in list(footer):
        child_id = child.get('id')
        if child_id == 'cpp-navigation':
            items = child.find('ul')
            items.clear()

            link = etree.SubElement(etree.SubElement(items, 'li'), 'a')
            # map "<lang>/<page>.html" back to the online wiki URL
            url = re.sub('(..)/(.*)\\.html', 'http://\\1.cppreference.com/w/\\2', os.path.relpath(fn, root))
            url = re.sub('(.*)/index', '\\1/', url)
            link.set('href', url)
            link.text = 'Online version'

            li = etree.SubElement(items, 'li')
            mtime = datetime.fromtimestamp(os.stat(fn).st_mtime)
            li.text = f"Offline version retrieved {mtime.isoformat(sep=' ', timespec='minutes')}."
        elif child_id == 'footer-info':
            pass
        else:
            footer.remove(child)

    # apply changes to links caused by file renames
    for el in html.xpath('//*[@src or @href]'):
        if el.get('src') is not None:
            el.set('src', transform_link(rename_map, el.get('src'), fn, root))
        elif el.get('href') is not None:
            el.set('href', transform_link(rename_map, el.get('href'), fn, root))

    for err in parser.error_log:
        print("HTML WARN: {0}".format(err))

    html.write(fn, encoding='utf-8', method='html')
275+
276+
def preprocess_css_file(fn):
    """Patch the CSS file *fn* in place for use in the offline archive.

    The file is read and written as UTF-8. References to the DejaVu fonts
    in the parent directory are changed to the same directory, and
    ``nth-child(1)`` is replaced with ``first-child``.
    """
    # context managers make sure the handle is closed even if I/O fails
    with open(fn, "r", encoding='utf-8') as f:
        text = f.read()

    # note that query string is not used in css files

    text = text.replace('../DejaVuSansMonoCondensed60.ttf', 'DejaVuSansMonoCondensed60.ttf')
    text = text.replace('../DejaVuSansMonoCondensed75.ttf', 'DejaVuSansMonoCondensed75.ttf')

    # QT Help viewer doesn't understand nth-child
    text = text.replace('nth-child(1)', 'first-child')

    with open(fn, "w", encoding='utf-8') as f:
        f.write(text)

gadgets/coliru_compiler.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,10 +545,10 @@ window.jump_to_error = function(node, lineno) {
545545
var editors = [];
546546

547547
$.when(
548-
get_script_cached('http://d1n0x3qji82z53.cloudfront.net/src-min-noconflict/ace.js')
548+
get_script_cached('https://d1n0x3qji82z53.cloudfront.net/src-min-noconflict/ace.js')
549549
).done(function() {
550550
$.when(
551-
get_script_cached('http://d1n0x3qji82z53.cloudfront.net/src-min-noconflict/mode-c_cpp.js'),
551+
get_script_cached('https://d1n0x3qji82z53.cloudfront.net/src-min-noconflict/mode-c_cpp.js'),
552552
$.Deferred(function(deferred) {
553553
$(deferred.resolve);
554554
})

headers/fstream

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ typedef basic_fstream<wchar_t> wfstream;
101101
template <
102102
class CharT,
103103
class Traits = std::char_traits<CharT>
104-
> class basic_ifstream : public std::basic_ostream<CharT, Traits> {
104+
> class basic_ifstream : public std::basic_istream<CharT, Traits> {
105105
public:
106106
basic_ifstream();
107107
explicit basic_ifstream(const char* filename,

0 commit comments

Comments
 (0)