diff --git a/.gitignore b/.gitignore index d34ed114972c5..87fec41415272 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ __pycache__/ /src/libcore/unicode/Scripts.txt /src/libcore/unicode/SpecialCasing.txt /src/libcore/unicode/UnicodeData.txt +/src/libcore/unicode/downloaded /stage[0-9]+/ /target target/ diff --git a/Cargo.lock b/Cargo.lock index afa86eb211184..8401d2d6c9e9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2456,7 +2456,6 @@ dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", "chalk-engine 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "flate2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "fmt_macros 0.0.0", "graphviz 0.0.0", "jobserver 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2479,7 +2478,6 @@ dependencies = [ "smallvec 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", "syntax 0.0.0", "syntax_pos 0.0.0", - "tempfile 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2711,7 +2709,6 @@ name = "rustc_apfloat" version = "0.0.0" dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_cratesio_shim 0.0.0", "smallvec 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -2749,6 +2746,7 @@ dependencies = [ "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-demangle 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "rustc_llvm 0.0.0", + "tempfile 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2795,15 +2793,6 @@ dependencies = [ "syntax_pos 0.0.0", ] -[[package]] -name = "rustc_cratesio_shim" -version = "0.0.0" -dependencies = [ - "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "rustc_data_structures" version = "0.0.0" @@ -2819,7 +2808,6 @@ dependencies = [ "rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-rayon 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-rayon-core 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_cratesio_shim 0.0.0", "serialize 0.0.0", "smallvec 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", "stable_deref_trait 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2868,7 +2856,6 @@ dependencies = [ "annotate-snippets 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_cratesio_shim 0.0.0", "rustc_data_structures 0.0.0", "serialize 0.0.0", "syntax_pos 0.0.0", @@ -3098,7 +3085,6 @@ version = "0.0.0" dependencies = [ "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc_cratesio_shim 0.0.0", "rustc_data_structures 0.0.0", "serialize 0.0.0", "syntax_pos 0.0.0", diff --git a/src/libarena/Cargo.toml b/src/libarena/Cargo.toml index aa1bf38b99597..2643912f6d7d6 100644 --- a/src/libarena/Cargo.toml +++ b/src/libarena/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "arena" path = "lib.rs" -crate-type = ["dylib"] [dependencies] 
rustc_data_structures = { path = "../librustc_data_structures" } diff --git a/src/libcore/unicode/unicode.py b/src/libcore/unicode/unicode.py index ae356c3ff4459..a0539cd9ca9b6 100755 --- a/src/libcore/unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -1,147 +1,350 @@ #!/usr/bin/env python -# This script uses the following Unicode tables: -# - DerivedCoreProperties.txt -# - DerivedNormalizationProps.txt -# - EastAsianWidth.txt -# - auxiliary/GraphemeBreakProperty.txt -# - PropList.txt -# - ReadMe.txt -# - Scripts.txt -# - UnicodeData.txt -# -# Since this should not require frequent updates, we just store this -# out-of-line and check the tables.rs file into git. +""" +Regenerate Unicode tables (tables.rs). +""" -import fileinput, re, os, sys, operator, math, datetime +# This script uses the Unicode tables as defined +# in the UnicodeFiles class. -# The directory in which this file resides. -fdir = os.path.dirname(os.path.realpath(__file__)) + "/" +# Since this should not require frequent updates, we just store this +# out-of-line and check the tables.rs file into git. -preamble = ''' +# Note that the "curl" program is required for operation. +# This script is compatible with Python 2.7 and 3.x. + +import argparse +import datetime +import fileinput +import itertools +import os +import re +import textwrap +import subprocess + +from collections import defaultdict, namedtuple + +try: + # Python 3 + from itertools import zip_longest + from io import StringIO +except ImportError: + # Python 2 compatibility + zip_longest = itertools.izip_longest + from StringIO import StringIO + +try: + # Completely optional type hinting + # (Python 2 compatible using comments, + # see: https://mypy.readthedocs.io/en/latest/python2.html) + # This is very helpful in typing-aware IDE like PyCharm. + from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple +except ImportError: + pass + + +# We don't use enum.Enum because of Python 2.7 compatibility. +class UnicodeFiles(object): + # ReadMe does not contain any Unicode data, we + # only use it to extract versions. + README = "ReadMe.txt" + + DERIVED_CORE_PROPERTIES = "DerivedCoreProperties.txt" + DERIVED_NORMALIZATION_PROPS = "DerivedNormalizationProps.txt" + PROPS = "PropList.txt" + SCRIPTS = "Scripts.txt" + SPECIAL_CASING = "SpecialCasing.txt" + UNICODE_DATA = "UnicodeData.txt" + + +# The order doesn't really matter (Python < 3.6 won't preserve it), +# we only want to aggregate all the file names. +ALL_UNICODE_FILES = tuple( + value for name, value in UnicodeFiles.__dict__.items() + if not name.startswith("_") +) + +assert len(ALL_UNICODE_FILES) == 7, "Unexpected number of unicode files" + +# The directory this file is located in. +THIS_DIR = os.path.dirname(os.path.realpath(__file__)) + +# Where to download the Unicode data. The downloaded files +# will be placed in sub-directories named after Unicode version. 
+FETCH_DIR = os.path.join(THIS_DIR, "downloaded") + +FETCH_URL_LATEST = "ftp://ftp.unicode.org/Public/UNIDATA/{filename}" +FETCH_URL_VERSION = "ftp://ftp.unicode.org/Public/{version}/ucd/{filename}" + +PREAMBLE = """\ // NOTE: The following code was generated by "./unicode.py", do not edit directly #![allow(missing_docs, non_upper_case_globals, non_snake_case)] use unicode::version::UnicodeVersion; use unicode::bool_trie::{{BoolTrie, SmallBoolTrie}}; -'''.format(year = datetime.datetime.now().year) +""".format(year=datetime.datetime.now().year) # Mapping taken from Table 12 from: # http://www.unicode.org/reports/tr44/#General_Category_Values -expanded_categories = { - 'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'], - 'Lm': ['L'], 'Lo': ['L'], - 'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'], - 'Nd': ['N'], 'Nl': ['N'], 'No': ['N'], - 'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'], - 'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'], - 'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'], - 'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'], - 'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'], +EXPANDED_CATEGORIES = { + "Lu": ["LC", "L"], "Ll": ["LC", "L"], "Lt": ["LC", "L"], + "Lm": ["L"], "Lo": ["L"], + "Mn": ["M"], "Mc": ["M"], "Me": ["M"], + "Nd": ["N"], "Nl": ["N"], "No": ["N"], + "Pc": ["P"], "Pd": ["P"], "Ps": ["P"], "Pe": ["P"], + "Pi": ["P"], "Pf": ["P"], "Po": ["P"], + "Sm": ["S"], "Sc": ["S"], "Sk": ["S"], "So": ["S"], + "Zs": ["Z"], "Zl": ["Z"], "Zp": ["Z"], + "Cc": ["C"], "Cf": ["C"], "Cs": ["C"], "Co": ["C"], "Cn": ["C"], } -# these are the surrogate codepoints, which are not valid rust characters -surrogate_codepoints = (0xd800, 0xdfff) +# This is the (inclusive) range of surrogate codepoints. +# These are not valid Rust characters. +SURROGATE_CODEPOINTS_RANGE = (0xd800, 0xdfff) + +UnicodeData = namedtuple( + "UnicodeData", ( + # Conversions: + "to_upper", "to_lower", "to_title", + + # Decompositions: canonical decompositions, compatibility decomp + "canon_decomp", "compat_decomp", + + # Grouped: general categories and combining characters + "general_categories", "combines", + ) +) + +UnicodeVersion = namedtuple( + "UnicodeVersion", ("major", "minor", "micro", "as_str") +) + + +def fetch_files(version=None): + # type: (str) -> UnicodeVersion + """ + Fetch all the Unicode files from unicode.org. + + This will use cached files (stored in `FETCH_DIR`) if they exist, + creating them if they don't. In any case, the Unicode version + is always returned. + + :param version: The desired Unicode version, as string. + (If None, defaults to latest final release available, + querying the unicode.org service). + """ + have_version = check_stored_version(version) + if have_version: + return have_version + + if version: + # Check if the desired version exists on the server. + get_fetch_url = lambda name: FETCH_URL_VERSION.format(version=version, filename=name) + else: + # Extract the latest version. + get_fetch_url = lambda name: FETCH_URL_LATEST.format(filename=name) + + readme_url = get_fetch_url(UnicodeFiles.README) + + print("Fetching: {}".format(readme_url)) + readme_content = subprocess.check_output(("curl", readme_url)) + + unicode_version = parse_readme_unicode_version( + readme_content.decode("utf8") + ) + + download_dir = get_unicode_dir(unicode_version) + if not os.path.exists(download_dir): + # For 2.7 compat, we don't use `exist_ok=True`. 
+ os.makedirs(download_dir) + + for filename in ALL_UNICODE_FILES: + file_path = get_unicode_file_path(unicode_version, filename) + + if os.path.exists(file_path): + # Assume file on the server didn't change if it's been saved before. + continue + + if filename == UnicodeFiles.README: + with open(file_path, "wb") as fd: + fd.write(readme_content) + else: + url = get_fetch_url(filename) + print("Fetching: {}".format(url)) + subprocess.check_call(("curl", "-o", file_path, url)) + + return unicode_version + + +def check_stored_version(version): + # type: (Optional[str]) -> Optional[UnicodeVersion] + """ + Given desired Unicode version, return the version + if stored files are all present, and `None` otherwise. + """ + if not version: + # If no desired version specified, we should check what's the latest + # version, skipping stored version checks. + return None + + fetch_dir = os.path.join(FETCH_DIR, version) -def fetch(f): - path = fdir + os.path.basename(f) - if not os.path.exists(path): - os.system("curl -o {0}{1} ftp://ftp.unicode.org/Public/UNIDATA/{1}".format(fdir, f)) + for filename in ALL_UNICODE_FILES: + file_path = os.path.join(fetch_dir, filename) + + if not os.path.exists(file_path): + return None + + with open(os.path.join(fetch_dir, UnicodeFiles.README)) as fd: + return parse_readme_unicode_version(fd.read()) + + +def parse_readme_unicode_version(readme_content): + # type: (str) -> UnicodeVersion + """ + Parse the Unicode version contained in their `ReadMe.txt` file. + """ + # "Raw string" is necessary for \d not being treated as escape char + # (for the sake of compat with future Python versions). + # See: https://docs.python.org/3.6/whatsnew/3.6.html#deprecated-python-behavior + pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode" + groups = re.search(pattern, readme_content).groups() + + return UnicodeVersion(*map(int, groups), as_str=".".join(groups)) + + +def get_unicode_dir(unicode_version): + # type: (UnicodeVersion) -> str + """ + Indicate in which parent dir the Unicode data files should be stored. + + This returns a full, absolute path. + """ + return os.path.join(FETCH_DIR, unicode_version.as_str) + + +def get_unicode_file_path(unicode_version, filename): + # type: (UnicodeVersion, str) -> str + """ + Indicate where the Unicode data file should be stored. + """ + return os.path.join(get_unicode_dir(unicode_version), filename) - if not os.path.exists(path): - sys.stderr.write("cannot load %s" % f) - exit(1) def is_surrogate(n): - return surrogate_codepoints[0] <= n <= surrogate_codepoints[1] - -def load_unicode_data(f): - fetch(f) - gencats = {} - to_lower = {} - to_upper = {} - to_title = {} - combines = {} - canon_decomp = {} - compat_decomp = {} - - udict = {} + # type: (int) -> bool + """ + Tell if given codepoint is a surrogate (not a valid Rust character). + """ + return SURROGATE_CODEPOINTS_RANGE[0] <= n <= SURROGATE_CODEPOINTS_RANGE[1] + + +def load_unicode_data(file_path): + # type: (str) -> UnicodeData + """ + Load main Unicode data. 
+ """ + # Conversions + to_lower = {} # type: Dict[int, Tuple[int, int, int]] + to_upper = {} # type: Dict[int, Tuple[int, int, int]] + to_title = {} # type: Dict[int, Tuple[int, int, int]] + + # Decompositions + compat_decomp = {} # type: Dict[int, List[int]] + canon_decomp = {} # type: Dict[int, List[int]] + + # Combining characters + # FIXME: combines are not used + combines = defaultdict(set) # type: Dict[str, Set[int]] + + # Categories + general_categories = defaultdict(set) # type: Dict[str, Set[int]] + category_assigned_codepoints = set() # type: Set[int] + + all_codepoints = {} + range_start = -1 - for line in fileinput.input(fdir + f): - data = line.split(';') + + for line in fileinput.input(file_path): + data = line.split(";") if len(data) != 15: continue - cp = int(data[0], 16) - if is_surrogate(cp): + codepoint = int(data[0], 16) + if is_surrogate(codepoint): continue if range_start >= 0: - for i in range(range_start, cp): - udict[i] = data + for i in range(range_start, codepoint): + all_codepoints[i] = data range_start = -1 if data[1].endswith(", First>"): - range_start = cp + range_start = codepoint continue - udict[cp] = data + all_codepoints[codepoint] = data - for code in udict: + for code, data in all_codepoints.items(): (code_org, name, gencat, combine, bidi, decomp, deci, digit, num, mirror, - old, iso, upcase, lowcase, titlecase) = udict[code] + old, iso, upcase, lowcase, titlecase) = data + + # Generate char to char direct common and simple conversions: - # generate char to char direct common and simple conversions - # uppercase to lowercase + # Uppercase to lowercase if lowcase != "" and code_org != lowcase: to_lower[code] = (int(lowcase, 16), 0, 0) - # lowercase to uppercase + # Lowercase to uppercase if upcase != "" and code_org != upcase: to_upper[code] = (int(upcase, 16), 0, 0) - # title case + # Title case if titlecase.strip() != "" and code_org != titlecase: to_title[code] = (int(titlecase, 16), 0, 0) - # store decomposition, if given - if decomp != "": - if decomp.startswith('<'): - seq = [] - for i in decomp.split()[1:]: - seq.append(int(i, 16)) - compat_decomp[code] = seq + # Store decomposition, if given + if decomp: + decompositions = decomp.split()[1:] + decomp_code_points = [int(i, 16) for i in decompositions] + + if decomp.startswith("<"): + # Compatibility decomposition + compat_decomp[code] = decomp_code_points else: - seq = [] - for i in decomp.split(): - seq.append(int(i, 16)) - canon_decomp[code] = seq - - # place letter in categories as appropriate - for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []): - if cat not in gencats: - gencats[cat] = [] - gencats[cat].append(code) - - # record combining class, if any + # Canonical decomposition + canon_decomp[code] = decomp_code_points + + # Place letter in categories as appropriate. + for cat in itertools.chain((gencat, ), EXPANDED_CATEGORIES.get(gencat, [])): + general_categories[cat].add(code) + category_assigned_codepoints.add(code) + + # Record combining class, if any. if combine != "0": - if combine not in combines: - combines[combine] = [] - combines[combine].append(code) - - # generate Not_Assigned from Assigned - gencats["Cn"] = gen_unassigned(gencats["Assigned"]) - # Assigned is not a real category - del(gencats["Assigned"]) + combines[combine].add(code) + + # Generate Not_Assigned from Assigned. 
+ general_categories["Cn"] = get_unassigned_codepoints(category_assigned_codepoints) + # Other contains Not_Assigned - gencats["C"].extend(gencats["Cn"]) - gencats = group_cats(gencats) - combines = to_combines(group_cats(combines)) + general_categories["C"].update(general_categories["Cn"]) + + grouped_categories = group_categories(general_categories) - return (canon_decomp, compat_decomp, gencats, combines, to_upper, to_lower, to_title) + # FIXME: combines are not used + return UnicodeData( + to_lower=to_lower, to_upper=to_upper, to_title=to_title, + compat_decomp=compat_decomp, canon_decomp=canon_decomp, + general_categories=grouped_categories, combines=combines, + ) -def load_special_casing(f, to_upper, to_lower, to_title): - fetch(f) - for line in fileinput.input(fdir + f): - data = line.split('#')[0].split(';') + +def load_special_casing(file_path, unicode_data): + # type: (str, UnicodeData) -> None + """ + Load special casing data and enrich given Unicode data. + """ + for line in fileinput.input(file_path): + data = line.split("#")[0].split(";") if len(data) == 5: code, lower, title, upper, _comment = data elif len(data) == 6: @@ -155,243 +358,399 @@ def load_special_casing(f, to_upper, to_lower, to_title): title = title.strip() upper = upper.strip() key = int(code, 16) - for (map_, values) in [(to_lower, lower), (to_upper, upper), (to_title, title)]: + for (map_, values) in ((unicode_data.to_lower, lower), + (unicode_data.to_upper, upper), + (unicode_data.to_title, title)): if values != code: - values = [int(i, 16) for i in values.split()] - for _ in range(len(values), 3): - values.append(0) - assert len(values) == 3 - map_[key] = values - -def group_cats(cats): - cats_out = {} - for cat in cats: - cats_out[cat] = group_cat(cats[cat]) - return cats_out - -def group_cat(cat): - cat_out = [] - letters = sorted(set(cat)) - cur_start = letters.pop(0) - cur_end = cur_start - for letter in letters: - assert letter > cur_end, \ - "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter)) - if letter == cur_end + 1: - cur_end = letter - else: - cat_out.append((cur_start, cur_end)) - cur_start = cur_end = letter - cat_out.append((cur_start, cur_end)) - return cat_out - -def ungroup_cat(cat): - cat_out = [] - for (lo, hi) in cat: - while lo <= hi: - cat_out.append(lo) - lo += 1 - return cat_out - -def gen_unassigned(assigned): - assigned = set(assigned) - return ([i for i in range(0, 0xd800) if i not in assigned] + - [i for i in range(0xe000, 0x110000) if i not in assigned]) - -def to_combines(combs): - combs_out = [] - for comb in combs: - for (lo, hi) in combs[comb]: - combs_out.append((lo, hi, comb)) - combs_out.sort(key=lambda comb: comb[0]) - return combs_out - -def format_table_content(f, content, indent): - line = " "*indent + split = values.split() + + codepoints = list(itertools.chain( + (int(i, 16) for i in split), + (0 for _ in range(len(split), 3)) + )) + + assert len(codepoints) == 3 + map_[key] = codepoints + + +def group_categories(mapping): + # type: (Dict[Any, Iterable[int]]) -> Dict[str, List[Tuple[int, int]]] + """ + Group codepoints mapped in "categories". + """ + return {category: group_codepoints(codepoints) + for category, codepoints in mapping.items()} + + +def group_codepoints(codepoints): + # type: (Iterable[int]) -> List[Tuple[int, int]] + """ + Group integral values into continuous, disjoint value ranges. + + Performs value deduplication. + + :return: sorted list of pairs denoting start and end of codepoint + group values, both ends inclusive. 
+
+    >>> group_codepoints([1, 2, 10, 11, 12, 3, 4])
+    [(1, 4), (10, 12)]
+    >>> group_codepoints([1])
+    [(1, 1)]
+    >>> group_codepoints([1, 5, 6])
+    [(1, 1), (5, 6)]
+    >>> group_codepoints([])
+    []
+    """
+    sorted_codes = sorted(set(codepoints))
+    result = []  # type: List[Tuple[int, int]]
+
+    if not sorted_codes:
+        return result
+
+    next_codes = sorted_codes[1:]
+    start_code = sorted_codes[0]
+
+    for code, next_code in zip_longest(sorted_codes, next_codes, fillvalue=None):
+        if next_code is None or next_code - code != 1:
+            result.append((start_code, code))
+            start_code = next_code
+
+    return result
+
+
+def ungroup_codepoints(codepoint_pairs):
+    # type: (Iterable[Tuple[int, int]]) -> List[int]
+    """
+    The inverse of group_codepoints -- produce a flat list of values
+    from value range pairs.
+
+    >>> ungroup_codepoints([(1, 4), (10, 12)])
+    [1, 2, 3, 4, 10, 11, 12]
+    >>> ungroup_codepoints([(1, 1), (5, 6)])
+    [1, 5, 6]
+    >>> ungroup_codepoints(group_codepoints([1, 2, 7, 8]))
+    [1, 2, 7, 8]
+    >>> ungroup_codepoints([])
+    []
+    """
+    return list(itertools.chain.from_iterable(
+        range(lo, hi + 1) for lo, hi in codepoint_pairs
+    ))
+
+
+def get_unassigned_codepoints(assigned_codepoints):
+    # type: (Set[int]) -> Set[int]
+    """
+    Given the set of assigned codepoints, return the set of
+    codepoints that are neither assigned nor surrogates.
+    """
+    return {i for i in range(0, 0x110000)
+            if i not in assigned_codepoints and not is_surrogate(i)}
+
+
+def generate_table_lines(items, indent, wrap=98):
+    # type: (Iterable[str], int, int) -> Iterator[str]
+    """
+    Given table items, generate wrapped lines of text with comma-separated items.
+
+    This is a generator function.
+
+    :param wrap: soft wrap limit (characters per line), integer.
+    """
+    line = " " * indent
     first = True
-    for chunk in content.split(","):
-        if len(line) + len(chunk) < 98:
+    for item in items:
+        if len(line) + len(item) < wrap:
             if first:
-                line += chunk
+                line += item
             else:
-                line += ", " + chunk
+                line += ", " + item
             first = False
         else:
-            f.write(line + ",\n")
-            line = " "*indent + chunk
-    f.write(line)
-
-def load_properties(f, interestingprops):
-    fetch(f)
-    props = {}
-    re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
-    re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
-
-    for line in fileinput.input(fdir + os.path.basename(f)):
-        prop = None
-        d_lo = 0
-        d_hi = 0
-        m = re1.match(line)
-        if m:
-            d_lo = m.group(1)
-            d_hi = m.group(1)
-            prop = m.group(2)
-        else:
-            m = re2.match(line)
-            if m:
-                d_lo = m.group(1)
-                d_hi = m.group(2)
-                prop = m.group(3)
+            yield line + ",\n"
+            line = " " * indent + item
+
+    yield line
+
+
+def load_properties(file_path, interesting_props):
+    # type: (str, Iterable[str]) -> Dict[str, List[Tuple[int, int]]]
+    """
+    Load properties data and return it in grouped form.
+    """
+    props = defaultdict(list)  # type: Dict[str, List[Tuple[int, int]]]
+    # A raw string is necessary here so that `\.` and `\w` are not treated as
+    # escape characters (for the sake of compat with future Python versions).
+    # See: https://docs.python.org/3.6/whatsnew/3.6.html#deprecated-python-behavior
+    re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
+    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
+
+    for line in fileinput.input(file_path):
+        match = re1.match(line) or re2.match(line)
+        if match:
+            groups = match.groups()
+
+            if len(groups) == 2:
+                # `re1` matched (2 groups).
+ d_lo, prop = groups + d_hi = d_lo else: - continue - if interestingprops and prop not in interestingprops: + d_lo, d_hi, prop = groups + else: + continue + + if interesting_props and prop not in interesting_props: continue - d_lo = int(d_lo, 16) - d_hi = int(d_hi, 16) - if prop not in props: - props[prop] = [] - props[prop].append((d_lo, d_hi)) - # optimize if possible + lo_value = int(d_lo, 16) + hi_value = int(d_hi, 16) + + props[prop].append((lo_value, hi_value)) + + # Optimize if possible. for prop in props: - props[prop] = group_cat(ungroup_cat(props[prop])) + props[prop] = group_codepoints(ungroup_codepoints(props[prop])) return props -def escape_char(c): - return "'\\u{%x}'" % c if c != 0 else "'\\0'" -def emit_table(f, name, t_data, t_type = "&[(char, char)]", is_pub=True, - pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))): +def escape_char(c): + # type: (int) -> str + r""" + Escape a codepoint for use as Rust char literal. + + Outputs are OK to use as Rust source code as char literals + and they also include necessary quotes. + + >>> escape_char(97) + "'\\u{61}'" + >>> escape_char(0) + "'\\0'" + """ + return r"'\u{%x}'" % c if c != 0 else r"'\0'" + + +def format_char_pair(pair): + # type: (Tuple[int, int]) -> str + """ + Format a pair of two Rust chars. + """ + return "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1])) + + +def generate_table( + name, # type: str + items, # type: List[Tuple[int, int]] + decl_type="&[(char, char)]", # type: str + is_pub=True, # type: bool + format_item=format_char_pair, # type: Callable[[Tuple[int, int]], str] +): + # type: (...) -> Iterator[str] + """ + Generate a nicely formatted Rust constant "table" array. + + This generates actual Rust code. + """ pub_string = "" if is_pub: pub_string = "pub " - f.write(" %sconst %s: %s = &[\n" % (pub_string, name, t_type)) - data = "" + + yield " %sconst %s: %s = &[\n" % (pub_string, name, decl_type) + + data = [] first = True - for dat in t_data: + for item in items: if not first: - data += "," + data.append(",") first = False - data += pfun(dat) - format_table_content(f, data, 8) - f.write("\n ];\n\n") + data.extend(format_item(item)) -def compute_trie(rawdata, chunksize): + for table_line in generate_table_lines("".join(data).split(","), 8): + yield table_line + + yield "\n ];\n\n" + + +def compute_trie(raw_data, chunk_size): + # type: (List[int], int) -> Tuple[List[int], List[int]] + """ + Compute postfix-compressed trie. + + See: bool_trie.rs for more details. + + >>> compute_trie([1, 2, 3, 1, 2, 3, 4, 5, 6], 3) + ([0, 0, 1], [1, 2, 3, 4, 5, 6]) + >>> compute_trie([1, 2, 3, 1, 2, 4, 4, 5, 6], 3) + ([0, 1, 2], [1, 2, 3, 1, 2, 4, 4, 5, 6]) + """ root = [] - childmap = {} + childmap = {} # type: Dict[Tuple[int, ...], int] child_data = [] - for i in range(len(rawdata) // chunksize): - data = rawdata[i * chunksize: (i + 1) * chunksize] - child = '|'.join(map(str, data)) + + assert len(raw_data) % chunk_size == 0, "Chunks must be equally sized" + + for i in range(len(raw_data) // chunk_size): + data = raw_data[i * chunk_size : (i + 1) * chunk_size] + + # Postfix compression of child nodes (data chunks) + # (identical child nodes are shared). + + # Make a tuple out of the list so it's hashable. 
+ child = tuple(data) if child not in childmap: childmap[child] = len(childmap) child_data.extend(data) + root.append(childmap[child]) - return (root, child_data) -def emit_bool_trie(f, name, t_data, is_pub=True): - CHUNK = 64 + return root, child_data + + +def generate_bool_trie(name, codepoint_ranges, is_pub=True): + # type: (str, List[Tuple[int, int]], bool) -> Iterator[str] + """ + Generate Rust code for BoolTrie struct. + + This yields string fragments that should be joined to produce + the final string. + + See: `bool_trie.rs`. + """ + chunk_size = 64 rawdata = [False] * 0x110000 - for (lo, hi) in t_data: + for (lo, hi) in codepoint_ranges: for cp in range(lo, hi + 1): rawdata[cp] = True - # convert to bitmap chunks of 64 bits each + # Convert to bitmap chunks of `chunk_size` bits each. chunks = [] - for i in range(0x110000 // CHUNK): + for i in range(0x110000 // chunk_size): chunk = 0 - for j in range(64): - if rawdata[i * 64 + j]: + for j in range(chunk_size): + if rawdata[i * chunk_size + j]: chunk |= 1 << j chunks.append(chunk) pub_string = "" if is_pub: pub_string = "pub " - f.write(" %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name)) - f.write(" r1: [\n") - data = ','.join('0x%016x' % chunk for chunk in chunks[0:0x800 // CHUNK]) - format_table_content(f, data, 12) - f.write("\n ],\n") + yield " %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name) + yield " r1: [\n" + data = ("0x%016x" % chunk for chunk in chunks[:0x800 // chunk_size]) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" # 0x800..0x10000 trie - (r2, r3) = compute_trie(chunks[0x800 // CHUNK : 0x10000 // CHUNK], 64 // CHUNK) - f.write(" r2: [\n") - data = ','.join(str(node) for node in r2) - format_table_content(f, data, 12) - f.write("\n ],\n") - f.write(" r3: &[\n") - data = ','.join('0x%016x' % chunk for chunk in r3) - format_table_content(f, data, 12) - f.write("\n ],\n") + (r2, r3) = compute_trie(chunks[0x800 // chunk_size : 0x10000 // chunk_size], 64 // chunk_size) + yield " r2: [\n" + data = map(str, r2) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" + + yield " r3: &[\n" + data = ("0x%016x" % node for node in r3) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" # 0x10000..0x110000 trie - (mid, r6) = compute_trie(chunks[0x10000 // CHUNK : 0x110000 // CHUNK], 64 // CHUNK) + (mid, r6) = compute_trie(chunks[0x10000 // chunk_size : 0x110000 // chunk_size], + 64 // chunk_size) (r4, r5) = compute_trie(mid, 64) - f.write(" r4: [\n") - data = ','.join(str(node) for node in r4) - format_table_content(f, data, 12) - f.write("\n ],\n") - f.write(" r5: &[\n") - data = ','.join(str(node) for node in r5) - format_table_content(f, data, 12) - f.write("\n ],\n") - f.write(" r6: &[\n") - data = ','.join('0x%016x' % chunk for chunk in r6) - format_table_content(f, data, 12) - f.write("\n ],\n") - - f.write(" };\n\n") - -def emit_small_bool_trie(f, name, t_data, is_pub=True): - last_chunk = max(hi // 64 for (lo, hi) in t_data) + + yield " r4: [\n" + data = map(str, r4) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" + + yield " r5: &[\n" + data = map(str, r5) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" + + yield " r6: &[\n" + data = ("0x%016x" % node for node in r6) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" + + yield " };\n\n" + + +def 
generate_small_bool_trie(name, codepoint_ranges, is_pub=True): + # type: (str, List[Tuple[int, int]], bool) -> Iterator[str] + """ + Generate Rust code for `SmallBoolTrie` struct. + + See: `bool_trie.rs`. + """ + last_chunk = max(hi // 64 for (lo, hi) in codepoint_ranges) n_chunks = last_chunk + 1 chunks = [0] * n_chunks - for (lo, hi) in t_data: + for (lo, hi) in codepoint_ranges: for cp in range(lo, hi + 1): - if cp // 64 >= len(chunks): - print(cp, cp // 64, len(chunks), lo, hi) + assert cp // 64 < len(chunks) chunks[cp // 64] |= 1 << (cp & 63) pub_string = "" if is_pub: pub_string = "pub " - f.write(" %sconst %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n" - % (pub_string, name)) + + yield (" %sconst %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n" + % (pub_string, name)) (r1, r2) = compute_trie(chunks, 1) - f.write(" r1: &[\n") - data = ','.join(str(node) for node in r1) - format_table_content(f, data, 12) - f.write("\n ],\n") - - f.write(" r2: &[\n") - data = ','.join('0x%016x' % node for node in r2) - format_table_content(f, data, 12) - f.write("\n ],\n") - - f.write(" };\n\n") - -def emit_property_module(f, mod, tbl, emit): - f.write("pub mod %s {\n" % mod) - for cat in sorted(emit): - if cat in ["Cc", "White_Space", "Pattern_White_Space"]: - emit_small_bool_trie(f, "%s_table" % cat, tbl[cat]) - f.write(" pub fn %s(c: char) -> bool {\n" % cat) - f.write(" %s_table.lookup(c)\n" % cat) - f.write(" }\n\n") + yield " r1: &[\n" + data = (str(node) for node in r1) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" + + yield " r2: &[\n" + data = ("0x%016x" % node for node in r2) + for fragment in generate_table_lines(data, 12): + yield fragment + yield "\n ],\n" + + yield " };\n\n" + + +def generate_property_module(mod, grouped_categories, category_subset): + # type: (str, Dict[str, List[Tuple[int, int]]], Iterable[str]) -> Iterator[str] + """ + Generate Rust code for module defining properties. + """ + + yield "pub mod %s {\n" % mod + for cat in sorted(category_subset): + if cat in ("Cc", "White_Space", "Pattern_White_Space"): + generator = generate_small_bool_trie("%s_table" % cat, grouped_categories[cat]) else: - emit_bool_trie(f, "%s_table" % cat, tbl[cat]) - f.write(" pub fn %s(c: char) -> bool {\n" % cat) - f.write(" %s_table.lookup(c)\n" % cat) - f.write(" }\n\n") - f.write("}\n\n") - -def emit_conversions_module(f, to_upper, to_lower, to_title): - f.write("pub mod conversions {") - f.write(""" + generator = generate_bool_trie("%s_table" % cat, grouped_categories[cat]) + + for fragment in generator: + yield fragment + + yield " pub fn %s(c: char) -> bool {\n" % cat + yield " %s_table.lookup(c)\n" % cat + yield " }\n\n" + + yield "}\n\n" + + +def generate_conversions_module(unicode_data): + # type: (UnicodeData) -> Iterator[str] + """ + Generate Rust code for module defining conversions. 
+ """ + + yield "pub mod conversions {" + yield """ pub fn to_lower(c: char) -> [char; 3] { match bsearch_case_table(c, to_lowercase_table) { None => [c, '\\0', '\\0'], @@ -408,80 +767,109 @@ def emit_conversions_module(f, to_upper, to_lower, to_title): fn bsearch_case_table(c: char, table: &[(char, [char; 3])]) -> Option { table.binary_search_by(|&(key, _)| key.cmp(&c)).ok() - } + }\n\n""" + + decl_type = "&[(char, [char; 3])]" + format_conversion = lambda x: "({},[{},{},{}])".format(*( + escape_char(c) for c in (x[0], x[1][0], x[1][1], x[1][2]) + )) + + for fragment in generate_table( + name="to_lowercase_table", + items=sorted(unicode_data.to_lower.items(), key=lambda x: x[0]), + decl_type=decl_type, + is_pub=False, + format_item=format_conversion + ): + yield fragment + + for fragment in generate_table( + name="to_uppercase_table", + items=sorted(unicode_data.to_upper.items(), key=lambda x: x[0]), + decl_type=decl_type, + is_pub=False, + format_item=format_conversion + ): + yield fragment + + yield "}\n" + + +def parse_args(): + # type: () -> argparse.Namespace + """ + Parse command line arguments. + """ + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-v", "--version", default=None, type=str, + help="Unicode version to use (if not specified," + " defaults to latest release).") + + return parser.parse_args() + + +def main(): + # type: () -> None + """ + Script entry point. + """ + args = parse_args() + + unicode_version = fetch_files(args.version) + print("Using Unicode version: {}".format(unicode_version.as_str)) + + # All the writing happens entirely in memory, we only write to file + # once we have generated the file content (it's not very large, <1 MB). + buf = StringIO() + buf.write(PREAMBLE) + + unicode_version_notice = textwrap.dedent(""" + /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of + /// `char` and `str` methods are based on. + #[unstable(feature = "unicode_version", issue = "49726")] + pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {{ + major: {version.major}, + minor: {version.minor}, + micro: {version.micro}, + _priv: (), + }}; + """).format(version=unicode_version) + buf.write(unicode_version_notice) + + get_path = lambda f: get_unicode_file_path(unicode_version, f) + + unicode_data = load_unicode_data(get_path(UnicodeFiles.UNICODE_DATA)) + load_special_casing(get_path(UnicodeFiles.SPECIAL_CASING), unicode_data) + + want_derived = {"XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase", + "Cased", "Case_Ignorable", "Grapheme_Extend"} + derived = load_properties(get_path(UnicodeFiles.DERIVED_CORE_PROPERTIES), want_derived) + + props = load_properties(get_path(UnicodeFiles.PROPS), + {"White_Space", "Join_Control", "Noncharacter_Code_Point", + "Pattern_White_Space"}) + + # Category tables + for (name, categories, category_subset) in ( + ("general_category", unicode_data.general_categories, ["N", "Cc"]), + ("derived_property", derived, want_derived), + ("property", props, ["White_Space", "Pattern_White_Space"]) + ): + for fragment in generate_property_module(name, categories, category_subset): + buf.write(fragment) + + for fragment in generate_conversions_module(unicode_data): + buf.write(fragment) + + tables_rs_path = os.path.join(THIS_DIR, "tables.rs") + + # Actually write out the file content. + # Will overwrite the file if it exists. 
+ with open(tables_rs_path, "w") as fd: + fd.write(buf.getvalue()) + + print("Regenerated tables.rs.") -""") - t_type = "&[(char, [char; 3])]" - pfun = lambda x: "(%s,[%s,%s,%s])" % ( - escape_char(x[0]), escape_char(x[1][0]), escape_char(x[1][1]), escape_char(x[1][2])) - emit_table(f, "to_lowercase_table", - sorted(to_lower.items(), key=operator.itemgetter(0)), - is_pub=False, t_type = t_type, pfun=pfun) - emit_table(f, "to_uppercase_table", - sorted(to_upper.items(), key=operator.itemgetter(0)), - is_pub=False, t_type = t_type, pfun=pfun) - f.write("}\n\n") - -def emit_norm_module(f, canon, compat, combine, norm_props): - canon_keys = sorted(canon.keys()) - - compat_keys = sorted(compat.keys()) - - canon_comp = {} - comp_exclusions = norm_props["Full_Composition_Exclusion"] - for char in canon_keys: - if any(lo <= char <= hi for lo, hi in comp_exclusions): - continue - decomp = canon[char] - if len(decomp) == 2: - if decomp[0] not in canon_comp: - canon_comp[decomp[0]] = [] - canon_comp[decomp[0]].append( (decomp[1], char) ) - canon_comp_keys = sorted(canon_comp.keys()) if __name__ == "__main__": - r = fdir + "tables.rs" - if os.path.exists(r): - os.remove(r) - with open(r, "w") as rf: - # write the file's preamble - rf.write(preamble) - - # download and parse all the data - fetch("ReadMe.txt") - with open(fdir + "ReadMe.txt") as readme: - pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode" - unicode_version = re.search(pattern, readme.read()).groups() - rf.write(""" -/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of -/// `char` and `str` methods are based on. -#[unstable(feature = "unicode_version", issue = "49726")] -pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion { - major: %s, - minor: %s, - micro: %s, - _priv: (), -}; -""" % unicode_version) - (canon_decomp, compat_decomp, gencats, combines, - to_upper, to_lower, to_title) = load_unicode_data("UnicodeData.txt") - load_special_casing("SpecialCasing.txt", to_upper, to_lower, to_title) - want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase", - "Cased", "Case_Ignorable", "Grapheme_Extend"] - derived = load_properties("DerivedCoreProperties.txt", want_derived) - scripts = load_properties("Scripts.txt", []) - props = load_properties("PropList.txt", - ["White_Space", "Join_Control", "Noncharacter_Code_Point", "Pattern_White_Space"]) - norm_props = load_properties("DerivedNormalizationProps.txt", - ["Full_Composition_Exclusion"]) - - # category tables - for (name, cat, pfuns) in ("general_category", gencats, ["N", "Cc"]), \ - ("derived_property", derived, want_derived), \ - ("property", props, ["White_Space", "Pattern_White_Space"]): - emit_property_module(rf, name, cat, pfuns) - - # normalizations and conversions module - emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props) - emit_conversions_module(rf, to_upper, to_lower, to_title) - print("Regenerated tables.rs.") + main() diff --git a/src/libfmt_macros/Cargo.toml b/src/libfmt_macros/Cargo.toml index fc32f21ec4e0a..a95193b85952f 100644 --- a/src/libfmt_macros/Cargo.toml +++ b/src/libfmt_macros/Cargo.toml @@ -11,3 +11,4 @@ crate-type = ["dylib"] [dependencies] syntax_pos = { path = "../libsyntax_pos" } + diff --git a/src/libgraphviz/Cargo.toml b/src/libgraphviz/Cargo.toml index a6a3c1a249d64..4a6e41f760319 100644 --- a/src/libgraphviz/Cargo.toml +++ b/src/libgraphviz/Cargo.toml @@ -7,4 +7,3 @@ edition = "2018" [lib] name = "graphviz" path = "lib.rs" -crate-type = ["dylib"] diff --git 
a/src/librustc/Cargo.toml b/src/librustc/Cargo.toml
index 4d50e80d4cf67..b6f3bc62dfd29 100644
--- a/src/librustc/Cargo.toml
+++ b/src/librustc/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2018"
 [lib]
 name = "rustc"
 path = "lib.rs"
-crate-type = ["dylib"]
+doctest = false
 
 [dependencies]
 arena = { path = "../libarena" }
@@ -37,33 +37,3 @@ chalk-engine = { version = "0.9.0", default-features=false }
 rustc_fs_util = { path = "../librustc_fs_util" }
 smallvec = { version = "0.6.7", features = ["union", "may_dangle"] }
 measureme = "0.3"
-
-# Note that these dependencies are a lie, they're just here to get linkage to
-# work.
-#
-# We're creating a bunch of dylibs for the compiler but we're also compiling a
-# bunch of crates.io crates. Everything in the compiler is compiled as an
-# rlib/dylib pair but all crates.io crates tend to just be rlibs. This means
-# we've got a problem for dependency graphs that look like:
-#
-#            foo - rustc_codegen_llvm
-#           /                       \
-#       rustc ---- rustc_driver
-#           \                       /
-#            foo - rustc_metadata
-#
-# Here the crate `foo` is linked into the `rustc_codegen_llvm` and the
-# `rustc_metadata` dylibs, meaning we've got duplicate copies! When we then
-# go to link `rustc_driver` the compiler notices this and gives us a compiler
-# error.
-#
-# To work around this problem we just add these crates.io dependencies to the
-# `rustc` crate which is a shared dependency above. That way the crate `foo`
-# shows up in the dylib for the `rustc` crate, deduplicating it and allowing
-# crates like `rustc_codegen_llvm` to use `foo` *through* the `rustc` crate.
-#
-# tl;dr; this is not needed to get `rustc` to compile, but if you remove it then
-#        later crate stop compiling. If you can remove this and everything
-#        compiles, then please feel free to do so!
-flate2 = "1.0"
-tempfile = "3.0"
diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs
index b20f7120bbfa2..dc26140ace5a5 100644
--- a/src/librustc/lib.rs
+++ b/src/librustc/lib.rs
@@ -46,7 +46,6 @@
 #![feature(extern_types)]
 #![feature(nll)]
 #![feature(non_exhaustive)]
-#![feature(proc_macro_internals)]
 #![feature(optin_builtin_traits)]
 #![feature(range_is_empty)]
 #![feature(rustc_diagnostic_macros)]
@@ -90,10 +89,8 @@ extern crate serialize as rustc_serialize;
 #[macro_use]
 extern crate smallvec;
 
-// Note that librustc doesn't actually depend on these crates, see the note in
-// `Cargo.toml` for this crate about why these are here.
-#[allow(unused_extern_crates)]
-extern crate flate2;
+// Use the test crate here so we depend on getopts through it. This allows tools to link to both
+// librustc_driver and libtest.
#[allow(unused_extern_crates)] extern crate test; diff --git a/src/librustc_allocator/Cargo.toml b/src/librustc_allocator/Cargo.toml index cf6c598bfb17b..a964f323c9e7d 100644 --- a/src/librustc_allocator/Cargo.toml +++ b/src/librustc_allocator/Cargo.toml @@ -6,7 +6,6 @@ edition = "2018" [lib] path = "lib.rs" -crate-type = ["dylib"] test = false [dependencies] diff --git a/src/librustc_apfloat/Cargo.toml b/src/librustc_apfloat/Cargo.toml index c7496a9547ea6..af6c2feed0072 100644 --- a/src/librustc_apfloat/Cargo.toml +++ b/src/librustc_apfloat/Cargo.toml @@ -10,5 +10,4 @@ path = "lib.rs" [dependencies] bitflags = "1.0" -rustc_cratesio_shim = { path = "../librustc_cratesio_shim" } smallvec = { version = "0.6.7", features = ["union", "may_dangle"] } diff --git a/src/librustc_apfloat/lib.rs b/src/librustc_apfloat/lib.rs index 1b0bcdd0b5b48..ceade5d278838 100644 --- a/src/librustc_apfloat/lib.rs +++ b/src/librustc_apfloat/lib.rs @@ -35,9 +35,6 @@ #![deny(rust_2018_idioms)] #![feature(nll)] -// See librustc_cratesio_shim/Cargo.toml for a comment explaining this. -#[allow(unused_extern_crates)] -extern crate rustc_cratesio_shim; use std::cmp::Ordering; use std::fmt; diff --git a/src/librustc_borrowck/Cargo.toml b/src/librustc_borrowck/Cargo.toml index f293739dec727..e9abc17202e76 100644 --- a/src/librustc_borrowck/Cargo.toml +++ b/src/librustc_borrowck/Cargo.toml @@ -7,8 +7,8 @@ edition = "2018" [lib] name = "rustc_borrowck" path = "lib.rs" -crate-type = ["dylib"] test = false +doctest = false [dependencies] log = "0.4" diff --git a/src/librustc_codegen_llvm/Cargo.toml b/src/librustc_codegen_llvm/Cargo.toml index 4ae8303c76d3c..291d32a06814d 100644 --- a/src/librustc_codegen_llvm/Cargo.toml +++ b/src/librustc_codegen_llvm/Cargo.toml @@ -13,6 +13,7 @@ test = false [dependencies] cc = "1.0.1" # Used to locate MSVC num_cpus = "1.0" +tempfile = "3.0" rustc-demangle = "0.1.15" rustc_llvm = { path = "../librustc_llvm" } memmap = "0.6" diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs index dbcb20315520b..0f0b9f279175c 100644 --- a/src/librustc_codegen_llvm/lib.rs +++ b/src/librustc_codegen_llvm/lib.rs @@ -39,6 +39,7 @@ extern crate rustc_incremental; extern crate rustc_codegen_utils; extern crate rustc_codegen_ssa; extern crate rustc_fs_util; +extern crate rustc_driver as _; #[macro_use] extern crate log; #[macro_use] extern crate syntax; diff --git a/src/librustc_codegen_ssa/Cargo.toml b/src/librustc_codegen_ssa/Cargo.toml index a4cb517fafed6..343596feed25f 100644 --- a/src/librustc_codegen_ssa/Cargo.toml +++ b/src/librustc_codegen_ssa/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_codegen_ssa" path = "lib.rs" -crate-type = ["dylib"] test = false [dependencies] diff --git a/src/librustc_codegen_utils/Cargo.toml b/src/librustc_codegen_utils/Cargo.toml index b218d18a06ba7..d93589ea84be0 100644 --- a/src/librustc_codegen_utils/Cargo.toml +++ b/src/librustc_codegen_utils/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_codegen_utils" path = "lib.rs" -crate-type = ["dylib"] test = false [dependencies] diff --git a/src/librustc_cratesio_shim/Cargo.toml b/src/librustc_cratesio_shim/Cargo.toml deleted file mode 100644 index 6bdfbe09354b4..0000000000000 --- a/src/librustc_cratesio_shim/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -# This crate exists to allow rustc to link certain crates from crates.io into -# the distribution. 
This doesn't work normally because: -# -# - Cargo always builds dependencies as rlibs: -# https://github.com/rust-lang/cargo/issues/629 -# - rustc wants to avoid multiple definitions of the same symbol, so it refuses -# to link multiple dylibs containing the same rlib -# - multiple dylibs depend on the same crates.io crates -# -# This solution works by including all the conflicting rlibs in a single dylib, -# which is then linked into all dylibs that depend on these crates.io crates. -# The result is that each rlib only appears once, and things work! - -[package] -authors = ["The Rust Project Developers"] -name = "rustc_cratesio_shim" -version = "0.0.0" -edition = "2018" - -[lib] -crate-type = ["dylib"] - -[dependencies] -bitflags = "1.0" -log = "0.4" -unicode-width = "0.1.4" diff --git a/src/librustc_cratesio_shim/src/lib.rs b/src/librustc_cratesio_shim/src/lib.rs deleted file mode 100644 index 4c170f4f5f6f9..0000000000000 --- a/src/librustc_cratesio_shim/src/lib.rs +++ /dev/null @@ -1,11 +0,0 @@ -#![deny(rust_2018_idioms)] - -// See Cargo.toml for a comment explaining this crate. -#![allow(unused_extern_crates)] - -#![feature(nll)] - -extern crate bitflags; -extern crate log; -extern crate proc_macro; -extern crate unicode_width; diff --git a/src/librustc_data_structures/Cargo.toml b/src/librustc_data_structures/Cargo.toml index cd792d31187bd..acddb3448ca60 100644 --- a/src/librustc_data_structures/Cargo.toml +++ b/src/librustc_data_structures/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "rustc_data_structures" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] ena = "0.13" @@ -15,7 +15,6 @@ indexmap = "1" log = "0.4" jobserver_crate = { version = "0.1.13", package = "jobserver" } lazy_static = "1" -rustc_cratesio_shim = { path = "../librustc_cratesio_shim" } serialize = { path = "../libserialize" } graphviz = { path = "../libgraphviz" } cfg-if = "0.1.2" diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs index 38dfb675237b5..b479643a5e8cd 100644 --- a/src/librustc_data_structures/lib.rs +++ b/src/librustc_data_structures/lib.rs @@ -38,10 +38,6 @@ extern crate libc; #[macro_use] extern crate cfg_if; -// See librustc_cratesio_shim/Cargo.toml for a comment explaining this. 
-#[allow(unused_extern_crates)] -extern crate rustc_cratesio_shim; - pub use rustc_serialize::hex::ToHex; #[inline(never)] diff --git a/src/librustc_errors/Cargo.toml b/src/librustc_errors/Cargo.toml index 3689a463a5c84..4df9632cce26b 100644 --- a/src/librustc_errors/Cargo.toml +++ b/src/librustc_errors/Cargo.toml @@ -7,14 +7,13 @@ edition = "2018" [lib] name = "rustc_errors" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] log = "0.4" serialize = { path = "../libserialize" } syntax_pos = { path = "../libsyntax_pos" } rustc_data_structures = { path = "../librustc_data_structures" } -rustc_cratesio_shim = { path = "../librustc_cratesio_shim" } unicode-width = "0.1.4" atty = "0.2" termcolor = "1.0" diff --git a/src/librustc_fs_util/Cargo.toml b/src/librustc_fs_util/Cargo.toml index 47918643f31fe..e74e3809927a0 100644 --- a/src/librustc_fs_util/Cargo.toml +++ b/src/librustc_fs_util/Cargo.toml @@ -7,6 +7,5 @@ edition = "2018" [lib] name = "rustc_fs_util" path = "lib.rs" -crate-type = ["dylib"] [dependencies] diff --git a/src/librustc_incremental/Cargo.toml b/src/librustc_incremental/Cargo.toml index df971ec5bdb85..9678cb4f65545 100644 --- a/src/librustc_incremental/Cargo.toml +++ b/src/librustc_incremental/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "rustc_incremental" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] graphviz = { path = "../libgraphviz" } diff --git a/src/librustc_interface/Cargo.toml b/src/librustc_interface/Cargo.toml index bcaa4216109aa..82880d2198712 100644 --- a/src/librustc_interface/Cargo.toml +++ b/src/librustc_interface/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "rustc_interface" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] log = "0.4" diff --git a/src/librustc_lint/Cargo.toml b/src/librustc_lint/Cargo.toml index fd2b635faefb4..041d0aaead913 100644 --- a/src/librustc_lint/Cargo.toml +++ b/src/librustc_lint/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_lint" path = "lib.rs" -crate-type = ["dylib"] [dependencies] log = "0.4" diff --git a/src/librustc_llvm/lib.rs b/src/librustc_llvm/lib.rs index 292ce8b0a01b0..bdf6b09185735 100644 --- a/src/librustc_llvm/lib.rs +++ b/src/librustc_llvm/lib.rs @@ -4,10 +4,6 @@ #![doc(html_root_url = "https://doc.rust-lang.org/nightly/")] -// See librustc_cratesio_shim/Cargo.toml for a comment explaining this. -#[allow(unused_extern_crates)] -extern crate rustc_cratesio_shim; - // NOTE: This crate only exists to allow linking on mingw targets. /// Initialize targets enabled by the build script via `cfg(llvm_component = "...")`. 
diff --git a/src/librustc_metadata/Cargo.toml b/src/librustc_metadata/Cargo.toml index 76aba33b6a404..e5c9f1bf2057b 100644 --- a/src/librustc_metadata/Cargo.toml +++ b/src/librustc_metadata/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "rustc_metadata" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] flate2 = "1.0" diff --git a/src/librustc_mir/Cargo.toml b/src/librustc_mir/Cargo.toml index 5de5f5e757119..695bf1f077cd2 100644 --- a/src/librustc_mir/Cargo.toml +++ b/src/librustc_mir/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "rustc_mir" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] arena = { path = "../libarena" } diff --git a/src/librustc_passes/Cargo.toml b/src/librustc_passes/Cargo.toml index 00bdcdc0cc021..de2476775b07e 100644 --- a/src/librustc_passes/Cargo.toml +++ b/src/librustc_passes/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_passes" path = "lib.rs" -crate-type = ["dylib"] [dependencies] log = "0.4" diff --git a/src/librustc_plugin/Cargo.toml b/src/librustc_plugin/Cargo.toml index 5e23aa0d7f74e..7486281c1eac1 100644 --- a/src/librustc_plugin/Cargo.toml +++ b/src/librustc_plugin/Cargo.toml @@ -8,7 +8,7 @@ edition = "2018" [lib] name = "rustc_plugin" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] rustc = { path = "../librustc" } diff --git a/src/librustc_privacy/Cargo.toml b/src/librustc_privacy/Cargo.toml index 5bf8024c56911..7cf3a5d6dcde1 100644 --- a/src/librustc_privacy/Cargo.toml +++ b/src/librustc_privacy/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_privacy" path = "lib.rs" -crate-type = ["dylib"] [dependencies] rustc = { path = "../librustc" } diff --git a/src/librustc_resolve/Cargo.toml b/src/librustc_resolve/Cargo.toml index 8e3359c775288..548f982fe3bf0 100644 --- a/src/librustc_resolve/Cargo.toml +++ b/src/librustc_resolve/Cargo.toml @@ -7,8 +7,8 @@ edition = "2018" [lib] name = "rustc_resolve" path = "lib.rs" -crate-type = ["dylib"] test = false +doctest = false [dependencies] bitflags = "1.0" diff --git a/src/librustc_save_analysis/Cargo.toml b/src/librustc_save_analysis/Cargo.toml index 767c726b761f2..88bb76d2aba3a 100644 --- a/src/librustc_save_analysis/Cargo.toml +++ b/src/librustc_save_analysis/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_save_analysis" path = "lib.rs" -crate-type = ["dylib"] [dependencies] log = "0.4" diff --git a/src/librustc_target/Cargo.toml b/src/librustc_target/Cargo.toml index 3ab25146331c1..f1b21365e4bd4 100644 --- a/src/librustc_target/Cargo.toml +++ b/src/librustc_target/Cargo.toml @@ -7,12 +7,10 @@ edition = "2018" [lib] name = "rustc_target" path = "lib.rs" -crate-type = ["dylib"] [dependencies] bitflags = "1.0" log = "0.4" -rustc_cratesio_shim = { path = "../librustc_cratesio_shim" } rustc_data_structures = { path = "../librustc_data_structures" } serialize = { path = "../libserialize" } syntax_pos = { path = "../libsyntax_pos" } diff --git a/src/librustc_target/lib.rs b/src/librustc_target/lib.rs index b65813fd8e38d..c1ec4e59ef239 100644 --- a/src/librustc_target/lib.rs +++ b/src/librustc_target/lib.rs @@ -23,10 +23,6 @@ #[allow(unused_extern_crates)] extern crate serialize as rustc_serialize; // used by deriving -// See librustc_cratesio_shim/Cargo.toml for a comment explaining this. 
-#[allow(unused_extern_crates)] -extern crate rustc_cratesio_shim; - #[macro_use] extern crate rustc_data_structures; diff --git a/src/librustc_target/spec/riscv64gc_unknown_none_elf.rs b/src/librustc_target/spec/riscv64gc_unknown_none_elf.rs index a5c13fa28e2ce..8ef197461d92f 100644 --- a/src/librustc_target/spec/riscv64gc_unknown_none_elf.rs +++ b/src/librustc_target/spec/riscv64gc_unknown_none_elf.rs @@ -23,6 +23,7 @@ pub fn target() -> TargetResult { executables: true, panic_strategy: PanicStrategy::Abort, relocation_model: "static".to_string(), + code_model: Some("medium".to_string()), emit_debug_gdb_scripts: false, abi_blacklist: super::riscv_base::abi_blacklist(), eliminate_frame_pointer: false, diff --git a/src/librustc_target/spec/riscv64imac_unknown_none_elf.rs b/src/librustc_target/spec/riscv64imac_unknown_none_elf.rs index 237d615ffcc4b..e8a91f0ef9db0 100644 --- a/src/librustc_target/spec/riscv64imac_unknown_none_elf.rs +++ b/src/librustc_target/spec/riscv64imac_unknown_none_elf.rs @@ -23,6 +23,7 @@ pub fn target() -> TargetResult { executables: true, panic_strategy: PanicStrategy::Abort, relocation_model: "static".to_string(), + code_model: Some("medium".to_string()), emit_debug_gdb_scripts: false, abi_blacklist: super::riscv_base::abi_blacklist(), eliminate_frame_pointer: false, diff --git a/src/librustc_traits/Cargo.toml b/src/librustc_traits/Cargo.toml index da19cc95eb95a..bb28ac839a544 100644 --- a/src/librustc_traits/Cargo.toml +++ b/src/librustc_traits/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "rustc_traits" path = "lib.rs" -crate-type = ["dylib"] [dependencies] bitflags = "1.0" diff --git a/src/librustc_typeck/Cargo.toml b/src/librustc_typeck/Cargo.toml index dcfcd74257e6f..ac3966676838a 100644 --- a/src/librustc_typeck/Cargo.toml +++ b/src/librustc_typeck/Cargo.toml @@ -7,8 +7,8 @@ edition = "2018" [lib] name = "rustc_typeck" path = "lib.rs" -crate-type = ["dylib"] test = false +doctest = false [dependencies] arena = { path = "../libarena" } diff --git a/src/libserialize/Cargo.toml b/src/libserialize/Cargo.toml index fa31a68a75b72..c302bcf95dcad 100644 --- a/src/libserialize/Cargo.toml +++ b/src/libserialize/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [lib] name = "serialize" path = "lib.rs" -crate-type = ["dylib", "rlib"] [dependencies] indexmap = "1" diff --git a/src/libsyntax/Cargo.toml b/src/libsyntax/Cargo.toml index b48f3c9b8b8d8..c5daa6564767e 100644 --- a/src/libsyntax/Cargo.toml +++ b/src/libsyntax/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "syntax" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] bitflags = "1.0" diff --git a/src/libsyntax_ext/Cargo.toml b/src/libsyntax_ext/Cargo.toml index 773f0948a8a10..eafbe6371a3c5 100644 --- a/src/libsyntax_ext/Cargo.toml +++ b/src/libsyntax_ext/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "syntax_ext" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] fmt_macros = { path = "../libfmt_macros" } diff --git a/src/libsyntax_pos/Cargo.toml b/src/libsyntax_pos/Cargo.toml index af7edc0a6bd3e..eebd25d1fafd8 100644 --- a/src/libsyntax_pos/Cargo.toml +++ b/src/libsyntax_pos/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [lib] name = "syntax_pos" path = "lib.rs" -crate-type = ["dylib"] +doctest = false [dependencies] serialize = { path = "../libserialize" } diff --git a/src/llvm-project b/src/llvm-project index 1bbe0b3e1d756..8538d56b2dd45 160000 --- a/src/llvm-project +++ b/src/llvm-project @@ -1 +1 @@ -Subproject commit 
1bbe0b3e1d756116cbf1fcf049555066ef929008
+Subproject commit 8538d56b2dd450063547a7690f7ffa2ac37c9c65
diff --git a/src/test/run-make-fulldeps/issue-19371/foo.rs b/src/test/run-make-fulldeps/issue-19371/foo.rs
index 0cbdf40e2f908..3c4f2cd541f4e 100644
--- a/src/test/run-make-fulldeps/issue-19371/foo.rs
+++ b/src/test/run-make-fulldeps/issue-19371/foo.rs
@@ -2,6 +2,8 @@
 extern crate rustc;
 extern crate rustc_interface;
+#[allow(unused_extern_crates)]
+extern crate rustc_driver;
 extern crate syntax;
 
 use rustc::session::DiagnosticOutput;
diff --git a/src/test/run-pass/auxiliary/arc_wake.rs b/src/test/run-pass/auxiliary/arc_wake.rs
deleted file mode 100644
index c21886f26f467..0000000000000
--- a/src/test/run-pass/auxiliary/arc_wake.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-// edition:2018
-
-use std::sync::Arc;
-use std::task::{
-    Waker, RawWaker, RawWakerVTable,
-};
-
-macro_rules! waker_vtable {
-    ($ty:ident) => {
-        &RawWakerVTable::new(
-            clone_arc_raw::<$ty>,
-            wake_arc_raw::<$ty>,
-            wake_by_ref_arc_raw::<$ty>,
-            drop_arc_raw::<$ty>,
-        )
-    };
-}
-
-pub trait ArcWake {
-    fn wake(self: Arc<Self>);
-
-    fn wake_by_ref(arc_self: &Arc<Self>) {
-        arc_self.clone().wake()
-    }
-
-    fn into_waker(wake: Arc<Self>) -> Waker where Self: Sized
-    {
-        let ptr = Arc::into_raw(wake) as *const ();
-
-        unsafe {
-            Waker::from_raw(RawWaker::new(ptr, waker_vtable!(Self)))
-        }
-    }
-}
-
-unsafe fn increase_refcount<T: ArcWake>(data: *const ()) {
-    // Retain Arc by creating a copy
-    let arc: Arc<T> = Arc::from_raw(data as *const T);
-    let arc_clone = arc.clone();
-    // Forget the Arcs again, so that the refcount isn't decrased
-    let _ = Arc::into_raw(arc);
-    let _ = Arc::into_raw(arc_clone);
-}
-
-unsafe fn clone_arc_raw<T: ArcWake>(data: *const ()) -> RawWaker {
-    increase_refcount::<T>(data);
-    RawWaker::new(data, waker_vtable!(T))
-}
-
-unsafe fn drop_arc_raw<T: ArcWake>(data: *const ()) {
-    // Drop Arc
-    let _: Arc<T> = Arc::from_raw(data as *const T);
-}
-
-unsafe fn wake_arc_raw<T: ArcWake>(data: *const ()) {
-    let arc: Arc<T> = Arc::from_raw(data as *const T);
-    ArcWake::wake(arc);
-}
-
-unsafe fn wake_by_ref_arc_raw<T: ArcWake>(data: *const ()) {
-    let arc: Arc<T> = Arc::from_raw(data as *const T);
-    ArcWake::wake_by_ref(&arc);
-    let _ = Arc::into_raw(arc);
-}
diff --git a/src/test/run-pass/async-await/async-fn-size-moved-locals.rs b/src/test/ui/async-await/async-fn-size-moved-locals.rs
similarity index 99%
rename from src/test/run-pass/async-await/async-fn-size-moved-locals.rs
rename to src/test/ui/async-await/async-fn-size-moved-locals.rs
index 139be7fe0132b..8d24ffe7a7c8c 100644
--- a/src/test/run-pass/async-await/async-fn-size-moved-locals.rs
+++ b/src/test/ui/async-await/async-fn-size-moved-locals.rs
@@ -7,6 +7,8 @@
 //
 // See issue #59123 for a full explanation.
+// run-pass + // edition:2018 #![feature(async_await)] diff --git a/src/test/run-pass/async-await/async-fn-size.rs b/src/test/ui/async-await/async-fn-size.rs similarity index 97% rename from src/test/run-pass/async-await/async-fn-size.rs rename to src/test/ui/async-await/async-fn-size.rs index 7396918196c08..c6b2ed13b0a8d 100644 --- a/src/test/run-pass/async-await/async-fn-size.rs +++ b/src/test/ui/async-await/async-fn-size.rs @@ -1,9 +1,10 @@ +// run-pass +// aux-build:arc_wake.rs // edition:2018 #![feature(async_await)] -#[path = "../auxiliary/arc_wake.rs"] -mod arc_wake; +extern crate arc_wake; use std::pin::Pin; use std::future::Future; diff --git a/src/test/run-pass/futures-api.rs b/src/test/ui/async-await/futures-api.rs similarity index 98% rename from src/test/run-pass/futures-api.rs rename to src/test/ui/async-await/futures-api.rs index ee77053fd5b6a..a7da058de3081 100644 --- a/src/test/run-pass/futures-api.rs +++ b/src/test/ui/async-await/futures-api.rs @@ -1,3 +1,5 @@ +// run-pass + // aux-build:arc_wake.rs extern crate arc_wake; diff --git a/src/test/run-pass/async-await/issue-60709.rs b/src/test/ui/async-await/issue-60709.rs similarity index 98% rename from src/test/run-pass/async-await/issue-60709.rs rename to src/test/ui/async-await/issue-60709.rs index 5ebb18b999ab6..ad0b49fa4a219 100644 --- a/src/test/run-pass/async-await/issue-60709.rs +++ b/src/test/ui/async-await/issue-60709.rs @@ -2,6 +2,8 @@ // handled incorrectly in generators. // compile-flags: -Copt-level=z -Cdebuginfo=2 --edition=2018 +// run-pass + #![feature(async_await)] #![allow(unused)]
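A note on the trie layout that unicode.py generates above: the postfix compression in compute_trie is easiest to see end to end. The following self-contained Python sketch (the helper names are hypothetical; nothing below is part of this patch) builds the 64-bit chunk bitmaps for the 0x800..0x10000 range the same way generate_bool_trie does, compresses them with the same algorithm as compute_trie, and then performs a membership lookup the way the generated r2/r3 tables are meant to be consumed on the Rust side, per bool_trie.rs:

    # Illustrative sketch (not part of this patch): the r2/r3 layout that
    # generate_bool_trie produces for codepoints in 0x800..0x10000.

    def compute_trie_demo(raw_data, chunk_size):
        # Same postfix compression as compute_trie above: identical
        # chunks are stored once and shared via an index in the root.
        root, childmap, child_data = [], {}, []
        for i in range(len(raw_data) // chunk_size):
            child = tuple(raw_data[i * chunk_size:(i + 1) * chunk_size])
            if child not in childmap:
                childmap[child] = len(childmap)
                child_data.extend(child)
            root.append(childmap[child])
        return root, child_data

    def trie_member(codepoint, r2, r3):
        # One 64-bit chunk covers 64 codepoints; r2 maps a chunk index to a
        # shared leaf in r3 (chunk_size == 1, so every leaf is one chunk).
        chunk = r3[r2[(codepoint - 0x800) >> 6]]
        return (chunk >> (codepoint & 63)) & 1 != 0

    # Mark 0x1000..0x103F as members, exactly like generate_bool_trie
    # does when it bitmaps the grouped codepoint ranges.
    chunks = [0] * ((0x10000 - 0x800) // 64)
    for cp in range(0x1000, 0x1040):
        chunks[(cp - 0x800) >> 6] |= 1 << (cp & 63)

    r2, r3 = compute_trie_demo(chunks, 1)
    assert len(r3) == 2            # only two distinct leaves: all-zero, all-one
    assert trie_member(0x1000, r2, r3)
    assert not trie_member(0x2000, r2, r3)

Sharing identical leaves is what keeps the emitted tables small: most 64-codepoint blocks are uniformly in or out of a property, so they collapse to a handful of shared chunks in r3 while r2 stays a flat array of small indices.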