From 6e751a441e0a207e01df6c6c9d18d452d5acb115 Mon Sep 17 00:00:00 2001 From: Allan Banaag Date: Mon, 5 Feb 2024 14:17:49 -0800 Subject: [PATCH] Sync for validator cpp engine and cpp htmlparser (#39797) * Migrate from `cfg = "host"` to `cfg = "exec"` PiperOrigin-RevId: 488423550 * ...Fix risky undefined behavior when host is empty.... PiperOrigin-RevId: 488505778 * Allow ampjs domain for serving amp js files. PiperOrigin-RevId: 489206825 * No description. PiperOrigin-RevId: 491684683 * Explicitly cast a char32_t to uint32_t before streaming it to output. In C++17, the char32_t is silently formatted as a number, but in C++20, overloads of << accepting wide character types are deleted, breaking compilation of this code. The value was also formatted in decimal as far as I can tell, but all other values here are formatted in hex, so do that here as well. PiperOrigin-RevId: 499523425 * Fix AMP for Email spec URLs. Some of these URLs were pointing to the AMP for Websites documentation. PiperOrigin-RevId: 508406092 * Replace greggrothaus (now a xoogler) with Erwin in the OWNERS file. Edited README to: - Move htmlparser out of beta. It is pretty stable and has been running in several production services since 2019. - Remove references to it being maintained by the AMP working group, as it will be maintained by ex-engineers of the working group.
PiperOrigin-RevId: 518921324 * Internal Code Change PiperOrigin-RevId: 540320460 * Internal Code Change PiperOrigin-RevId: 540323049 * Update README.md --------- Co-authored-by: Googler Co-authored-by: Amaltas Bohra --- validator/cpp/engine/embed_data.bzl | 2 +- validator/cpp/engine/parse-layout-sizes.h | 2 +- validator/cpp/engine/parse-srcset_test.cc | 2 +- validator/cpp/engine/testing-utils.cc | 2 -- validator/cpp/engine/utf8-util_test.cc | 2 +- validator/cpp/engine/validator-internal.cc | 36 +++++++++---------- validator/cpp/engine/validator_test.cc | 32 +++++++++-------- validator/cpp/engine/wasm/BUILD | 6 ---- validator/cpp/engine/wasm/validator.js | 2 +- validator/cpp/htmlparser/BUILD | 4 +-- validator/cpp/htmlparser/README.md | 17 +-------- validator/cpp/htmlparser/allocator.h | 8 ++--- .../cpp/htmlparser/bin/entitytablegen.cc | 2 +- .../cpp/htmlparser/css/parse-css-urls.cc | 2 +- validator/cpp/htmlparser/css/parse-css.cc | 6 ++-- validator/cpp/htmlparser/css/parse-css.h | 2 +- validator/cpp/htmlparser/data/CaseFolding.txt | 10 +++--- validator/cpp/htmlparser/data/amptags.txt | 1 - validator/cpp/htmlparser/data/jsongrammar.txt | 2 +- validator/cpp/htmlparser/fileutil.cc | 2 +- .../cpp/htmlparser/grammar/tablebuilder.h | 4 +-- validator/cpp/htmlparser/json/types.h | 4 +-- validator/cpp/htmlparser/node.h | 2 +- validator/cpp/htmlparser/parser.cc | 4 +-- validator/cpp/htmlparser/parser_test.cc | 2 +- validator/cpp/htmlparser/strings.cc | 6 ++-- validator/cpp/htmlparser/strings.h | 4 +-- validator/cpp/htmlparser/strings_test.cc | 2 +- validator/cpp/htmlparser/url.cc | 3 +- validator/cpp/htmlparser/url.h | 2 +- validator/cpp/htmlparser/url_test.cc | 6 ++-- .../htmlparser/validators/ipaddress_test.cc | 2 +- .../cpp/htmlparser/validators/json_test.cc | 2 +- .../validators/supported_media_query_test.cc | 2 +- 34 files changed, 84 insertions(+), 103 deletions(-) diff --git a/validator/cpp/engine/embed_data.bzl b/validator/cpp/engine/embed_data.bzl index 
ff9ee299a9d9..ba0f40fd9867 100644 --- a/validator/cpp/engine/embed_data.bzl +++ b/validator/cpp/engine/embed_data.bzl @@ -39,7 +39,7 @@ embed_data = rule( ), "header_generator": attr.label( executable = True, - cfg = "host", + cfg = "exec", allow_files = True, default = Label( "//cpp/engine/scripts:filecontents_to_cpp_header", diff --git a/validator/cpp/engine/parse-layout-sizes.h b/validator/cpp/engine/parse-layout-sizes.h index 130e20ca8654..2150cf858e03 100644 --- a/validator/cpp/engine/parse-layout-sizes.h +++ b/validator/cpp/engine/parse-layout-sizes.h @@ -9,7 +9,7 @@ namespace amp::validator::parse_layout_sizes { // WARNING: This code is still in development and not ready to be used. -// This is a single representation for the CssSizes object. +// This is a single represenation for the CssSizes object. // It consists of at least a valid size and a possible media condition. // See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#Attributes struct CssSize { diff --git a/validator/cpp/engine/parse-srcset_test.cc b/validator/cpp/engine/parse-srcset_test.cc index 97bf3435e9ab..894ae8bf7db8 100644 --- a/validator/cpp/engine/parse-srcset_test.cc +++ b/validator/cpp/engine/parse-srcset_test.cc @@ -150,7 +150,7 @@ TEST(ParseSrcsetTest, LeadingAndTrailingCommasAndCommasInUrl) { EqCandidates({{"example.com/,/,/,/,50w", "1x"}})); } -TEST(ParseSrcsetTest, NoWhitespace) { +TEST(ParseSrcsetTest, NoWhitepsace) { SrcsetParsingResult result = ParseSourceSet("image 100w,image 50w"); EXPECT_TRUE(result.success); EXPECT_THAT(result.srcset_images, diff --git a/validator/cpp/engine/testing-utils.cc b/validator/cpp/engine/testing-utils.cc index b6e115d30963..f6cc635024dc 100644 --- a/validator/cpp/engine/testing-utils.cc +++ b/validator/cpp/engine/testing-utils.cc @@ -97,8 +97,6 @@ const std::map& TestCases() { "external/amphtml-extensions/*/*/test/*.html", &html_files)) << "Test cases file pattern not found."; - CHECK(!html_files.empty()) << "Validator test cases are 
empty. Will not proceed."; - std::sort(html_files.begin(), html_files.end()); for (const std::string& html_file : html_files) { if (html_file.find("/js_only/") != std::string::npos) continue; diff --git a/validator/cpp/engine/utf8-util_test.cc b/validator/cpp/engine/utf8-util_test.cc index a2a2472a646c..0f5f6b9bf783 100644 --- a/validator/cpp/engine/utf8-util_test.cc +++ b/validator/cpp/engine/utf8-util_test.cc @@ -14,7 +14,7 @@ TEST(Utf8UtilTest, Utf16StrLen) { // It's 34 bytes long and 22 utf-8 characters long. Javascript uses UTF16 // strings and string lengths. // The chars in Iñtërnâtiônàlizætiøn vary between 1 and 2 byte lengths, all - // javascript 1-char lengths. The ⚡ is a 3-byte length character, with a + // javascript 1-char lenghts. The ⚡ is a 3-byte length character, with a // 1-char javascript length. Finally the 💩 is a 4-byte length character with // a 2-char javascript length. EXPECT_EQ(Utf16StrLen("Iñtërnâtiônàlizætiøn☃💩"), 23); diff --git a/validator/cpp/engine/validator-internal.cc b/validator/cpp/engine/validator-internal.cc index fd62ee99e249..63856a5eb540 100644 --- a/validator/cpp/engine/validator-internal.cc +++ b/validator/cpp/engine/validator-internal.cc @@ -644,9 +644,9 @@ struct ParsedReferencePoint { class ParsedReferencePoints { public: ParsedReferencePoints() : parent_(nullptr) {} - ParsedReferencePoints( - const TagSpec& parent, - const unordered_map& tag_spec_ids_by_tag_spec_name) + ParsedReferencePoints(const TagSpec& parent, + const absl::flat_hash_map& + tag_spec_ids_by_tag_spec_name) : parent_(&parent) { for (const ReferencePoint& p : parent.reference_points()) { auto iter = tag_spec_ids_by_tag_spec_name.find(p.tag_spec_name()); @@ -1041,11 +1041,11 @@ RecordValidated ShouldRecordTagspecValidated( // which is unique within its context, the ParsedValidatorRules. 
class ParsedTagSpec { public: - ParsedTagSpec( - ParsedAttrSpecs* parsed_attr_specs, - const unordered_map& tag_spec_ids_by_tag_spec_name, - RecordValidated should_record_tagspec_validated, const TagSpec* spec, - int32_t id) + ParsedTagSpec(ParsedAttrSpecs* parsed_attr_specs, + const absl::flat_hash_map& + tag_spec_ids_by_tag_spec_name, + RecordValidated should_record_tagspec_validated, + const TagSpec* spec, int32_t id) : spec_(spec), id_(id), reference_points_(*spec, tag_spec_ids_by_tag_spec_name), @@ -1192,7 +1192,7 @@ class ParsedTagSpec { // Whether or not the tag should be recorded via // Context->RecordTagspecValidated if it was validated - // successfully. For performance, this is only done for tags that + // successfullly. For performance, this is only done for tags that // are mandatory, unique, or possibly required by some other tag. RecordValidated ShouldRecordTagspecValidated() const { return should_record_tagspec_validated_; @@ -1218,7 +1218,7 @@ class ParsedTagSpec { const set& implicit_attrspecs() const { return implicit_attrspecs_; } - const unordered_map& attr_ids_by_name() const { + const absl::flat_hash_map& attr_ids_by_name() const { return attr_ids_by_name_; } @@ -1240,7 +1240,7 @@ class ParsedTagSpec { bool is_reference_point_; bool is_type_json_ = false; bool contains_url_ = false; - unordered_map attr_ids_by_name_; + absl::flat_hash_map attr_ids_by_name_; vector disabled_by_; vector enabled_by_; vector mandatory_attr_ids_; @@ -1284,7 +1284,7 @@ std::string TagSpecUrl(const TagSpec& spec) { return StrCat(extension_spec_url_prefix, spec.extension_spec().name()); if (spec.requires_extension_size() > 0) // Return the first |requires_extension|, which should be the most - // representative. + // representitive. 
return StrCat(extension_spec_url_prefix, spec.requires_extension(0)); return ""; @@ -2476,7 +2476,7 @@ class Context { if (!tag_result.best_match_tag_spec) return; const ParsedTagSpec* parsed_tag_spec = tag_result.best_match_tag_spec; if (!parsed_tag_spec->AttrsCanSatisfyExtension()) return; - const unordered_map& attr_ids_by_name = + const absl::flat_hash_map& attr_ids_by_name = parsed_tag_spec->attr_ids_by_name(); ExtensionsContext* extensions_ctx = mutable_extensions(); for (const ParsedHtmlTagAttr& attr : encountered_tag.Attributes()) { @@ -2834,11 +2834,11 @@ class InvalidRuleVisitor : public htmlparser::css::RuleVisitor { class InvalidDeclVisitor : public htmlparser::css::RuleVisitor { public: InvalidDeclVisitor(const ParsedDocCssSpec& css_spec, Context* context, - const std::string& tag_descriptive_name, + const std::string& tag_decriptive_name, ValidationResult* result) : css_spec_(css_spec), context_(context), - tag_descriptive_name_(tag_descriptive_name), + tag_descriptive_name_(tag_decriptive_name), result_(result) {} void VisitDeclaration( @@ -4412,7 +4412,7 @@ void ValidateAttributes(const ParsedTagSpec& parsed_tag_spec, set mandatory_anyofs_seen; vector parsed_trigger_specs; set attrspecs_validated; - const unordered_map& attr_ids_by_name = + const absl::flat_hash_map& attr_ids_by_name = parsed_tag_spec.attr_ids_by_name(); for (const ParsedHtmlTagAttr& attr : encountered_tag.Attributes()) { @@ -4717,7 +4717,7 @@ ParsedValidatorRules::ParsedValidatorRules(HtmlFormat::Code html_format) // |tag_spec_names_to_track| to identify those tagspecs that are // referenced by others via "also_requires_tag". The ParsedTagSpec // constructor completes this translation to ids. 
- unordered_map tag_spec_ids_by_tag_spec_name; + absl::flat_hash_map tag_spec_ids_by_tag_spec_name; unordered_set tag_spec_names_to_track; for (int ii = 0; ii < rules_.tags_size(); ++ii) { const TagSpec& tag = rules_.tags(ii); @@ -5622,7 +5622,7 @@ void ReferencePointMatcher::RecordMatch(const ParsedTagSpec& reference_point) { void ReferencePointMatcher::ExitParentTag(const Context& context, ValidationResult* result) const { - absl::node_hash_map reference_point_by_count; + absl::flat_hash_map reference_point_by_count; for (int32_t r : reference_points_matched_) ++reference_point_by_count[r]; for (const ParsedReferencePoint& p : *parsed_reference_points_) { if (p.point->mandatory() && reference_point_by_count.find(p.tag_spec_id) == diff --git a/validator/cpp/engine/validator_test.cc b/validator/cpp/engine/validator_test.cc index a4421cf7f84d..545f14abbe59 100644 --- a/validator/cpp/engine/validator_test.cc +++ b/validator/cpp/engine/validator_test.cc @@ -481,8 +481,8 @@ TEST(ValidatorTest, TestCssLengthAmpEmail) { ":13:2 The author stylesheet specified in tag 'style amp-custom' " "is too long - document contains 75001 bytes whereas the " "limit is 75000 " - "bytes. (see https://amp.dev/documentation/guides-and-tutorials/email/learn/" - "spec/amphtml#maximum-size)"); + "bytes. (see https://amp.dev/documentation/guides-and-tutorials/email/" + "learn/spec/amphtml#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } @@ -527,7 +527,8 @@ TEST(ValidatorTest, TestCssLengthAmpEmail) { ":19:6 The author stylesheet specified in tag 'style amp-custom' " "and the combined inline styles is too large - document contains 75010 " "bytes whereas the limit is 75000 bytes. 
(see https://amp.dev/" - "documentation/guides-and-tutorials/email/learn/spec/amphtml#maximum-size)"); + "documentation/guides-and-tutorials/email/learn/spec/amphtml" + "#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } @@ -555,7 +556,8 @@ TEST(ValidatorTest, TestCssLengthAmpEmail) { ":7519:6 The author stylesheet specified in tag 'style amp-custom' " "and the combined inline styles is too large - document contains 75014 " "bytes whereas the limit is 75000 bytes. (see https://amp.dev/" - "documentation/guides-and-tutorials/email/learn/spec/amphtml#maximum-size)"); + "documentation/guides-and-tutorials/email/learn/spec/amphtml" + "#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } @@ -649,8 +651,8 @@ TEST(ValidatorTest, TestCssLengthAmpEmailStrict) { ":13:2 The author stylesheet specified in tag 'style amp-custom' " "is too long - document contains 75001 bytes whereas the " "limit is 75000 " - "bytes. (see https://amp.dev/documentation/guides-and-tutorials/email/learn/" - "spec/amphtml#maximum-size)"); + "bytes. (see https://amp.dev/documentation/guides-and-tutorials/email/" + "learn/spec/amphtml#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } @@ -681,7 +683,8 @@ TEST(ValidatorTest, TestCssLengthAmpEmailStrict) { ":19:6 The author stylesheet specified in tag 'style amp-custom' " "and the combined inline styles is too large - document contains 75010 " "bytes whereas the limit is 75000 bytes. 
(see https://amp.dev/" - "documentation/guides-and-tutorials/email/learn/spec/amphtml#maximum-size)"); + "documentation/guides-and-tutorials/email/learn/spec/amphtml" + "#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } @@ -701,7 +704,8 @@ TEST(ValidatorTest, TestCssLengthAmpEmailStrict) { ":3769:6 The author stylesheet specified in tag 'style amp-custom' " "and the combined inline styles is too large - document contains 75014 " "bytes whereas the limit is 75000 bytes. (see https://amp.dev/" - "documentation/guides-and-tutorials/email/learn/spec/amphtml#maximum-size)"); + "documentation/guides-and-tutorials/email/learn/spec/amphtml" + "#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } @@ -730,12 +734,12 @@ TEST(ValidatorTest, TestCssLengthAmpEmailStrict) { std::string output = RenderResult( /*filename=*/test_case_name, amp::validator::Validate(test_html, HtmlFormat::AMP4EMAIL)); - std::string expected_output = - StrCat("FAIL\n", test_case_name, - ":17:2 The inline style specified in tag 'div' is too long - it " - "contains 1001 bytes whereas the limit is 1000 bytes. (see " - "https://amp.dev/documentation/guides-and-tutorials/email/learn/spec/" - "amphtml#maximum-size)"); + std::string expected_output = StrCat( + "FAIL\n", test_case_name, + ":17:2 The inline style specified in tag 'div' is too long - it " + "contains 1001 bytes whereas the limit is 1000 bytes. (see " + "https://amp.dev/documentation/guides-and-tutorials/email/learn/spec/" + "amphtml#maximum-size)"); EXPECT_EQ(expected_output, output) << "test case " << test_case_name; } } diff --git a/validator/cpp/engine/wasm/BUILD b/validator/cpp/engine/wasm/BUILD index e6ddc86cc49f..4253cb6e5a2d 100644 --- a/validator/cpp/engine/wasm/BUILD +++ b/validator/cpp/engine/wasm/BUILD @@ -1,7 +1,6 @@ # Wraps AMP Validator into a WebAssembly library, # which can be used by javascript files. 
-load("@bazel_skylib//rules:build_test.bzl", "build_test") load("@emsdk//emscripten_toolchain:wasm_rules.bzl", "wasm_cc_binary") load("@io_bazel_rules_closure//closure:defs.bzl", "closure_js_binary", "closure_js_library") @@ -73,8 +72,3 @@ closure_js_binary( ":validator_js_lib", ], ) - -build_test( - name = "validator_js_test", - targets = [":validator_js_bin"], -) diff --git a/validator/cpp/engine/wasm/validator.js b/validator/cpp/engine/wasm/validator.js index e4008ede60ad..5014482505fa 100644 --- a/validator/cpp/engine/wasm/validator.js +++ b/validator/cpp/engine/wasm/validator.js @@ -83,7 +83,7 @@ function digitizeValidationErrorFields(error) { /** * When transforming validation errors and validation results from jspb to plain * objects, the protobuf base64 string is also attached to the output. - * Hence when a plain object needs to be transformed back to protobuf, + * Hence when a plain object neeeds to be transformed back to protobuf, * the attached base64 could be directly used. */ const PB_BASE64 = '_PB_BASE64'; diff --git a/validator/cpp/htmlparser/BUILD b/validator/cpp/htmlparser/BUILD index 3d31520f96a2..78de2920159d 100644 --- a/validator/cpp/htmlparser/BUILD +++ b/validator/cpp/htmlparser/BUILD @@ -71,7 +71,7 @@ cc_test( ) # Similar to go lang's defer statement. Defers the execution of statement -# until in which it is declared goes out of scope. +# until in which it is decalred goes out of scope. cc_library( name = "defer", hdrs = [ @@ -80,7 +80,7 @@ cc_library( copts = ["-std=c++17"], ) -# Helper library declares various doctype constants and a utility function to +# Helper library decalres various doctype constants and a utility function to # parse doctype string and extract various components in it. 
cc_library( name = "doctype", diff --git a/validator/cpp/htmlparser/README.md b/validator/cpp/htmlparser/README.md index 53bd5637fea5..b181218b06df 100644 --- a/validator/cpp/htmlparser/README.md +++ b/validator/cpp/htmlparser/README.md @@ -1,21 +1,6 @@ # HTML Parser -This is an HTML5 compliant parser written in C++. It was created to be used by -the -[AMPHTML Validator](https://github.com/ampproject/amphtml/tree/main/validator) -to standardize how AMPHTML documents should be parsed for AMP validation. - -## Maintainers - -This parser is maintained by the [AMP Working Group](https://amp.dev/community/working-groups/amp4email/): -[Caching](https://amp.dev/community/working-groups/caching/) - -## Current Status - -This parser is in active development and has several outstanding TODOs. -These TODOs may cause certain parsing tests to fail. Those tests have been -excluded until the TODOs are resolved. See htmldataset_test.cc for a list of -those tests. +This is an HTML5 compliant parser written in C++. ## Building and Testing with Bazel diff --git a/validator/cpp/htmlparser/allocator.h b/validator/cpp/htmlparser/allocator.h index 09e9d4b9dd50..0620c4ed32fb 100644 --- a/validator/cpp/htmlparser/allocator.h +++ b/validator/cpp/htmlparser/allocator.h @@ -62,7 +62,7 @@ // is naturally aligned if the address used to identify it has an 8-byte // alignment. // -// Following data structure contains members totaling 13 bytes, but it's actual +// Following data struture contains members totaling 13 bytes, but it's actual // size is 24 bytes due to 8 byte alignment. // // Alignment is always equal to the largest sized element in the structure. @@ -201,10 +201,10 @@ class Allocator { Allocator& operator=(const Allocator&) = delete; // Allocates memory of same size required to construct object of type T. - // Returns nullptr if allocation failed. + // Returns nullptr if alloction failed. 
void* Allocate() { // Checks if remaining bytes in block are less than object size, or - // remaining bytes after alignment is less than object size. + // reamining bytes after alignment is less than object size. // Add a new block. if (object_size_ > remaining_ || !AlignFreeAddress()) { if (!NewBlock()) return nullptr; @@ -338,7 +338,7 @@ class Allocator { } // If the block's address is not aligned, moves the pointer to the address - // that is multiple of alignment_. + // that is multiple of aligment_. bool AlignFreeAddress() { // Checks how many bytes to skip to be at the correct alignment. if (const std::size_t skip = diff --git a/validator/cpp/htmlparser/bin/entitytablegen.cc b/validator/cpp/htmlparser/bin/entitytablegen.cc index 20463ab0b9d0..95810636a429 100644 --- a/validator/cpp/htmlparser/bin/entitytablegen.cc +++ b/validator/cpp/htmlparser/bin/entitytablegen.cc @@ -139,7 +139,7 @@ int main(int argc, char** argv) { if ((code_point & 0xffffff80) == 0) { // 1 byte sequence. // 0b0xxxxxx. - fd << "\\x" << code_point; + fd << "\\x" << std::hex << static_cast(code_point); } else if ((code_point & 0xfffff800) == 0) { // 2 byte sequence. // 0b110xxxxx 0b10xxxxxx. 
fd << "\\x" << std::hex << ((code_point >> 6) | 0xc0) diff --git a/validator/cpp/htmlparser/css/parse-css-urls.cc b/validator/cpp/htmlparser/css/parse-css-urls.cc index 75ce200899ae..9d2ae3d3033d 100644 --- a/validator/cpp/htmlparser/css/parse-css-urls.cc +++ b/validator/cpp/htmlparser/css/parse-css-urls.cc @@ -47,7 +47,7 @@ void Preprocess(vector* codepoints) { out.push_back('\n'); last_codepoint_was_cr = true; break; - case '\f': // also known as form feed (FF) + case '\f': // also knwon as form feed (FF) out.push_back('\n'); last_codepoint_was_cr = false; break; diff --git a/validator/cpp/htmlparser/css/parse-css.cc b/validator/cpp/htmlparser/css/parse-css.cc index 7e0b4aa9ab26..4df12b588e33 100644 --- a/validator/cpp/htmlparser/css/parse-css.cc +++ b/validator/cpp/htmlparser/css/parse-css.cc @@ -85,7 +85,7 @@ const std::string& Token::StringValue() const { } std::string Token::ToString() const { - // The following are overridden in their class: AT_KEYWORD, CLOSE_CURLY, + // The following are overriden in their class: AT_KEYWORD, CLOSE_CURLY, // CLOSE_PAREN, CLOSE_SQUARE, DELIM, DIMENSION, FUNCTION_TOKEN, IDENT, // NUMBER, OPEN_CURLY, OPEN_PAREN, OPEN_SQUARE, PERCENTAGE, STRING, URL switch (Type()) { @@ -343,7 +343,7 @@ bool Whitespace(char32_t code) { char32_t kMaximumallowedcodepoint = 0x10ffff; // A MarkedPosition object saves position information from the tokenizer -// provided as |line| and |col| to the constructor and can later write that +// rovided as |line| and |col| to the constructor and can later write that // position back to a Token object. class MarkedPosition { public: @@ -2471,7 +2471,7 @@ CombinatorType::Code CombinatorTypeForToken(const Token& token) { if (IsDelim(token, "+")) return CombinatorType::ADJACENT_SIBLING; if (IsDelim(token, "~")) return CombinatorType::GENERAL_SIBLING; // CombinatorTypeForToken is only ever called if the token has one of these - // delimiters, so reaching this point is impossible. 
+ // delimitors, so reaching this point is impossible. CHECK(false) << absl::StrCat( "not a combinator token - type=", TokenType::Code_Name(token.Type()), " value=", token.StringValue()); diff --git a/validator/cpp/htmlparser/css/parse-css.h b/validator/cpp/htmlparser/css/parse-css.h index 2f802e214558..d1612a0c4258 100644 --- a/validator/cpp/htmlparser/css/parse-css.h +++ b/validator/cpp/htmlparser/css/parse-css.h @@ -741,7 +741,7 @@ class Selector : public Token { virtual void Accept(SelectorVisitor* visitor) const = 0; }; -// This node models type selectors and universal selectors. +// This node models type selectors and universial selectors. // http://www.w3.org/TR/css3-selectors/#type-selectors // http://www.w3.org/TR/css3-selectors/#universal-selector class TypeSelector : public Selector { diff --git a/validator/cpp/htmlparser/data/CaseFolding.txt b/validator/cpp/htmlparser/data/CaseFolding.txt index 932ace29e6d4..65aa0fcd6b32 100644 --- a/validator/cpp/htmlparser/data/CaseFolding.txt +++ b/validator/cpp/htmlparser/data/CaseFolding.txt @@ -1,11 +1,11 @@ -# CaseFolding-14.0.0.txt -# Date: 2021-03-08, 19:35:41 GMT -# © 2021 Unicode®, Inc. +# CaseFolding-15.0.0.txt +# Date: 2022-02-02, 23:35:35 GMT +# © 2022 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
-# For terms of use, see http://www.unicode.org/terms_of_use.html +# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see https://www.unicode.org/reports/tr44/ # # Case Folding Properties # diff --git a/validator/cpp/htmlparser/data/amptags.txt b/validator/cpp/htmlparser/data/amptags.txt index b5ac9625fa0f..032b0b90ad9f 100644 --- a/validator/cpp/htmlparser/data/amptags.txt +++ b/validator/cpp/htmlparser/data/amptags.txt @@ -93,7 +93,6 @@ amp-script amp-selector amp-sidebar amp-skimlinks -amp-slikeplayer amp-smartlinks amp-social-share amp-soundcloud diff --git a/validator/cpp/htmlparser/data/jsongrammar.txt b/validator/cpp/htmlparser/data/jsongrammar.txt index db0cd32d8291..d79b3eb8cd4e 100644 --- a/validator/cpp/htmlparser/data/jsongrammar.txt +++ b/validator/cpp/htmlparser/data/jsongrammar.txt @@ -128,7 +128,7 @@ DICT_KEY_BEGIN_QUOTE "\t\n\r " DICT_KEY_BEGIN_QUOTE; DICT "\t\n\r " DICT; DICT '}' POP DICT_END; DICT_KEY ':' DICT_VALUE DICT_KEY_END; -# Ignore whitespace between key and colon. +# Ignore whitespace betwen key and colon. DICT_KEY "\t\n\r " DICT_KEY; DICT_VALUE '"' STRING|PUSH|DICT_END_OR_SEPARATOR STRING_T; DICT_VALUE 't' TRUE_1|PUSH|DICT_END_OR_SEPARATOR TRUE_T; diff --git a/validator/cpp/htmlparser/fileutil.cc b/validator/cpp/htmlparser/fileutil.cc index a59ca2aa3cb2..58b5834bf9b1 100644 --- a/validator/cpp/htmlparser/fileutil.cc +++ b/validator/cpp/htmlparser/fileutil.cc @@ -18,7 +18,7 @@ // effects is minimized because this is a cc file not an h file. In fact, the // WebAssembly module never calls the glob function, so it is better to split // fileutil.cc into two files, and the WebAssembly module will only depend on -// the one without glob functions. +// the one without glob funcitons. 
#ifndef GLOB_TILDE #define GLOB_TILDE (1 << 12) #endif diff --git a/validator/cpp/htmlparser/grammar/tablebuilder.h b/validator/cpp/htmlparser/grammar/tablebuilder.h index 4052ac78f263..f62e22f95007 100644 --- a/validator/cpp/htmlparser/grammar/tablebuilder.h +++ b/validator/cpp/htmlparser/grammar/tablebuilder.h @@ -7,7 +7,7 @@ // class must be thoroughly tested. // // Builds a state table by reading grammar file that contains rules for parsing -// a basic (limited), context free, unambiguous grammar. +// a basic (limited), context free, unambigous grammar. // // Using TableBuilder one can generate parser states by writing rules in a // text file. See htmlparser/data/jsongrammar.txt. @@ -15,7 +15,7 @@ // Grammar text file contains rules which lists states and its transition // from one state to another as parser reads input characters. The parse // table is pushdown automation that uses stack to push and pop parsing -// states. Unlike LR parsers there is no shift at each stage of parsing. +// states. Unline LR parsers there is no shift at each stage of parsing. // See grammar.txt tutorial for learning grammar syntax. // TODO: Add grammar tutorial. diff --git a/validator/cpp/htmlparser/json/types.h b/validator/cpp/htmlparser/json/types.h index 5ace320b1131..aa54f9a110c4 100644 --- a/validator/cpp/htmlparser/json/types.h +++ b/validator/cpp/htmlparser/json/types.h @@ -1,7 +1,7 @@ // Declares types in json spec (http://www.json.org): -// JsonArray: List of heterogenous types. [1, true, "foo",...] +// JsonArray: List of hetrogenous types. [1, true, "foo",...] // -// JsonDict = Key value pairs of heterogenous values, key is always std::string. +// JsonDict = Key value pairs of hetrogenous values, key is always std::string. 
// {"foo": "bar", "count": 1,...} // // JsonObject = Encapsulates any type: diff --git a/validator/cpp/htmlparser/node.h b/validator/cpp/htmlparser/node.h index a9c001bd939b..31b8c9482ce4 100644 --- a/validator/cpp/htmlparser/node.h +++ b/validator/cpp/htmlparser/node.h @@ -61,7 +61,7 @@ class Node { // This does not change order or parent/child relationship of this or child // nodes in the tree. // Generally, treat this as a private function. Part of public interface for - // some specific scenarios: + // some specific sceanrios: // A) Unit testing. // B) When parsing a fragment. // C) Custom error/warning reporting. diff --git a/validator/cpp/htmlparser/parser.cc b/validator/cpp/htmlparser/parser.cc index 31b213a82e67..1ddbd303054d 100644 --- a/validator/cpp/htmlparser/parser.cc +++ b/validator/cpp/htmlparser/parser.cc @@ -396,7 +396,7 @@ void Parser::AddText(const std::string& text) { text_node->data_.assign(text, 0, text.size()); AddChild(text_node); - // Count number of terms in the text node, except if this is