diff --git a/toolchain/lex/lex.cpp b/toolchain/lex/lex.cpp
index 64adf32879a08..a04a77813fd5c 100644
--- a/toolchain/lex/lex.cpp
+++ b/toolchain/lex/lex.cpp
@@ -50,6 +50,9 @@ namespace Carbon::Lex {
 // `TokenizedBuffer` or undermining the performance constraints of the lexer.
 class [[clang::internal_linkage]] Lexer {
  public:
+  using TokenInfo = TokenizedBuffer::TokenInfo;
+  using LineInfo = TokenizedBuffer::LineInfo;
+
   // Symbolic result of a lexing action. This indicates whether we successfully
   // lexed a token, or whether other lexing actions should be attempted.
   //
@@ -94,20 +97,48 @@ class [[clang::internal_linkage]] Lexer {
   auto current_line() -> LineIndex { return LineIndex(line_index_); }
 
-  auto current_line_info() -> TokenizedBuffer::LineInfo* {
+  auto current_line_info() -> LineInfo* {
     return &buffer_.line_infos_[line_index_];
   }
 
   auto next_line() -> LineIndex { return LineIndex(line_index_ + 1); }
 
-  auto next_line_info() -> TokenizedBuffer::LineInfo* {
+  auto next_line_info() -> LineInfo* {
     CARBON_CHECK(line_index_ + 1 <
                  static_cast<ssize_t>(buffer_.line_infos_.size()));
     return &buffer_.line_infos_[line_index_ + 1];
   }
 
-  auto NoteWhitespace() -> void {
-    buffer_.token_infos_.back().has_trailing_space = true;
+  // Note when the lexer has encountered whitespace, and the next lexed token
+  // should reflect that it was preceded by some amount of whitespace.
+  auto NoteWhitespace() -> void { has_leading_space_ = true; }
+
+  // Add a lexed token to the tokenized buffer, and reset any token-specific
+  // state tracked in the lexer for the next token.
+  auto AddLexedToken(TokenInfo info) -> TokenIndex {
+    has_leading_space_ = false;
+    return buffer_.AddToken(info);
+  }
+
+  // Lexes a token with no payload: builds the correctly encoded token info,
+  // adds it to the tokenized buffer and returns the token index.
+  auto LexToken(TokenKind kind, int32_t byte_offset) -> TokenIndex {
+    // Check that we don't accidentally call this for one of the token kinds
+    // that *always* has a payload up front.
+    CARBON_DCHECK(!kind.IsOneOf(
+        {TokenKind::Identifier, TokenKind::StringLiteral, TokenKind::IntLiteral,
+         TokenKind::IntTypeLiteral, TokenKind::UnsignedIntTypeLiteral,
+         TokenKind::FloatTypeLiteral, TokenKind::RealLiteral,
+         TokenKind::Error}));
+    return AddLexedToken(TokenInfo(kind, has_leading_space_, byte_offset));
+  }
+
+  // Lexes a token with a payload: builds the correctly encoded token info,
+  // adds it to the tokenized buffer and returns the token index.
+  auto LexTokenWithPayload(TokenKind kind, int token_payload,
+                           int32_t byte_offset) -> TokenIndex {
+    return AddLexedToken(
+        TokenInfo(kind, has_leading_space_, token_payload, byte_offset));
   }
 
   auto SkipHorizontalWhitespace(llvm::StringRef source_text, ssize_t& position)
@@ -173,6 +204,10 @@ class [[clang::internal_linkage]] Lexer {
 
   ssize_t line_index_;
 
+  // Tracks whether the lexer has encountered whitespace that will be leading
+  // whitespace for the next lexed token. Reset after each token lexed.
+  bool has_leading_space_ = false;
+
   llvm::SmallVector<TokenIndex> open_groups_;
   bool has_mismatched_brackets_ = false;
 
@@ -967,29 +1002,21 @@ auto Lexer::LexNumericLiteral(llvm::StringRef source_text, ssize_t& position)
   return VariantMatch(
       literal->ComputeValue(emitter_),
       [&](NumericLiteral::IntValue&& value) {
-        auto token = buffer_.AddToken(
-            {.kind = TokenKind::IntLiteral, .byte_offset = byte_offset});
-        buffer_.GetTokenInfo(token).int_id =
-            buffer_.value_stores_->ints().Add(std::move(value.value));
-        return token;
+        return LexTokenWithPayload(
+            TokenKind::IntLiteral,
+            buffer_.value_stores_->ints().Add(std::move(value.value)).index,
+            byte_offset);
       },
       [&](NumericLiteral::RealValue&& value) {
-        auto token = buffer_.AddToken(
-            {.kind = TokenKind::RealLiteral, .byte_offset = byte_offset});
-        buffer_.GetTokenInfo(token).real_id =
-            buffer_.value_stores_->reals().Add(Real{
-                .mantissa = value.mantissa,
-                .exponent = value.exponent,
-                .is_decimal = (value.radix == NumericLiteral::Radix::Decimal)});
-        return token;
+        auto real_id = buffer_.value_stores_->reals().Add(Real{
+            .mantissa = value.mantissa,
+            .exponent = value.exponent,
+            .is_decimal = (value.radix == NumericLiteral::Radix::Decimal)});
+        return LexTokenWithPayload(TokenKind::RealLiteral, real_id.index,
+                                   byte_offset);
       },
       [&](NumericLiteral::UnrecoverableError) {
-        auto token = buffer_.AddToken({
-            .kind = TokenKind::Error,
-            .byte_offset = byte_offset,
-            .error_length = token_size,
-        });
-        return token;
+        return LexTokenWithPayload(TokenKind::Error, token_size, byte_offset);
       });
 }
 
@@ -1021,18 +1048,13 @@ auto Lexer::LexStringLiteral(llvm::StringRef source_text, ssize_t& position)
   if (literal->is_terminated()) {
     auto string_id = buffer_.value_stores_->string_literal_values().Add(
         literal->ComputeValue(buffer_.allocator_, emitter_));
-    auto token = buffer_.AddToken({.kind = TokenKind::StringLiteral,
-                                   .byte_offset = byte_offset,
-                                   .string_literal_id = string_id});
-    return token;
+    return LexTokenWithPayload(TokenKind::StringLiteral, string_id.index,
+                               byte_offset);
   } else {
     CARBON_DIAGNOSTIC(UnterminatedString, Error,
                       "String is missing a terminator.");
     emitter_.Emit(literal->text().begin(), UnterminatedString);
-    return buffer_.AddToken(
-        {.kind = TokenKind::Error,
-         .byte_offset = byte_offset,
-         .error_length = static_cast<int32_t>(literal_size)});
+    return LexTokenWithPayload(TokenKind::Error, literal_size, byte_offset);
   }
 }
 
@@ -1046,41 +1068,63 @@ auto Lexer::LexOneCharSymbolToken(llvm::StringRef source_text, TokenKind kind,
       << "' instead of the spelling '" << kind.fixed_spelling()
       << "' of the incoming token kind '" << kind << "'";
 
-  TokenIndex token = buffer_.AddToken(
-      {.kind = kind, .byte_offset = static_cast<int32_t>(position)});
+  TokenIndex token = LexToken(kind, position);
   ++position;
   return token;
 }
 
 auto Lexer::LexOpeningSymbolToken(llvm::StringRef source_text, TokenKind kind,
                                   ssize_t& position) -> LexResult {
-  TokenIndex token = LexOneCharSymbolToken(source_text, kind, position);
+  CARBON_DCHECK(kind.is_opening_symbol());
+  CARBON_DCHECK(kind.fixed_spelling().size() == 1);
+  CARBON_DCHECK(source_text[position] == kind.fixed_spelling().front())
+      << "Source text starts with '" << source_text[position]
+      << "' instead of the spelling '" << kind.fixed_spelling()
+      << "' of the incoming token kind '" << kind << "'";
+
+  int32_t byte_offset = position;
+  ++position;
+
+  // Lex the opening symbol with a zero closing index. We'll add a payload later
+  // when we match a closing symbol or in recovery.
+  TokenIndex token = LexToken(kind, byte_offset);
   open_groups_.push_back(token);
   return token;
 }
 
 auto Lexer::LexClosingSymbolToken(llvm::StringRef source_text, TokenKind kind,
                                   ssize_t& position) -> LexResult {
-  TokenIndex token = LexOneCharSymbolToken(source_text, kind, position);
-  auto& token_info = buffer_.GetTokenInfo(token);
+  CARBON_DCHECK(kind.is_closing_symbol());
+  CARBON_DCHECK(kind.fixed_spelling().size() == 1);
+  CARBON_DCHECK(source_text[position] == kind.fixed_spelling().front())
+      << "Source text starts with '" << source_text[position]
+      << "' instead of the spelling '" << kind.fixed_spelling()
+      << "' of the incoming token kind '" << kind << "'";
+
+  int32_t byte_offset = position;
+  ++position;
 
   // If there's not a matching opening symbol, just track that we had an error.
   // We will diagnose and recover when we reach the end of the file. See
   // `DiagnoseAndFixMismatchedBrackets` for details.
   if (LLVM_UNLIKELY(open_groups_.empty())) {
     has_mismatched_brackets_ = true;
-    return token;
+    // Lex without a matching index payload -- we'll add one during recovery.
+    return LexToken(kind, byte_offset);
   }
 
   TokenIndex opening_token = open_groups_.pop_back_val();
+  TokenIndex token =
+      LexTokenWithPayload(kind, opening_token.index, byte_offset);
+
   auto& opening_token_info = buffer_.GetTokenInfo(opening_token);
-  if (LLVM_UNLIKELY(opening_token_info.kind != kind.opening_symbol())) {
+  if (LLVM_UNLIKELY(opening_token_info.kind() != kind.opening_symbol())) {
     has_mismatched_brackets_ = true;
+    buffer_.GetTokenInfo(token).set_opening_token_index(TokenIndex::Invalid);
     return token;
   }
 
-  opening_token_info.closing_token = token;
-  token_info.opening_token = opening_token;
+  opening_token_info.set_closing_token_index(token);
   return token;
 }
 
@@ -1100,8 +1144,7 @@ auto Lexer::LexSymbolToken(llvm::StringRef source_text, ssize_t& position)
     return LexError(source_text, position);
   }
 
-  TokenIndex token = buffer_.AddToken(
-      {.kind = kind, .byte_offset = static_cast<int32_t>(position)});
+  TokenIndex token = LexToken(kind, position);
   position += kind.fixed_spelling().size();
   return token;
 }
 
@@ -1117,7 +1160,7 @@ auto Lexer::LexWordAsTypeLiteralToken(llvm::StringRef word, int32_t byte_offset)
     return LexResult::NoMatch();
   }
 
-  std::optional<TokenKind> kind;
+  TokenKind kind;
   switch (word.front()) {
     case 'i':
       kind = TokenKind::IntTypeLiteral;
@@ -1134,20 +1177,16 @@ auto Lexer::LexWordAsTypeLiteralToken(llvm::StringRef word, int32_t byte_offset)
 
   llvm::StringRef suffix = word.substr(1);
   if (!CanLexInt(emitter_, suffix)) {
-    return buffer_.AddToken(
-        {.kind = TokenKind::Error,
-         .byte_offset = byte_offset,
-         .error_length = static_cast<int32_t>(word.size())});
+    return LexTokenWithPayload(TokenKind::Error, word.size(), byte_offset);
   }
   llvm::APInt suffix_value;
   if (suffix.getAsInteger(10, suffix_value)) {
     return LexResult::NoMatch();
   }
 
-  auto token = buffer_.AddToken({.kind = *kind, .byte_offset = byte_offset});
-  buffer_.GetTokenInfo(token).int_id =
-      buffer_.value_stores_->ints().Add(std::move(suffix_value));
-  return token;
+  return LexTokenWithPayload(
+      kind, buffer_.value_stores_->ints().Add(std::move(suffix_value)).index,
+      byte_offset);
 }
 
 auto Lexer::LexKeywordOrIdentifier(llvm::StringRef source_text,
@@ -1180,14 +1219,14 @@ auto Lexer::LexKeywordOrIdentifier(llvm::StringRef source_text,
 #include "toolchain/lex/token_kind.def"
           .Default(TokenKind::Error);
   if (kind != TokenKind::Error) {
-    return buffer_.AddToken({.kind = kind, .byte_offset = byte_offset});
+    return LexToken(kind, byte_offset);
   }
 
   // Otherwise we have a generic identifier.
-  return buffer_.AddToken(
-      {.kind = TokenKind::Identifier,
-       .byte_offset = byte_offset,
-       .ident_id = buffer_.value_stores_->identifiers().Add(identifier_text)});
+  return LexTokenWithPayload(
+      TokenKind::Identifier,
+      buffer_.value_stores_->identifiers().Add(identifier_text).index,
+      byte_offset);
 }
 
 auto Lexer::LexHash(llvm::StringRef source_text, ssize_t& position)
@@ -1202,16 +1241,16 @@ auto Lexer::LexHash(llvm::StringRef source_text, ssize_t& position)
 
   // If the previous token isn't the identifier `r`, or the character after `#`
   // isn't the start of an identifier, this is not a raw identifier.
-  if (prev_token_info.kind != TokenKind::Identifier ||
+  if (prev_token_info.kind() != TokenKind::Identifier ||
       source_text[position - 1] != 'r' ||
       position + 1 == static_cast<ssize_t>(source_text.size()) ||
      !IsIdStartByteTable[static_cast<unsigned char>(
           source_text[position + 1])] ||
-      prev_token_info.byte_offset != static_cast<int32_t>(position) - 1) {
+      prev_token_info.byte_offset() != static_cast<int32_t>(position) - 1) {
    [[clang::musttail]] return LexStringLiteral(source_text, position);
   }
   CARBON_DCHECK(buffer_.value_stores_->identifiers().Get(
-                    prev_token_info.ident_id) == "r");
+                    prev_token_info.ident_id()) == "r");
 
   // Take the valid characters off the front of the source buffer.
   llvm::StringRef identifier_text =
@@ -1222,8 +1261,8 @@ auto Lexer::LexHash(llvm::StringRef source_text, ssize_t& position)
   // Replace the `r` identifier's value with the raw identifier.
   // TODO: This token doesn't carry any indicator that it's raw, so
   // diagnostics are unclear.
-  prev_token_info.ident_id =
-      buffer_.value_stores_->identifiers().Add(identifier_text);
+  prev_token_info.set_ident_id(
+      buffer_.value_stores_->identifiers().Add(identifier_text));
 
   return LexResult(TokenIndex(buffer_.token_infos_.size() - 1));
 }
 
@@ -1253,10 +1292,8 @@ auto Lexer::LexError(llvm::StringRef source_text, ssize_t& position)
     error_text = source_text.substr(position, 1);
   }
 
-  auto token = buffer_.AddToken(
-      {.kind = TokenKind::Error,
-       .byte_offset = static_cast<int32_t>(position),
-       .error_length = static_cast<int32_t>(error_text.size())});
+  auto token =
+      LexTokenWithPayload(TokenKind::Error, error_text.size(), position);
   CARBON_DIAGNOSTIC(UnrecognizedCharacters, Error,
                     "Encountered unrecognized characters while parsing.");
   emitter_.Emit(error_text.begin(), UnrecognizedCharacters);
@@ -1270,11 +1307,11 @@ auto Lexer::LexFileStart(llvm::StringRef source_text, ssize_t& position)
   CARBON_CHECK(position == 0);
 
   // Before lexing any source text, add the start-of-file token so that code
-  // can assume a non-empty token buffer for the rest of lexing. Note that the
-  // start-of-file always has trailing space because it *is* whitespace.
-  buffer_.AddToken({.kind = TokenKind::FileStart,
-                    .has_trailing_space = true,
-                    .byte_offset = 0});
+  // can assume a non-empty token buffer for the rest of lexing.
+  LexToken(TokenKind::FileStart, 0);
+
+  // The file start also represents whitespace.
+  NoteWhitespace();
 
   // Also skip any horizontal whitespace and record the indentation of the
   // first line.
@@ -1300,8 +1337,7 @@ auto Lexer::LexFileEnd(llvm::StringRef source_text, ssize_t position) -> void {
   // The end-of-file token is always considered to be whitespace.
   NoteWhitespace();
 
-  buffer_.AddToken({.kind = TokenKind::FileEnd,
-                    .byte_offset = static_cast<int32_t>(position)});
+  LexToken(TokenKind::FileEnd, position);
 
   // If we had any mismatched brackets, issue diagnostics and fix them.
   if (has_mismatched_brackets_ || !open_groups_.empty()) {
@@ -1325,30 +1361,35 @@ class Lexer::ErrorRecoveryBuffer {
   auto InsertBefore(TokenIndex insert_before, TokenKind kind) -> void {
     CARBON_CHECK(insert_before.index > 0)
         << "Cannot insert before the start of file token.";
+    CARBON_CHECK(insert_before.index <
+                 static_cast<int>(buffer_.token_infos_.size()))
+        << "Cannot insert after the end of file token.";
     CARBON_CHECK(new_tokens_.empty() ||
                  new_tokens_.back().first <= insert_before)
        << "Insertions performed out of order.";
 
+    // If the `insert_before` token has leading whitespace, mark the
+    // inserted token as also having leading whitespace. This avoids changing
+    // whether the prior tokens had leading or trailing whitespace when
+    // inserting.
+    bool insert_leading_space = buffer_.HasLeadingWhitespace(insert_before);
+
     // Find the end of the token before the target token, and add the new token
     // there.
     TokenIndex insert_after(insert_before.index - 1);
     const auto& prev_info = buffer_.GetTokenInfo(insert_after);
     int32_t byte_offset =
-        prev_info.byte_offset + buffer_.GetTokenText(insert_after).size();
+        prev_info.byte_offset() + buffer_.GetTokenText(insert_after).size();
     new_tokens_.push_back(
-        {insert_before,
-         {.kind = kind,
-          .has_trailing_space = buffer_.HasTrailingWhitespace(insert_after),
-          .is_recovery = true,
-          .byte_offset = byte_offset}});
+        {insert_before, TokenInfo(kind, insert_leading_space, byte_offset)});
   }
 
   // Replace the given token with an error token. We do this immediately,
   // because we don't benefit from buffering it.
   auto ReplaceWithError(TokenIndex token) -> void {
     auto& token_info = buffer_.GetTokenInfo(token);
-    token_info.error_length = buffer_.GetTokenText(token).size();
-    token_info.kind = TokenKind::Error;
+    int error_length = buffer_.GetTokenText(token).size();
+    token_info.ResetAsError(error_length);
     any_error_tokens_ = true;
   }
 
@@ -1356,13 +1397,16 @@ class Lexer::ErrorRecoveryBuffer {
   auto Apply() -> void {
     auto old_tokens = std::move(buffer_.token_infos_);
     buffer_.token_infos_.clear();
-    buffer_.token_infos_.reserve(old_tokens.size() + new_tokens_.size());
+    int new_size = old_tokens.size() + new_tokens_.size();
+    buffer_.token_infos_.reserve(new_size);
+    buffer_.recovery_tokens_.resize(new_size);
 
     int old_tokens_offset = 0;
     for (auto [next_offset, info] : new_tokens_) {
       buffer_.token_infos_.append(old_tokens.begin() + old_tokens_offset,
                                   old_tokens.begin() + next_offset.index);
       buffer_.AddToken(info);
+      buffer_.recovery_tokens_.set(next_offset.index);
       old_tokens_offset = next_offset.index;
     }
     buffer_.token_infos_.append(old_tokens.begin() + old_tokens_offset,
@@ -1382,13 +1426,13 @@ class Lexer::ErrorRecoveryBuffer {
         CARBON_CHECK(!open_groups.empty()) << "Failed to balance brackets";
         auto opening_token = open_groups.pop_back_val();
-        CARBON_CHECK(kind ==
-                     buffer_.GetTokenInfo(opening_token).kind.closing_symbol())
+        CARBON_CHECK(
+            kind == buffer_.GetTokenInfo(opening_token).kind().closing_symbol())
            << "Failed to balance brackets";
 
         auto& opening_token_info = buffer_.GetTokenInfo(opening_token);
         auto& closing_token_info = buffer_.GetTokenInfo(token);
-        opening_token_info.closing_token = token;
-        closing_token_info.opening_token = opening_token;
+        opening_token_info.set_closing_token_index(token);
+        closing_token_info.set_opening_token_index(opening_token);
       }
     }
   }
@@ -1451,7 +1495,7 @@ auto Lexer::DiagnoseAndFixMismatchedBrackets() -> void {
   auto opening_it = std::find_if(
       open_groups_.rbegin(), open_groups_.rend(),
       [&](TokenIndex opening_token) {
-        return buffer_.GetTokenInfo(opening_token).kind.closing_symbol() ==
+        return buffer_.GetTokenInfo(opening_token).kind().closing_symbol() ==
                kind;
       });
   if (opening_it == open_groups_.rend()) {
diff --git a/toolchain/lex/testdata/basic_syntax.carbon b/toolchain/lex/testdata/basic_syntax.carbon
index 17e473305f655..4a68514ca1df6 100644
--- a/toolchain/lex/testdata/basic_syntax.carbon
+++ b/toolchain/lex/testdata/basic_syntax.carbon
@@ -9,22 +9,22 @@
 // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/basic_syntax.carbon
 // CHECK:STDOUT: - filename: basic_syntax.carbon
 // CHECK:STDOUT: tokens: [
-// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true },
+// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' },
 
 fn run(String program) {
-// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'fn', has_trailing_space: true },
-// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: 'run', identifier: 0 },
+// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'fn', has_leading_space: true },
+// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: 'run', identifier: 0, has_leading_space: true },
 // CHECK:STDOUT: { index: 3, kind: 'OpenParen', line: {{ *}}[[@LINE-3]], column: 7, indent: 1, spelling: '(', closing_token: 6 },
-// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-4]], column: 8, indent: 1, spelling: 'String', has_trailing_space: true },
-// CHECK:STDOUT: { index: 5, kind: 'Identifier', line: {{ *}}[[@LINE-5]], column: 15, indent: 1, spelling: 'program', identifier: 1 },
-// CHECK:STDOUT: { index: 6, kind: 'CloseParen', line: {{ *}}[[@LINE-6]], column: 22, indent: 1, spelling: ')', opening_token: 3, has_trailing_space: true },
-// CHECK:STDOUT: { index: 7, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-7]], column: 24, indent: 1, spelling: '{', closing_token: 11, has_trailing_space: true },
+// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-4]], column: 8, indent: 1, spelling: 'String' },
+// CHECK:STDOUT: { index: 5, kind: 'Identifier', line: {{ *}}[[@LINE-5]], column: 15, indent: 1, spelling: 'program', identifier: 1, has_leading_space: true },
+// CHECK:STDOUT: { index: 6, kind: 'CloseParen', line: {{ *}}[[@LINE-6]], column: 22, indent: 1, spelling: ')', opening_token: 3 },
+// CHECK:STDOUT: { index: 7, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-7]], column: 24, indent: 1, spelling: '{', closing_token: 11, has_leading_space: true },
   return True;
-  // CHECK:STDOUT: { index: 8, kind: 'Return', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'return', has_trailing_space: true },
-  // CHECK:STDOUT: { index: 9, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 10, indent: 3, spelling: 'True', identifier: 2 },
-  // CHECK:STDOUT: { index: 10, kind: 'Semi', line: {{ *}}[[@LINE-3]], column: 14, indent: 3, spelling: ';', has_trailing_space: true },
+  // CHECK:STDOUT: { index: 8, kind: 'Return', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'return', has_leading_space: true },
+  // CHECK:STDOUT: { index: 9, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 10, indent: 3, spelling: 'True', identifier: 2, has_leading_space: true },
+  // CHECK:STDOUT: { index: 10, kind: 'Semi', line: 
{{ *}}[[@LINE-3]], column: 14, indent: 3, spelling: ';' }, } -// CHECK:STDOUT: { index: 11, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', opening_token: 7, has_trailing_space: true }, +// CHECK:STDOUT: { index: 11, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', opening_token: 7, has_leading_space: true }, -// CHECK:STDOUT: { index: 12, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 12, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_bad_comment_introducers.carbon b/toolchain/lex/testdata/fail_bad_comment_introducers.carbon index 1f33ee0a40003..b9e648fb2c47c 100644 --- a/toolchain/lex/testdata/fail_bad_comment_introducers.carbon +++ b/toolchain/lex/testdata/fail_bad_comment_introducers.carbon @@ -9,7 +9,7 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/fail_bad_comment_introducers.carbon // CHECK:STDOUT: - filename: fail_bad_comment_introducers.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // // Comments have to have whitespace after `//` currently. @@ -58,5 +58,5 @@ // An extra un-indented comment line to anchor the end of the file checks. -// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_bad_comment_introducers_mid_block_indent_change.carbon b/toolchain/lex/testdata/fail_bad_comment_introducers_mid_block_indent_change.carbon index e460b90b1ac8a..258180b29bd68 100644 --- a/toolchain/lex/testdata/fail_bad_comment_introducers_mid_block_indent_change.carbon +++ b/toolchain/lex/testdata/fail_bad_comment_introducers_mid_block_indent_change.carbon @@ -36,7 +36,7 @@ // CHECK:STDERR: ^ // CHECK:STDOUT: - filename: fail_bad_comment_introducers_mid_block_indent_change.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, -// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 1, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_bad_raw_identifier.carbon b/toolchain/lex/testdata/fail_bad_raw_identifier.carbon index f8207f213b5ca..ce369822d2d3c 100644 --- a/toolchain/lex/testdata/fail_bad_raw_identifier.carbon +++ b/toolchain/lex/testdata/fail_bad_raw_identifier.carbon @@ -11,7 +11,7 @@ // --- fail_bad_raw_identifier.carbon // CHECK:STDOUT: - filename: fail_bad_raw_identifier.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 
'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // Missing the character after `#`. @@ -20,8 +20,8 @@ // CHECK:STDERR: ^ // CHECK:STDERR: r# -// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 }, -// CHECK:STDOUT: { index: 2, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#' }, // Not a valid identifier. // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: ERROR: Encountered unrecognized characters while parsing. @@ -29,9 +29,9 @@ r# // CHECK:STDERR: ^ // CHECK:STDERR: r#3 -// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 }, +// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true }, // CHECK:STDOUT: { index: 4, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#' }, -// CHECK:STDOUT: { index: 5, kind: 'IntLiteral', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: '3', value: `3`, has_trailing_space: true }, +// CHECK:STDOUT: { index: 5, kind: 'IntLiteral', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: '3', value: `3` }, // Non ascii start to identifier. // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: ERROR: Encountered unrecognized characters while parsing. @@ -39,8 +39,8 @@ r#3 // CHECK:STDERR: ^ // CHECK:STDERR: r#á -// CHECK:STDOUT: { index: 6, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 }, -// CHECK:STDOUT: { index: 7, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#á', has_trailing_space: true }, +// CHECK:STDOUT: { index: 6, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 7, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#á' }, // Raw `r` identifier doesn't start a second raw identifier. // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:4: ERROR: Encountered unrecognized characters while parsing. @@ -48,9 +48,9 @@ r#á // CHECK:STDERR: ^ // CHECK:STDERR: r#r#foo -// CHECK:STDOUT: { index: 8, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 }, +// CHECK:STDOUT: { index: 8, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true }, // CHECK:STDOUT: { index: 9, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: '#' }, -// CHECK:STDOUT: { index: 10, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true }, +// CHECK:STDOUT: { index: 10, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1 }, // Other identifier characters don't start a raw identifier. // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:2: ERROR: Encountered unrecognized characters while parsing. 
@@ -58,9 +58,9 @@ r#r#foo // CHECK:STDERR: ^ // CHECK:STDERR: s#foo -// CHECK:STDOUT: { index: 11, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 's', identifier: 2 }, +// CHECK:STDOUT: { index: 11, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 's', identifier: 2, has_leading_space: true }, // CHECK:STDOUT: { index: 12, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#' }, -// CHECK:STDOUT: { index: 13, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true }, +// CHECK:STDOUT: { index: 13, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 3, indent: 1, spelling: 'foo', identifier: 1 }, // Identifier ending in `r` doesn't start a raw identifier. // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:4: ERROR: Encountered unrecognized characters while parsing. @@ -68,9 +68,9 @@ s#foo // CHECK:STDERR: ^ // CHECK:STDERR: arr#foo -// CHECK:STDOUT: { index: 14, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'arr', identifier: 3 }, +// CHECK:STDOUT: { index: 14, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'arr', identifier: 3, has_leading_space: true }, // CHECK:STDOUT: { index: 15, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: '#' }, -// CHECK:STDOUT: { index: 16, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true }, +// CHECK:STDOUT: { index: 16, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: 'foo', identifier: 1 }, // Whitespace between `r` and `#` isn't allowed. // CHECK:STDERR: fail_bad_raw_identifier.carbon:[[@LINE+4]]:3: ERROR: Encountered unrecognized characters while parsing. @@ -78,29 +78,29 @@ arr#foo // CHECK:STDERR: ^ // CHECK:STDERR: r #foo -// CHECK:STDOUT: { index: 17, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_trailing_space: true }, -// CHECK:STDOUT: { index: 18, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 3, indent: 1, spelling: '#' }, -// CHECK:STDOUT: { index: 19, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 4, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true }, +// CHECK:STDOUT: { index: 17, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 18, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 3, indent: 1, spelling: '#', has_leading_space: true }, +// CHECK:STDOUT: { index: 19, kind: 'Identifier', line: {{ *}}[[@LINE-3]], column: 4, indent: 1, spelling: 'foo', identifier: 1 }, // This is an `r` identifier followed by a string literal. 
r#"hello"# -// CHECK:STDOUT: { index: 20, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0 }, -// CHECK:STDOUT: { index: 21, kind: 'StringLiteral', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#"hello"#', value: `hello`, has_trailing_space: true }, +// CHECK:STDOUT: { index: 20, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'r', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 21, kind: 'StringLiteral', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: '#"hello"#', value: `hello` }, -// CHECK:STDOUT: { index: 22, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 22, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] // --- fail_hash_at_start_of_file.carbon // CHECK:STDOUT: - filename: fail_hash_at_start_of_file.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // Ensure that we correctly handle a `#` as the first token in the file. // CHECK:STDERR: fail_hash_at_start_of_file.carbon:[[@LINE+3]]:1: ERROR: Encountered unrecognized characters while parsing. // CHECK:STDERR: #foo // CHECK:STDERR: ^ #foo -// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '#' }, -// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: 'foo', identifier: 0, has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '#', has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: 'foo', identifier: 0 }, -// CHECK:STDOUT: { index: 3, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 3, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_block_string_second_line.carbon b/toolchain/lex/testdata/fail_block_string_second_line.carbon index 94d78eb6d1c52..01caf32c49eac 100644 --- a/toolchain/lex/testdata/fail_block_string_second_line.carbon +++ b/toolchain/lex/testdata/fail_block_string_second_line.carbon @@ -18,14 +18,14 @@ var s: String = ''' // CHECK:STDERR: ^ // CHECK:STDOUT: - filename: fail_block_string_second_line.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, -// CHECK:STDOUT: { index: 1, kind: 'Var', line: {{ *}}[[@LINE-17]], column: 1, indent: 1, spelling: 'var', has_trailing_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-18]], column: 5, indent: 1, spelling: 's', identifier: 0 }, -// CHECK:STDOUT: { index: 3, kind: 'Colon', line: {{ *}}[[@LINE-19]], column: 6, indent: 1, spelling: ':', has_trailing_space: true }, -// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-20]], column: 8, indent: 1, spelling: 'String', has_trailing_space: true }, -// CHECK:STDOUT: { index: 5, kind: 'Equal', line: {{ *}}[[@LINE-21]], column: 15, indent: 1, 
spelling: '=', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 1, kind: 'Var', line: {{ *}}[[@LINE-17]], column: 1, indent: 1, spelling: 'var', has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-18]], column: 5, indent: 1, spelling: 's', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 3, kind: 'Colon', line: {{ *}}[[@LINE-19]], column: 6, indent: 1, spelling: ':' }, +// CHECK:STDOUT: { index: 4, kind: 'StringTypeLiteral', line: {{ *}}[[@LINE-20]], column: 8, indent: 1, spelling: 'String', has_leading_space: true }, +// CHECK:STDOUT: { index: 5, kind: 'Equal', line: {{ *}}[[@LINE-21]], column: 15, indent: 1, spelling: '=', has_leading_space: true }, // CHECK:STDOUT: { index: 6, kind: 'StringLiteral', line: {{ *}}[[@LINE-22]], column: 17, indent: 1, spelling: '''' -// CHECK:STDOUT: error here: '''', value: `error here: `, has_trailing_space: true }, +// CHECK:STDOUT: error here: '''', value: `error here: `, has_leading_space: true }, -// CHECK:STDOUT: { index: 7, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 7, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_mismatched_brackets.carbon b/toolchain/lex/testdata/fail_mismatched_brackets.carbon index 4f0a30c914ba8..b65746fba8e4f 100644 --- a/toolchain/lex/testdata/fail_mismatched_brackets.carbon +++ b/toolchain/lex/testdata/fail_mismatched_brackets.carbon @@ -9,29 +9,29 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/fail_mismatched_brackets.carbon // CHECK:STDOUT: - filename: fail_mismatched_brackets.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // CHECK:STDERR: fail_mismatched_brackets.carbon:[[@LINE+4]]:1: ERROR: Closing symbol without a corresponding opening symbol. // CHECK:STDERR: } // CHECK:STDERR: ^ // CHECK:STDERR: } -// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', has_leading_space: true }, // CHECK:STDERR: fail_mismatched_brackets.carbon:[[@LINE+4]]:3: ERROR: Closing symbol without a corresponding opening symbol. 
// CHECK:STDERR: ( } ) // CHECK:STDERR: ^ // CHECK:STDERR: ( } ) -// CHECK:STDOUT: { index: 2, kind: 'OpenParen', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '(', closing_token: 4, has_trailing_space: true }, -// CHECK:STDOUT: { index: 3, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 3, indent: 1, spelling: '}', has_trailing_space: true }, -// CHECK:STDOUT: { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: ')', opening_token: 2, has_trailing_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'OpenParen', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '(', closing_token: 4, has_leading_space: true }, +// CHECK:STDOUT: { index: 3, kind: 'Error', line: {{ *}}[[@LINE-2]], column: 3, indent: 1, spelling: '}', has_leading_space: true }, +// CHECK:STDOUT: { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: ')', opening_token: 2, has_leading_space: true }, // CHECK:STDERR: fail_mismatched_brackets.carbon:[[@LINE+3]]:1: ERROR: Opening symbol without a corresponding closing symbol. // CHECK:STDERR: [ // CHECK:STDERR: ^ [ -// CHECK:STDOUT: { index: 5, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '[', has_trailing_space: true }, +// CHECK:STDOUT: { index: 5, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '[', has_leading_space: true }, -// CHECK:STDOUT: { index: 6, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 6, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_mismatched_brackets_2.carbon b/toolchain/lex/testdata/fail_mismatched_brackets_2.carbon index b501b90340df8..2af5cdaa99629 100644 --- a/toolchain/lex/testdata/fail_mismatched_brackets_2.carbon +++ b/toolchain/lex/testdata/fail_mismatched_brackets_2.carbon @@ -23,20 +23,20 @@ fn F() { // CHECK:STDERR: ^ // CHECK:STDOUT: - filename: fail_mismatched_brackets_2.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, -// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-19]], column: 1, indent: 1, spelling: 'fn', has_trailing_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-20]], column: 4, indent: 1, spelling: 'F', identifier: 0 }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-19]], column: 1, indent: 1, spelling: 'fn', has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-20]], column: 4, indent: 1, spelling: 'F', identifier: 0, has_leading_space: true }, // CHECK:STDOUT: { index: 3, kind: 'OpenParen', line: {{ *}}[[@LINE-21]], column: 5, indent: 1, spelling: '(', closing_token: 4 }, -// CHECK:STDOUT: { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-22]], column: 6, indent: 1, spelling: ')', opening_token: 3, has_trailing_space: true }, -// CHECK:STDOUT: { index: 5, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-23]], column: 8, indent: 1, spelling: '{', closing_token: 12, has_trailing_space: true }, -// CHECK:STDOUT: { index: 6, kind: 'If', line: {{ *}}[[@LINE-23]], column: 3, indent: 3, spelling: 'if', has_trailing_space: true }, -// CHECK:STDOUT: { index: 7, kind: 'OpenParen', line: {{ 
*}}[[@LINE-24]], column: 6, indent: 3, spelling: '(', closing_token: 9 }, +// CHECK:STDOUT: { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-22]], column: 6, indent: 1, spelling: ')', opening_token: 3 }, +// CHECK:STDOUT: { index: 5, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-23]], column: 8, indent: 1, spelling: '{', closing_token: 12, has_leading_space: true }, +// CHECK:STDOUT: { index: 6, kind: 'If', line: {{ *}}[[@LINE-23]], column: 3, indent: 3, spelling: 'if', has_leading_space: true }, +// CHECK:STDOUT: { index: 7, kind: 'OpenParen', line: {{ *}}[[@LINE-24]], column: 6, indent: 3, spelling: '(', closing_token: 9, has_leading_space: true }, // CHECK:STDOUT: { index: 8, kind: 'Identifier', line: {{ *}}[[@LINE-25]], column: 7, indent: 3, spelling: 'thing1', identifier: 1 }, -// CHECK:STDOUT: { index: 9, kind: 'CloseParen', line: {{ *}}[[@LINE-26]], column: 13, indent: 3, spelling: ')', opening_token: 7, has_trailing_space: true }, -// CHECK:STDOUT: { index: 10, kind: 'Identifier', line: {{ *}}[[@LINE-26]], column: 5, indent: 5, spelling: 'thing2', identifier: 2 }, -// CHECK:STDOUT: { index: 11, kind: 'Semi', line: {{ *}}[[@LINE-27]], column: 11, indent: 5, spelling: ';', has_trailing_space: true }, -// CHECK:STDOUT: { index: 12, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-27]], column: 3, indent: 3, spelling: '}', opening_token: 5, has_trailing_space: true }, -// CHECK:STDOUT: { index: 13, kind: 'Error', line: {{ *}}[[@LINE-27]], column: 1, indent: 1, spelling: '}', has_trailing_space: true }, +// CHECK:STDOUT: { index: 9, kind: 'CloseParen', line: {{ *}}[[@LINE-26]], column: 13, indent: 3, spelling: ')', opening_token: 7 }, +// CHECK:STDOUT: { index: 10, kind: 'Identifier', line: {{ *}}[[@LINE-26]], column: 5, indent: 5, spelling: 'thing2', identifier: 2, has_leading_space: true }, +// CHECK:STDOUT: { index: 11, kind: 'Semi', line: {{ *}}[[@LINE-27]], column: 11, indent: 5, spelling: ';' }, +// CHECK:STDOUT: { index: 12, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-27]], column: 3, indent: 3, spelling: '}', opening_token: 5, has_leading_space: true }, +// CHECK:STDOUT: { index: 13, kind: 'Error', line: {{ *}}[[@LINE-27]], column: 1, indent: 1, spelling: '}', has_leading_space: true }, -// CHECK:STDOUT: { index: 14, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 14, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_multifile.carbon b/toolchain/lex/testdata/fail_multifile.carbon index e3ab12f23edf4..644983a69acef 100644 --- a/toolchain/lex/testdata/fail_multifile.carbon +++ b/toolchain/lex/testdata/fail_multifile.carbon @@ -11,25 +11,25 @@ // --- fail_a.carbon // CHECK:STDOUT: - filename: fail_a.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // CHECK:STDERR: fail_a.carbon:[[@LINE+4]]:3: ERROR: Empty digit sequence in numeric literal. 
// CHECK:STDERR: 1.a // CHECK:STDERR: ^ // CHECK:STDERR: 1.a -// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '1.a', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '1.a', has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] // --- fail_b.carbon // CHECK:STDOUT: - filename: fail_b.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // CHECK:STDERR: fail_b.carbon:[[@LINE+3]]:3: ERROR: Empty digit sequence in numeric literal. // CHECK:STDERR: 2.b // CHECK:STDERR: ^ 2.b -// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '2.b', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Error', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '2.b', has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/fail_trailing_comments.carbon b/toolchain/lex/testdata/fail_trailing_comments.carbon index 096e5f751dcfd..77b27c5bcf04e 100644 --- a/toolchain/lex/testdata/fail_trailing_comments.carbon +++ b/toolchain/lex/testdata/fail_trailing_comments.carbon @@ -29,28 +29,28 @@ var c: i32 = 0.4; // still more trailing comment // CHECK:STDERR: ^ // CHECK:STDOUT: - filename: fail_trailing_comments.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, -// CHECK:STDOUT: { index: 1, kind: 'Var', line: {{ *}}[[@LINE-25]], column: 1, indent: 1, spelling: 'var', has_trailing_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-26]], column: 5, indent: 1, spelling: 'a', identifier: 0 }, -// CHECK:STDOUT: { index: 3, kind: 'Colon', line: {{ *}}[[@LINE-27]], column: 6, indent: 1, spelling: ':', has_trailing_space: true }, -// CHECK:STDOUT: { index: 4, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-28]], column: 8, indent: 1, spelling: 'i32', has_trailing_space: true }, -// CHECK:STDOUT: { index: 5, kind: 'Equal', line: {{ *}}[[@LINE-29]], column: 12, indent: 1, spelling: '=', has_trailing_space: true }, -// CHECK:STDOUT: { index: 6, kind: 'IntLiteral', line: {{ *}}[[@LINE-30]], column: 14, indent: 1, spelling: '1', value: `1` }, -// CHECK:STDOUT: { index: 7, kind: 'Semi', line: {{ *}}[[@LINE-31]], column: 15, indent: 1, spelling: ';', has_trailing_space: true }, -// CHECK:STDOUT: { index: 8, kind: 'Var', line: {{ *}}[[@LINE-31]], column: 1, indent: 1, spelling: 'var', has_trailing_space: true }, -// CHECK:STDOUT: { index: 9, kind: 'Identifier', line: {{ *}}[[@LINE-32]], column: 5, indent: 1, spelling: 'b', identifier: 1 }, -// CHECK:STDOUT: { index: 10, kind: 'Colon', line: {{ *}}[[@LINE-33]], column: 6, indent: 1, spelling: ':', 
has_trailing_space: true }, -// CHECK:STDOUT: { index: 11, kind: 'IntLiteral', line: {{ *}}[[@LINE-34]], column: 8, indent: 1, spelling: '32', value: `32`, has_trailing_space: true }, -// CHECK:STDOUT: { index: 12, kind: 'Equal', line: {{ *}}[[@LINE-35]], column: 11, indent: 1, spelling: '=', has_trailing_space: true }, -// CHECK:STDOUT: { index: 13, kind: 'IntLiteral', line: {{ *}}[[@LINE-36]], column: 13, indent: 1, spelling: '13', value: `13` }, -// CHECK:STDOUT: { index: 14, kind: 'Semi', line: {{ *}}[[@LINE-37]], column: 15, indent: 1, spelling: ';', has_trailing_space: true }, -// CHECK:STDOUT: { index: 15, kind: 'Var', line: {{ *}}[[@LINE-37]], column: 1, indent: 1, spelling: 'var', has_trailing_space: true }, -// CHECK:STDOUT: { index: 16, kind: 'Identifier', line: {{ *}}[[@LINE-38]], column: 5, indent: 1, spelling: 'c', identifier: 2 }, -// CHECK:STDOUT: { index: 17, kind: 'Colon', line: {{ *}}[[@LINE-39]], column: 6, indent: 1, spelling: ':', has_trailing_space: true }, -// CHECK:STDOUT: { index: 18, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-40]], column: 8, indent: 1, spelling: 'i32', has_trailing_space: true }, -// CHECK:STDOUT: { index: 19, kind: 'Equal', line: {{ *}}[[@LINE-41]], column: 12, indent: 1, spelling: '=', has_trailing_space: true }, -// CHECK:STDOUT: { index: 20, kind: 'RealLiteral', line: {{ *}}[[@LINE-42]], column: 14, indent: 1, spelling: '0.4', value: `4*10^-1` }, -// CHECK:STDOUT: { index: 21, kind: 'Semi', line: {{ *}}[[@LINE-43]], column: 17, indent: 1, spelling: ';', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 1, kind: 'Var', line: {{ *}}[[@LINE-25]], column: 1, indent: 1, spelling: 'var', has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-26]], column: 5, indent: 1, spelling: 'a', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 3, kind: 'Colon', line: {{ *}}[[@LINE-27]], column: 6, indent: 1, spelling: ':' }, +// CHECK:STDOUT: { index: 4, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-28]], column: 8, indent: 1, spelling: 'i32', has_leading_space: true }, +// CHECK:STDOUT: { index: 5, kind: 'Equal', line: {{ *}}[[@LINE-29]], column: 12, indent: 1, spelling: '=', has_leading_space: true }, +// CHECK:STDOUT: { index: 6, kind: 'IntLiteral', line: {{ *}}[[@LINE-30]], column: 14, indent: 1, spelling: '1', value: `1`, has_leading_space: true }, +// CHECK:STDOUT: { index: 7, kind: 'Semi', line: {{ *}}[[@LINE-31]], column: 15, indent: 1, spelling: ';' }, +// CHECK:STDOUT: { index: 8, kind: 'Var', line: {{ *}}[[@LINE-31]], column: 1, indent: 1, spelling: 'var', has_leading_space: true }, +// CHECK:STDOUT: { index: 9, kind: 'Identifier', line: {{ *}}[[@LINE-32]], column: 5, indent: 1, spelling: 'b', identifier: 1, has_leading_space: true }, +// CHECK:STDOUT: { index: 10, kind: 'Colon', line: {{ *}}[[@LINE-33]], column: 6, indent: 1, spelling: ':' }, +// CHECK:STDOUT: { index: 11, kind: 'IntLiteral', line: {{ *}}[[@LINE-34]], column: 8, indent: 1, spelling: '32', value: `32`, has_leading_space: true }, +// CHECK:STDOUT: { index: 12, kind: 'Equal', line: {{ *}}[[@LINE-35]], column: 11, indent: 1, spelling: '=', has_leading_space: true }, +// CHECK:STDOUT: { index: 13, kind: 'IntLiteral', line: {{ *}}[[@LINE-36]], column: 13, indent: 1, spelling: '13', value: `13`, has_leading_space: true }, +// CHECK:STDOUT: { index: 14, kind: 'Semi', line: {{ *}}[[@LINE-37]], column: 15, indent: 1, 
spelling: ';' }, +// CHECK:STDOUT: { index: 15, kind: 'Var', line: {{ *}}[[@LINE-37]], column: 1, indent: 1, spelling: 'var', has_leading_space: true }, +// CHECK:STDOUT: { index: 16, kind: 'Identifier', line: {{ *}}[[@LINE-38]], column: 5, indent: 1, spelling: 'c', identifier: 2, has_leading_space: true }, +// CHECK:STDOUT: { index: 17, kind: 'Colon', line: {{ *}}[[@LINE-39]], column: 6, indent: 1, spelling: ':' }, +// CHECK:STDOUT: { index: 18, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-40]], column: 8, indent: 1, spelling: 'i32', has_leading_space: true }, +// CHECK:STDOUT: { index: 19, kind: 'Equal', line: {{ *}}[[@LINE-41]], column: 12, indent: 1, spelling: '=', has_leading_space: true }, +// CHECK:STDOUT: { index: 20, kind: 'RealLiteral', line: {{ *}}[[@LINE-42]], column: 14, indent: 1, spelling: '0.4', value: `4*10^-1`, has_leading_space: true }, +// CHECK:STDOUT: { index: 21, kind: 'Semi', line: {{ *}}[[@LINE-43]], column: 17, indent: 1, spelling: ';' }, -// CHECK:STDOUT: { index: 22, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 22, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/keywords.carbon b/toolchain/lex/testdata/keywords.carbon index c56f1da7f25fc..740d2b18d9af8 100644 --- a/toolchain/lex/testdata/keywords.carbon +++ b/toolchain/lex/testdata/keywords.carbon @@ -11,40 +11,40 @@ // --- indented.carbon // CHECK:STDOUT: - filename: indented.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, fn - // CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 4, indent: 4, spelling: 'fn', has_trailing_space: true }, + // CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 4, indent: 4, spelling: 'fn', has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] // --- chain.carbon // CHECK:STDOUT: - filename: chain.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, and or not if else for return var break continue _ -// CHECK:STDOUT: { index: 1, kind: 'And', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'and', has_trailing_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'Or', line: {{ *}}[[@LINE-2]], column: 5, indent: 1, spelling: 'or', has_trailing_space: true }, -// CHECK:STDOUT: { index: 3, kind: 'Not', line: {{ *}}[[@LINE-3]], column: 8, indent: 1, spelling: 'not', has_trailing_space: true }, -// CHECK:STDOUT: { index: 4, kind: 'If', line: {{ *}}[[@LINE-4]], column: 12, indent: 1, spelling: 'if', has_trailing_space: true }, -// CHECK:STDOUT: { index: 5, kind: 'Else', line: {{ *}}[[@LINE-5]], column: 15, indent: 1, spelling: 'else', has_trailing_space: true }, -// CHECK:STDOUT: { index: 6, kind: 'For', line: {{ *}}[[@LINE-6]], column: 20, indent: 1, spelling: 'for', 
has_trailing_space: true }, -// CHECK:STDOUT: { index: 7, kind: 'Return', line: {{ *}}[[@LINE-7]], column: 24, indent: 1, spelling: 'return', has_trailing_space: true }, -// CHECK:STDOUT: { index: 8, kind: 'Var', line: {{ *}}[[@LINE-8]], column: 31, indent: 1, spelling: 'var', has_trailing_space: true }, -// CHECK:STDOUT: { index: 9, kind: 'Break', line: {{ *}}[[@LINE-9]], column: 35, indent: 1, spelling: 'break', has_trailing_space: true }, -// CHECK:STDOUT: { index: 10, kind: 'Continue', line: {{ *}}[[@LINE-10]], column: 41, indent: 1, spelling: 'continue', has_trailing_space: true }, -// CHECK:STDOUT: { index: 11, kind: 'Underscore', line: {{ *}}[[@LINE-11]], column: 50, indent: 1, spelling: '_', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'And', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'and', has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Or', line: {{ *}}[[@LINE-2]], column: 5, indent: 1, spelling: 'or', has_leading_space: true }, +// CHECK:STDOUT: { index: 3, kind: 'Not', line: {{ *}}[[@LINE-3]], column: 8, indent: 1, spelling: 'not', has_leading_space: true }, +// CHECK:STDOUT: { index: 4, kind: 'If', line: {{ *}}[[@LINE-4]], column: 12, indent: 1, spelling: 'if', has_leading_space: true }, +// CHECK:STDOUT: { index: 5, kind: 'Else', line: {{ *}}[[@LINE-5]], column: 15, indent: 1, spelling: 'else', has_leading_space: true }, +// CHECK:STDOUT: { index: 6, kind: 'For', line: {{ *}}[[@LINE-6]], column: 20, indent: 1, spelling: 'for', has_leading_space: true }, +// CHECK:STDOUT: { index: 7, kind: 'Return', line: {{ *}}[[@LINE-7]], column: 24, indent: 1, spelling: 'return', has_leading_space: true }, +// CHECK:STDOUT: { index: 8, kind: 'Var', line: {{ *}}[[@LINE-8]], column: 31, indent: 1, spelling: 'var', has_leading_space: true }, +// CHECK:STDOUT: { index: 9, kind: 'Break', line: {{ *}}[[@LINE-9]], column: 35, indent: 1, spelling: 'break', has_leading_space: true }, +// CHECK:STDOUT: { index: 10, kind: 'Continue', line: {{ *}}[[@LINE-10]], column: 41, indent: 1, spelling: 'continue', has_leading_space: true }, +// CHECK:STDOUT: { index: 11, kind: 'Underscore', line: {{ *}}[[@LINE-11]], column: 50, indent: 1, spelling: '_', has_leading_space: true }, -// CHECK:STDOUT: { index: 12, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 12, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] // --- notakeyword.carbon // CHECK:STDOUT: - filename: notakeyword.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, notakeyword -// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'notakeyword', identifier: 0, has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'notakeyword', identifier: 0, has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/multifile.carbon 
b/toolchain/lex/testdata/multifile.carbon index 45f0177459eb6..024bef9bffb7e 100644 --- a/toolchain/lex/testdata/multifile.carbon +++ b/toolchain/lex/testdata/multifile.carbon @@ -11,23 +11,23 @@ // --- a.carbon // CHECK:STDOUT: - filename: a.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, a; -// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'a', identifier: 0 }, -// CHECK:STDOUT: { index: 2, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: ';', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'a', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: ';' }, -// CHECK:STDOUT: { index: 3, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 3, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] // --- b.carbon // CHECK:STDOUT: - filename: b.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, b; -// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'b', identifier: 0 }, -// CHECK:STDOUT: { index: 2, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: ';', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'b', identifier: 0, has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: ';' }, a; -// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'a', identifier: 1 }, -// CHECK:STDOUT: { index: 4, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: ';', has_trailing_space: true }, +// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'a', identifier: 1, has_leading_space: true }, +// CHECK:STDOUT: { index: 4, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 2, indent: 1, spelling: ';' }, -// CHECK:STDOUT: { index: 5, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 5, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/numeric_literals.carbon b/toolchain/lex/testdata/numeric_literals.carbon index fd0ee666b2de5..177f374ea9aeb 100644 --- a/toolchain/lex/testdata/numeric_literals.carbon +++ b/toolchain/lex/testdata/numeric_literals.carbon @@ -9,82 +9,82 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/numeric_literals.carbon // CHECK:STDOUT: - filename: numeric_literals.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', 
has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, fn F() { -// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'fn', has_trailing_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: 'F', identifier: 0 }, +// CHECK:STDOUT: { index: 1, kind: 'Fn', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'fn', has_leading_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 4, indent: 1, spelling: 'F', identifier: 0, has_leading_space: true }, // CHECK:STDOUT: { index: 3, kind: 'OpenParen', line: {{ *}}[[@LINE-3]], column: 5, indent: 1, spelling: '(', closing_token: 4 }, -// CHECK:STDOUT: { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-4]], column: 6, indent: 1, spelling: ')', opening_token: 3, has_trailing_space: true }, -// CHECK:STDOUT: { index: 5, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-5]], column: 8, indent: 1, spelling: '{', closing_token: 54, has_trailing_space: true }, +// CHECK:STDOUT: { index: 4, kind: 'CloseParen', line: {{ *}}[[@LINE-4]], column: 6, indent: 1, spelling: ')', opening_token: 3 }, +// CHECK:STDOUT: { index: 5, kind: 'OpenCurlyBrace', line: {{ *}}[[@LINE-5]], column: 8, indent: 1, spelling: '{', closing_token: 54, has_leading_space: true }, // 8 and 9 trigger special behavior in APInt when mishandling signed versus // unsigned, so we pay extra attention to those. var ints: [i32; 5] = ( - // CHECK:STDOUT: { index: 6, kind: 'Var', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'var', has_trailing_space: true }, - // CHECK:STDOUT: { index: 7, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 7, indent: 3, spelling: 'ints', identifier: 1 }, - // CHECK:STDOUT: { index: 8, kind: 'Colon', line: {{ *}}[[@LINE-3]], column: 11, indent: 3, spelling: ':', has_trailing_space: true }, - // CHECK:STDOUT: { index: 9, kind: 'OpenSquareBracket', line: {{ *}}[[@LINE-4]], column: 13, indent: 3, spelling: '[', closing_token: 13 }, + // CHECK:STDOUT: { index: 6, kind: 'Var', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'var', has_leading_space: true }, + // CHECK:STDOUT: { index: 7, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 7, indent: 3, spelling: 'ints', identifier: 1, has_leading_space: true }, + // CHECK:STDOUT: { index: 8, kind: 'Colon', line: {{ *}}[[@LINE-3]], column: 11, indent: 3, spelling: ':' }, + // CHECK:STDOUT: { index: 9, kind: 'OpenSquareBracket', line: {{ *}}[[@LINE-4]], column: 13, indent: 3, spelling: '[', closing_token: 13, has_leading_space: true }, // CHECK:STDOUT: { index: 10, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-5]], column: 14, indent: 3, spelling: 'i32' }, - // CHECK:STDOUT: { index: 11, kind: 'Semi', line: {{ *}}[[@LINE-6]], column: 17, indent: 3, spelling: ';', has_trailing_space: true }, - // CHECK:STDOUT: { index: 12, kind: 'IntLiteral', line: {{ *}}[[@LINE-7]], column: 19, indent: 3, spelling: '5', value: `5` }, - // CHECK:STDOUT: { index: 13, kind: 'CloseSquareBracket', line: {{ *}}[[@LINE-8]], column: 20, indent: 3, spelling: ']', opening_token: 9, has_trailing_space: true }, - // CHECK:STDOUT: { index: 14, kind: 'Equal', line: {{ *}}[[@LINE-9]], column: 22, indent: 3, spelling: '=', has_trailing_space: true }, - // CHECK:STDOUT: { index: 15, kind: 'OpenParen', line: {{ *}}[[@LINE-10]], column: 24, indent: 3, spelling: '(', closing_token: 26, has_trailing_space: true 
}, + // CHECK:STDOUT: { index: 11, kind: 'Semi', line: {{ *}}[[@LINE-6]], column: 17, indent: 3, spelling: ';' }, + // CHECK:STDOUT: { index: 12, kind: 'IntLiteral', line: {{ *}}[[@LINE-7]], column: 19, indent: 3, spelling: '5', value: `5`, has_leading_space: true }, + // CHECK:STDOUT: { index: 13, kind: 'CloseSquareBracket', line: {{ *}}[[@LINE-8]], column: 20, indent: 3, spelling: ']', opening_token: 9 }, + // CHECK:STDOUT: { index: 14, kind: 'Equal', line: {{ *}}[[@LINE-9]], column: 22, indent: 3, spelling: '=', has_leading_space: true }, + // CHECK:STDOUT: { index: 15, kind: 'OpenParen', line: {{ *}}[[@LINE-10]], column: 24, indent: 3, spelling: '(', closing_token: 26, has_leading_space: true }, 8, - // CHECK:STDOUT: { index: 16, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '8', value: `8` }, - // CHECK:STDOUT: { index: 17, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 6, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 16, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '8', value: `8`, has_leading_space: true }, + // CHECK:STDOUT: { index: 17, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 6, indent: 5, spelling: ',' }, 9, - // CHECK:STDOUT: { index: 18, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '9', value: `9` }, - // CHECK:STDOUT: { index: 19, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 6, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 18, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '9', value: `9`, has_leading_space: true }, + // CHECK:STDOUT: { index: 19, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 6, indent: 5, spelling: ',' }, 0x8, - // CHECK:STDOUT: { index: 20, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '0x8', value: `8` }, - // CHECK:STDOUT: { index: 21, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 8, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 20, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '0x8', value: `8`, has_leading_space: true }, + // CHECK:STDOUT: { index: 21, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 8, indent: 5, spelling: ',' }, 0b1000, - // CHECK:STDOUT: { index: 22, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '0b1000', value: `8` }, - // CHECK:STDOUT: { index: 23, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 11, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 22, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '0b1000', value: `8`, has_leading_space: true }, + // CHECK:STDOUT: { index: 23, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 11, indent: 5, spelling: ',' }, 39999999999999999993, - // CHECK:STDOUT: { index: 24, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '39999999999999999993', value: `39999999999999999993` }, - // CHECK:STDOUT: { index: 25, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 25, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 24, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '39999999999999999993', value: `39999999999999999993`, has_leading_space: true }, + // CHECK:STDOUT: { index: 25, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 25, indent: 5, spelling: ',' }, ); - // CHECK:STDOUT: { index: 26, kind: 'CloseParen', line: {{ 
*}}[[@LINE-1]], column: 3, indent: 3, spelling: ')', opening_token: 15 }, - // CHECK:STDOUT: { index: 27, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 4, indent: 3, spelling: ';', has_trailing_space: true }, + // CHECK:STDOUT: { index: 26, kind: 'CloseParen', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: ')', opening_token: 15, has_leading_space: true }, + // CHECK:STDOUT: { index: 27, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 4, indent: 3, spelling: ';' }, var floats: [f64; 7] = ( - // CHECK:STDOUT: { index: 28, kind: 'Var', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'var', has_trailing_space: true }, - // CHECK:STDOUT: { index: 29, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 7, indent: 3, spelling: 'floats', identifier: 2 }, - // CHECK:STDOUT: { index: 30, kind: 'Colon', line: {{ *}}[[@LINE-3]], column: 13, indent: 3, spelling: ':', has_trailing_space: true }, - // CHECK:STDOUT: { index: 31, kind: 'OpenSquareBracket', line: {{ *}}[[@LINE-4]], column: 15, indent: 3, spelling: '[', closing_token: 35 }, + // CHECK:STDOUT: { index: 28, kind: 'Var', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: 'var', has_leading_space: true }, + // CHECK:STDOUT: { index: 29, kind: 'Identifier', line: {{ *}}[[@LINE-2]], column: 7, indent: 3, spelling: 'floats', identifier: 2, has_leading_space: true }, + // CHECK:STDOUT: { index: 30, kind: 'Colon', line: {{ *}}[[@LINE-3]], column: 13, indent: 3, spelling: ':' }, + // CHECK:STDOUT: { index: 31, kind: 'OpenSquareBracket', line: {{ *}}[[@LINE-4]], column: 15, indent: 3, spelling: '[', closing_token: 35, has_leading_space: true }, // CHECK:STDOUT: { index: 32, kind: 'FloatTypeLiteral', line: {{ *}}[[@LINE-5]], column: 16, indent: 3, spelling: 'f64' }, - // CHECK:STDOUT: { index: 33, kind: 'Semi', line: {{ *}}[[@LINE-6]], column: 19, indent: 3, spelling: ';', has_trailing_space: true }, - // CHECK:STDOUT: { index: 34, kind: 'IntLiteral', line: {{ *}}[[@LINE-7]], column: 21, indent: 3, spelling: '7', value: `7` }, - // CHECK:STDOUT: { index: 35, kind: 'CloseSquareBracket', line: {{ *}}[[@LINE-8]], column: 22, indent: 3, spelling: ']', opening_token: 31, has_trailing_space: true }, - // CHECK:STDOUT: { index: 36, kind: 'Equal', line: {{ *}}[[@LINE-9]], column: 24, indent: 3, spelling: '=', has_trailing_space: true }, - // CHECK:STDOUT: { index: 37, kind: 'OpenParen', line: {{ *}}[[@LINE-10]], column: 26, indent: 3, spelling: '(', closing_token: 52, has_trailing_space: true }, + // CHECK:STDOUT: { index: 33, kind: 'Semi', line: {{ *}}[[@LINE-6]], column: 19, indent: 3, spelling: ';' }, + // CHECK:STDOUT: { index: 34, kind: 'IntLiteral', line: {{ *}}[[@LINE-7]], column: 21, indent: 3, spelling: '7', value: `7`, has_leading_space: true }, + // CHECK:STDOUT: { index: 35, kind: 'CloseSquareBracket', line: {{ *}}[[@LINE-8]], column: 22, indent: 3, spelling: ']', opening_token: 31 }, + // CHECK:STDOUT: { index: 36, kind: 'Equal', line: {{ *}}[[@LINE-9]], column: 24, indent: 3, spelling: '=', has_leading_space: true }, + // CHECK:STDOUT: { index: 37, kind: 'OpenParen', line: {{ *}}[[@LINE-10]], column: 26, indent: 3, spelling: '(', closing_token: 52, has_leading_space: true }, 0.9, - // CHECK:STDOUT: { index: 38, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '0.9', value: `9*10^-1` }, - // CHECK:STDOUT: { index: 39, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 8, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 38, kind: 'RealLiteral', line: {{ 
*}}[[@LINE-1]], column: 5, indent: 5, spelling: '0.9', value: `9*10^-1`, has_leading_space: true }, + // CHECK:STDOUT: { index: 39, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 8, indent: 5, spelling: ',' }, 8.0, - // CHECK:STDOUT: { index: 40, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '8.0', value: `80*10^-1` }, - // CHECK:STDOUT: { index: 41, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 8, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 40, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '8.0', value: `80*10^-1`, has_leading_space: true }, + // CHECK:STDOUT: { index: 41, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 8, indent: 5, spelling: ',' }, 80.0, - // CHECK:STDOUT: { index: 42, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '80.0', value: `800*10^-1` }, - // CHECK:STDOUT: { index: 43, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 9, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 42, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '80.0', value: `800*10^-1`, has_leading_space: true }, + // CHECK:STDOUT: { index: 43, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 9, indent: 5, spelling: ',' }, 1.0e7, - // CHECK:STDOUT: { index: 44, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '1.0e7', value: `10*10^6` }, - // CHECK:STDOUT: { index: 45, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 10, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 44, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '1.0e7', value: `10*10^6`, has_leading_space: true }, + // CHECK:STDOUT: { index: 45, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 10, indent: 5, spelling: ',' }, 1.0e8, - // CHECK:STDOUT: { index: 46, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '1.0e8', value: `10*10^7` }, - // CHECK:STDOUT: { index: 47, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 10, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 46, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '1.0e8', value: `10*10^7`, has_leading_space: true }, + // CHECK:STDOUT: { index: 47, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 10, indent: 5, spelling: ',' }, 1.0e-8, - // CHECK:STDOUT: { index: 48, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '1.0e-8', value: `10*10^-9` }, - // CHECK:STDOUT: { index: 49, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 11, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 48, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '1.0e-8', value: `10*10^-9`, has_leading_space: true }, + // CHECK:STDOUT: { index: 49, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 11, indent: 5, spelling: ',' }, 39999999999999999993.0e39999999999999999993, - // CHECK:STDOUT: { index: 50, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: '39999999999999999993.0e39999999999999999993', value: `399999999999999999930*10^39999999999999999992` }, - // CHECK:STDOUT: { index: 51, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 48, indent: 5, spelling: ',', has_trailing_space: true }, + // CHECK:STDOUT: { index: 50, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 5, indent: 5, spelling: 
'39999999999999999993.0e39999999999999999993', value: `399999999999999999930*10^39999999999999999992`, has_leading_space: true }, + // CHECK:STDOUT: { index: 51, kind: 'Comma', line: {{ *}}[[@LINE-2]], column: 48, indent: 5, spelling: ',' }, ); - // CHECK:STDOUT: { index: 52, kind: 'CloseParen', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: ')', opening_token: 37 }, - // CHECK:STDOUT: { index: 53, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 4, indent: 3, spelling: ';', has_trailing_space: true }, + // CHECK:STDOUT: { index: 52, kind: 'CloseParen', line: {{ *}}[[@LINE-1]], column: 3, indent: 3, spelling: ')', opening_token: 37, has_leading_space: true }, + // CHECK:STDOUT: { index: 53, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 4, indent: 3, spelling: ';' }, } -// CHECK:STDOUT: { index: 54, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', opening_token: 5, has_trailing_space: true }, +// CHECK:STDOUT: { index: 54, kind: 'CloseCurlyBrace', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '}', opening_token: 5, has_leading_space: true }, -// CHECK:STDOUT: { index: 55, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 55, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/printing_digit_padding.carbon b/toolchain/lex/testdata/printing_digit_padding.carbon index 12431c564fe95..0766eebe362f2 100644 --- a/toolchain/lex/testdata/printing_digit_padding.carbon +++ b/toolchain/lex/testdata/printing_digit_padding.carbon @@ -9,16 +9,16 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/printing_digit_padding.carbon // CHECK:STDOUT: - filename: printing_digit_padding.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, ; -// CHECK:STDOUT: { index: 1, kind: 'Semi', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: ';', has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Semi', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: ';', has_leading_space: true }, // Test digit padding with values of 9, 10, and 11. 
;;; - // CHECK:STDOUT: { index: 2, kind: 'Semi', line: {{ *}}[[@LINE-1]], column: 9, indent: 9, spelling: ';' }, + // CHECK:STDOUT: { index: 2, kind: 'Semi', line: {{ *}}[[@LINE-1]], column: 9, indent: 9, spelling: ';', has_leading_space: true }, // CHECK:STDOUT: { index: 3, kind: 'Semi', line: {{ *}}[[@LINE-2]], column: 10, indent: 9, spelling: ';' }, - // CHECK:STDOUT: { index: 4, kind: 'Semi', line: {{ *}}[[@LINE-3]], column: 11, indent: 9, spelling: ';', has_trailing_space: true }, + // CHECK:STDOUT: { index: 4, kind: 'Semi', line: {{ *}}[[@LINE-3]], column: 11, indent: 9, spelling: ';' }, -// CHECK:STDOUT: { index: 5, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 5, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/printing_integer_literal.carbon b/toolchain/lex/testdata/printing_integer_literal.carbon index 1d95ad61cdbcc..a5801ae761c1b 100644 --- a/toolchain/lex/testdata/printing_integer_literal.carbon +++ b/toolchain/lex/testdata/printing_integer_literal.carbon @@ -9,10 +9,10 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/printing_integer_literal.carbon // CHECK:STDOUT: - filename: printing_integer_literal.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, 123 -// CHECK:STDOUT: { index: 1, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '123', value: `123`, has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '123', value: `123`, has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/printing_real_literal.carbon b/toolchain/lex/testdata/printing_real_literal.carbon index 674032301bfe6..726c206c7c512 100644 --- a/toolchain/lex/testdata/printing_real_literal.carbon +++ b/toolchain/lex/testdata/printing_real_literal.carbon @@ -9,10 +9,10 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/printing_real_literal.carbon // CHECK:STDOUT: - filename: printing_real_literal.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, 2.5 -// CHECK:STDOUT: { index: 1, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '2.5', value: `25*10^-1`, has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'RealLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '2.5', value: `25*10^-1`, has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', 
has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/printing_token.carbon b/toolchain/lex/testdata/printing_token.carbon index a985a75f9902b..606c819b45774 100644 --- a/toolchain/lex/testdata/printing_token.carbon +++ b/toolchain/lex/testdata/printing_token.carbon @@ -9,10 +9,10 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/printing_token.carbon // CHECK:STDOUT: - filename: printing_token.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, 0x9 -// CHECK:STDOUT: { index: 1, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '0x9', value: `9`, has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'IntLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: '0x9', value: `9`, has_leading_space: true }, -// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 2, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/testdata/raw_identifier.carbon b/toolchain/lex/testdata/raw_identifier.carbon index 0312fc04666dd..593d6c3ff99ec 100644 --- a/toolchain/lex/testdata/raw_identifier.carbon +++ b/toolchain/lex/testdata/raw_identifier.carbon @@ -9,31 +9,31 @@ // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/raw_identifier.carbon // CHECK:STDOUT: - filename: raw_identifier.carbon // CHECK:STDOUT: tokens: [ -// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '', has_trailing_space: true }, +// CHECK:STDOUT: { index: 0, kind: 'FileStart', line: {{ *\d+}}, column: 1, indent: 1, spelling: '' }, // A non-keyword identifier. r#foo -// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true }, +// CHECK:STDOUT: { index: 1, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'foo', identifier: 1, has_leading_space: true }, // The same non-keyword identifier, for comparison. foo -// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'foo', identifier: 1, has_trailing_space: true }, +// CHECK:STDOUT: { index: 2, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'foo', identifier: 1, has_leading_space: true }, // A keyword as a raw identifier. r#self -// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'self', identifier: 2, has_trailing_space: true }, +// CHECK:STDOUT: { index: 3, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'self', identifier: 2, has_leading_space: true }, // The same keyword, for comparison. self -// CHECK:STDOUT: { index: 4, kind: 'SelfValueIdentifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'self', has_trailing_space: true }, +// CHECK:STDOUT: { index: 4, kind: 'SelfValueIdentifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'self', has_leading_space: true }, // A type literal as a raw identifier. 
r#i32 -// CHECK:STDOUT: { index: 5, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'i32', identifier: 3, has_trailing_space: true }, +// CHECK:STDOUT: { index: 5, kind: 'Identifier', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'i32', identifier: 3, has_leading_space: true }, // The same type literal, for comparison. i32 -// CHECK:STDOUT: { index: 6, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'i32', has_trailing_space: true }, +// CHECK:STDOUT: { index: 6, kind: 'IntTypeLiteral', line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: 'i32', has_leading_space: true }, -// CHECK:STDOUT: { index: 7, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '' }, +// CHECK:STDOUT: { index: 7, kind: 'FileEnd', line: {{ *}}[[@LINE+1]], column: {{ *\d+}}, indent: 1, spelling: '', has_leading_space: true }, // CHECK:STDOUT: ] diff --git a/toolchain/lex/tokenized_buffer.cpp b/toolchain/lex/tokenized_buffer.cpp index f20972a36d9ca..8fd58dc524060 100644 --- a/toolchain/lex/tokenized_buffer.cpp +++ b/toolchain/lex/tokenized_buffer.cpp @@ -21,11 +21,11 @@ namespace Carbon::Lex { auto TokenizedBuffer::GetKind(TokenIndex token) const -> TokenKind { - return GetTokenInfo(token).kind; + return GetTokenInfo(token).kind(); } auto TokenizedBuffer::GetLine(TokenIndex token) const -> LineIndex { - return FindLineIndex(GetTokenInfo(token).byte_offset); + return FindLineIndex(GetTokenInfo(token).byte_offset()); } auto TokenizedBuffer::GetLineNumber(TokenIndex token) const -> int { @@ -34,8 +34,8 @@ auto TokenizedBuffer::GetLineNumber(TokenIndex token) const -> int { auto TokenizedBuffer::GetColumnNumber(TokenIndex token) const -> int { const auto& token_info = GetTokenInfo(token); - const auto& line_info = GetLineInfo(FindLineIndex(token_info.byte_offset)); - return token_info.byte_offset - line_info.start + 1; + const auto& line_info = GetLineInfo(FindLineIndex(token_info.byte_offset())); + return token_info.byte_offset() - line_info.start + 1; } auto TokenizedBuffer::GetEndLoc(TokenIndex token) const @@ -59,111 +59,117 @@ auto TokenizedBuffer::GetEndLoc(TokenIndex token) const auto TokenizedBuffer::GetTokenText(TokenIndex token) const -> llvm::StringRef { const auto& token_info = GetTokenInfo(token); - llvm::StringRef fixed_spelling = token_info.kind.fixed_spelling(); + llvm::StringRef fixed_spelling = token_info.kind().fixed_spelling(); if (!fixed_spelling.empty()) { return fixed_spelling; } - if (token_info.kind == TokenKind::Error) { - return source_->text().substr(token_info.byte_offset, - token_info.error_length); + if (token_info.kind() == TokenKind::Error) { + return source_->text().substr(token_info.byte_offset(), + token_info.error_length()); } // Refer back to the source text to preserve oddities like radix or digit // separators the author included. - if (token_info.kind == TokenKind::IntLiteral || - token_info.kind == TokenKind::RealLiteral) { + if (token_info.kind() == TokenKind::IntLiteral || + token_info.kind() == TokenKind::RealLiteral) { std::optional relexed_token = - NumericLiteral::Lex(source_->text().substr(token_info.byte_offset)); + NumericLiteral::Lex(source_->text().substr(token_info.byte_offset())); CARBON_CHECK(relexed_token) << "Could not reform numeric literal token."; return relexed_token->text(); } // Refer back to the source text to find the original spelling, including // escape sequences etc. 
- if (token_info.kind == TokenKind::StringLiteral) { + if (token_info.kind() == TokenKind::StringLiteral) { std::optional relexed_token = - StringLiteral::Lex(source_->text().substr(token_info.byte_offset)); + StringLiteral::Lex(source_->text().substr(token_info.byte_offset())); CARBON_CHECK(relexed_token) << "Could not reform string literal token."; return relexed_token->text(); } // Refer back to the source text to avoid needing to reconstruct the // spelling from the size. - if (token_info.kind.is_sized_type_literal()) { + if (token_info.kind().is_sized_type_literal()) { llvm::StringRef suffix = source_->text() - .substr(token_info.byte_offset + 1) + .substr(token_info.byte_offset() + 1) .take_while(IsDecimalDigit); return llvm::StringRef(suffix.data() - 1, suffix.size() + 1); } - if (token_info.kind == TokenKind::FileStart || - token_info.kind == TokenKind::FileEnd) { + if (token_info.kind() == TokenKind::FileStart || + token_info.kind() == TokenKind::FileEnd) { return llvm::StringRef(); } - CARBON_CHECK(token_info.kind == TokenKind::Identifier) << token_info.kind; - return value_stores_->identifiers().Get(token_info.ident_id); + CARBON_CHECK(token_info.kind() == TokenKind::Identifier) << token_info.kind(); + return value_stores_->identifiers().Get(token_info.ident_id()); } auto TokenizedBuffer::GetIdentifier(TokenIndex token) const -> IdentifierId { const auto& token_info = GetTokenInfo(token); - CARBON_CHECK(token_info.kind == TokenKind::Identifier) << token_info.kind; - return token_info.ident_id; + CARBON_CHECK(token_info.kind() == TokenKind::Identifier) << token_info.kind(); + return token_info.ident_id(); } auto TokenizedBuffer::GetIntLiteral(TokenIndex token) const -> IntId { const auto& token_info = GetTokenInfo(token); - CARBON_CHECK(token_info.kind == TokenKind::IntLiteral) << token_info.kind; - return token_info.int_id; + CARBON_CHECK(token_info.kind() == TokenKind::IntLiteral) << token_info.kind(); + return token_info.int_id(); } auto TokenizedBuffer::GetRealLiteral(TokenIndex token) const -> RealId { const auto& token_info = GetTokenInfo(token); - CARBON_CHECK(token_info.kind == TokenKind::RealLiteral) << token_info.kind; - return token_info.real_id; + CARBON_CHECK(token_info.kind() == TokenKind::RealLiteral) + << token_info.kind(); + return token_info.real_id(); } auto TokenizedBuffer::GetStringLiteralValue(TokenIndex token) const -> StringLiteralValueId { const auto& token_info = GetTokenInfo(token); - CARBON_CHECK(token_info.kind == TokenKind::StringLiteral) << token_info.kind; - return token_info.string_literal_id; + CARBON_CHECK(token_info.kind() == TokenKind::StringLiteral) + << token_info.kind(); + return token_info.string_literal_id(); } auto TokenizedBuffer::GetTypeLiteralSize(TokenIndex token) const -> IntId { const auto& token_info = GetTokenInfo(token); - CARBON_CHECK(token_info.kind.is_sized_type_literal()) << token_info.kind; - return token_info.int_id; + CARBON_CHECK(token_info.kind().is_sized_type_literal()) << token_info.kind(); + return token_info.int_id(); } auto TokenizedBuffer::GetMatchedClosingToken(TokenIndex opening_token) const -> TokenIndex { const auto& opening_token_info = GetTokenInfo(opening_token); - CARBON_CHECK(opening_token_info.kind.is_opening_symbol()) - << opening_token_info.kind; - return opening_token_info.closing_token; + CARBON_CHECK(opening_token_info.kind().is_opening_symbol()) + << opening_token_info.kind(); + return opening_token_info.closing_token_index(); } auto TokenizedBuffer::GetMatchedOpeningToken(TokenIndex closing_token) 
const -> TokenIndex { const auto& closing_token_info = GetTokenInfo(closing_token); - CARBON_CHECK(closing_token_info.kind.is_closing_symbol()) - << closing_token_info.kind; - return closing_token_info.opening_token; + CARBON_CHECK(closing_token_info.kind().is_closing_symbol()) + << closing_token_info.kind(); + return closing_token_info.opening_token_index(); } auto TokenizedBuffer::HasLeadingWhitespace(TokenIndex token) const -> bool { - auto it = TokenIterator(token); - return it == tokens().begin() || GetTokenInfo(*(it - 1)).has_trailing_space; + return GetTokenInfo(token).has_leading_space(); } auto TokenizedBuffer::HasTrailingWhitespace(TokenIndex token) const -> bool { - return GetTokenInfo(token).has_trailing_space; + TokenIterator it(token); + ++it; + return it != tokens().end() && GetTokenInfo(*it).has_leading_space(); } auto TokenizedBuffer::IsRecoveryToken(TokenIndex token) const -> bool { - return GetTokenInfo(token).is_recovery; + if (recovery_tokens_.empty()) { + return false; + } + return recovery_tokens_[token.index]; } auto TokenizedBuffer::GetLineNumber(LineIndex line) const -> int { @@ -251,7 +257,7 @@ auto TokenizedBuffer::PrintToken(llvm::raw_ostream& output_stream, widths.Widen(GetTokenPrintWidths(token)); int token_index = token.index; const auto& token_info = GetTokenInfo(token); - LineIndex line_index = FindLineIndex(token_info.byte_offset); + LineIndex line_index = FindLineIndex(token_info.byte_offset()); llvm::StringRef token_text = GetTokenText(token); // Output the main chunk using one format string. We have to do the @@ -261,14 +267,15 @@ auto TokenizedBuffer::PrintToken(llvm::raw_ostream& output_stream, " { index: {0}, kind: {1}, line: {2}, column: {3}, indent: {4}, " "spelling: '{5}'", llvm::format_decimal(token_index, widths.index), - llvm::right_justify(llvm::formatv("'{0}'", token_info.kind.name()).str(), - widths.kind + 2), + llvm::right_justify( + llvm::formatv("'{0}'", token_info.kind().name()).str(), + widths.kind + 2), llvm::format_decimal(GetLineNumber(GetLine(token)), widths.line), llvm::format_decimal(GetColumnNumber(token), widths.column), llvm::format_decimal(GetIndentColumnNumber(line_index), widths.indent), token_text); - switch (token_info.kind) { + switch (token_info.kind()) { case TokenKind::Identifier: output_stream << ", identifier: " << GetIdentifier(token).index; break; @@ -290,20 +297,20 @@ auto TokenizedBuffer::PrintToken(llvm::raw_ostream& output_stream, << "`"; break; default: - if (token_info.kind.is_opening_symbol()) { + if (token_info.kind().is_opening_symbol()) { output_stream << ", closing_token: " << GetMatchedClosingToken(token).index; - } else if (token_info.kind.is_closing_symbol()) { + } else if (token_info.kind().is_closing_symbol()) { output_stream << ", opening_token: " << GetMatchedOpeningToken(token).index; } break; } - if (token_info.has_trailing_space) { - output_stream << ", has_trailing_space: true"; + if (token_info.has_leading_space()) { + output_stream << ", has_leading_space: true"; } - if (token_info.is_recovery) { + if (IsRecoveryToken(token)) { output_stream << ", recovery: true"; } @@ -358,7 +365,7 @@ auto TokenizedBuffer::GetTokenInfo(TokenIndex token) const -> const TokenInfo& { auto TokenizedBuffer::AddToken(TokenInfo info) -> TokenIndex { token_infos_.push_back(info); - expected_parse_tree_size_ += info.kind.expected_parse_tree_size(); + expected_parse_tree_size_ += info.kind().expected_parse_tree_size(); return TokenIndex(static_cast(token_infos_.size()) - 1); } @@ -416,7 +423,7 @@ auto 
TokenDiagnosticConverter::ConvertLoc(TokenIndex token, // Map the token location into a position within the source buffer. const auto& token_info = buffer_->GetTokenInfo(token); const char* token_start = - buffer_->source_->text().begin() + token_info.byte_offset; + buffer_->source_->text().begin() + token_info.byte_offset(); // Find the corresponding file location. // TODO: Should we somehow indicate in the diagnostic location if this token diff --git a/toolchain/lex/tokenized_buffer.h b/toolchain/lex/tokenized_buffer.h index fbb39b15ba36c..8e0c38a74be6b 100644 --- a/toolchain/lex/tokenized_buffer.h +++ b/toolchain/lex/tokenized_buffer.h @@ -260,33 +260,161 @@ class TokenizedBuffer : public Printable { int indent; }; - struct TokenInfo { - TokenKind kind; - - // Whether the token has trailing whitespace. - bool has_trailing_space = false; - - // Whether the token was injected artificially during error recovery. - bool is_recovery = false; - - // Zero-based byte offset of the token within the file. - int32_t byte_offset; - - // We may have up to 32 bits of payload, based on the kind of token. - union { - static_assert( - sizeof(TokenIndex) <= sizeof(int32_t), - "Unable to pack token and identifier index into the same space!"); - - IdentifierId ident_id = IdentifierId::Invalid; - StringLiteralValueId string_literal_id; - IntId int_id; - RealId real_id; - TokenIndex closing_token; - TokenIndex opening_token; - int32_t error_length; - }; + // Storage for the information about a specific token in the buffer. + // + // This provides a friendly accessor API to the carefully space-optimized + // storage model of the information we associated with each token. + // + // There are four pieces of information stored here: + // - The kind of the token. + // - Whether that token has leading whitespace before it. + // - A kind-specific payload that can be compressed into a small integer. + // - This class provides dedicated accessors for each different form of + // payload that check the kind and payload correspond correctly. + // - A 32-bit byte offset of the token within the source text. + // + // These are compressed and stored in 8-bytes for each token. + // + // Note that while the class provides some limited setters for payloads and + // mutating methods, setters on this type may be unexpectedly expensive due to + // the bit-packed representation and should be avoided. As such, only the + // minimal necessary setters are provided. + // + // TODO: It might be worth considering a struct-of-arrays data layout in order + // to move the byte offset to a separate array from the rest as it is only hot + // during lexing, and then cold during parsing and semantic analysis. However, + // a trivial approach to that adds more overhead than it saves due to tracking + // two separate vectors and their growth. Making this profitable would likely + // at least require a highly specialized single vector that manages the growth + // once and then provides separate storage areas for the two arrays. + class TokenInfo { + public: + // The kind for this token. + auto kind() const -> TokenKind { return TokenKind::Make(kind_); } + + // Whether this token is preceded by whitespace. We only store the preceding + // state, and look at the next token to check for trailing whitespace. + auto has_leading_space() const -> bool { return has_leading_space_; } + + // A collection of methods to access the specific payload included with + // particular kinds of tokens. 
Only the specific payload accessor below may + // be used for an info entry of a token with a particular kind, and these + // check that the kind is valid. Some tokens do not include a payload at all + // and none of these methods may be called. + auto ident_id() const -> IdentifierId { + CARBON_DCHECK(kind() == TokenKind::Identifier); + return IdentifierId(token_payload_); + } + auto set_ident_id(IdentifierId ident_id) -> void { + CARBON_DCHECK(kind() == TokenKind::Identifier); + CARBON_DCHECK(ident_id.index < (2 << PayloadBits)); + token_payload_ = ident_id.index; + } + + auto string_literal_id() const -> StringLiteralValueId { + CARBON_DCHECK(kind() == TokenKind::StringLiteral); + return StringLiteralValueId(token_payload_); + } + + auto int_id() const -> IntId { + CARBON_DCHECK(kind() == TokenKind::IntLiteral || + kind() == TokenKind::IntTypeLiteral || + kind() == TokenKind::UnsignedIntTypeLiteral || + kind() == TokenKind::FloatTypeLiteral); + return IntId(token_payload_); + } + + auto real_id() const -> RealId { + CARBON_DCHECK(kind() == TokenKind::RealLiteral); + return RealId(token_payload_); + } + + auto closing_token_index() const -> TokenIndex { + CARBON_DCHECK(kind().is_opening_symbol()); + return TokenIndex(token_payload_); + } + auto set_closing_token_index(TokenIndex closing_index) -> void { + CARBON_DCHECK(kind().is_opening_symbol()); + CARBON_DCHECK(closing_index.index < (2 << PayloadBits)); + token_payload_ = closing_index.index; + } + + auto opening_token_index() const -> TokenIndex { + CARBON_DCHECK(kind().is_closing_symbol()); + return TokenIndex(token_payload_); + } + auto set_opening_token_index(TokenIndex opening_index) -> void { + CARBON_DCHECK(kind().is_closing_symbol()); + CARBON_DCHECK(opening_index.index < (2 << PayloadBits)); + token_payload_ = opening_index.index; + } + + auto error_length() const -> int { + CARBON_DCHECK(kind() == TokenKind::Error); + return token_payload_; + } + + // Zero-based byte offset of the token within the file. This can be combined + // with the buffer's line information to locate the line and column of the + // token as well. + auto byte_offset() const -> int32_t { return byte_offset_; } + + // Transforms the token into an error token of the given length but at its + // original position and with the same whitespace adjacency. + auto ResetAsError(int error_length) -> void { + // Construct a fresh token to establish any needed invariants and replace + // this token with it. + TokenInfo error(TokenKind::Error, has_leading_space(), error_length, + byte_offset()); + *this = error; + } + + private: + friend class Lexer; + + static constexpr int PayloadBits = 23; + + // Constructor for a TokenKind that carries no payload, or where the payload + // will be set later. + // + // Only used by the lexer which enforces only the correct kinds are used. + // + // When the payload is not being set, we leave it uninitialized. At least in + // some cases, this will allow MSan to correctly detect erroneous attempts + // to access the payload, as it works to track uninitialized memory + // bit-for-bit specifically to handle complex cases like bitfields. + TokenInfo(TokenKind kind, bool has_leading_space, int32_t byte_offset) + : kind_(kind), + has_leading_space_(has_leading_space), + byte_offset_(byte_offset) {} + + // Constructor for a TokenKind that carries a payload. + // + // Only used by the lexer which enforces the correct kind and payload types. 
+ TokenInfo(TokenKind kind, bool has_leading_space, int payload, + int32_t byte_offset) + : kind_(kind), + has_leading_space_(has_leading_space), + token_payload_(payload), + byte_offset_(byte_offset) { + CARBON_DCHECK(payload >= 0 && payload < (2 << PayloadBits)) + << "Payload won't fit into unsigned bit pack: " << payload; + } + + // A bitfield that encodes the token's kind, the leading space flag, and the + // remaining bits in a payload. These are encoded together as a bitfield for + // density and because these are the hottest fields of tokens for consumers + // after lexing. + TokenKind::RawEnumType kind_ : sizeof(TokenKind) * 8; + bool has_leading_space_ : 1; + unsigned token_payload_ : PayloadBits; + + // Separate storage for the byte offset, this is hot while lexing but then + // generally cold. + int32_t byte_offset_; }; + static_assert(sizeof(TokenInfo) == 8, + "Expected `TokenInfo` to pack to an 8-byte structure."); struct LineInfo { explicit LineInfo(int32_t start) : start(start), indent(0) {} @@ -333,6 +461,11 @@ class TokenizedBuffer : public Printable { int expected_parse_tree_size_ = 0; bool has_errors_ = false; + + // A vector of flags for recovery tokens. If empty, there are none. When doing + // token recovery, this will be extended to be indexable by token indices and + // contain true for the tokens that were synthesized for recovery. + llvm::BitVector recovery_tokens_; }; // A diagnostic emitter that uses positions within a source buffer's text as diff --git a/toolchain/lex/tokenized_buffer_test.cpp b/toolchain/lex/tokenized_buffer_test.cpp index 6883595ca3142..41cef671ae893 100644 --- a/toolchain/lex/tokenized_buffer_test.cpp +++ b/toolchain/lex/tokenized_buffer_test.cpp @@ -637,7 +637,7 @@ TEST_F(LexerTest, Whitespace) { auto buffer = Lex("{( } {("); // Whether there should be whitespace before/after each token. 
- bool space[] = {true, + bool space[] = {false, // start-of-file true, // { @@ -1126,32 +1126,31 @@ TEST_F(LexerTest, PrintingOutputYaml) { Yaml::Value::FromText(print_stream.TakeStr()), IsYaml(ElementsAre(Yaml::Sequence(ElementsAre(Yaml::Mapping(ElementsAre( Pair("filename", source_storage_.front().filename().str()), - Pair("tokens", - Yaml::Sequence(ElementsAre( - Yaml::Mapping(ElementsAre( - Pair("index", "0"), Pair("kind", "FileStart"), - Pair("line", "1"), Pair("column", "1"), - Pair("indent", "1"), Pair("spelling", ""), - Pair("has_trailing_space", "true"))), - Yaml::Mapping( - ElementsAre(Pair("index", "1"), Pair("kind", "Semi"), - Pair("line", "2"), Pair("column", "2"), - Pair("indent", "2"), Pair("spelling", ";"), - Pair("has_trailing_space", "true"))), - Yaml::Mapping( - ElementsAre(Pair("index", "2"), Pair("kind", "Semi"), - Pair("line", "5"), Pair("column", "1"), - Pair("indent", "1"), Pair("spelling", ";"), - Pair("has_trailing_space", "true"))), - Yaml::Mapping( - ElementsAre(Pair("index", "3"), Pair("kind", "Semi"), - Pair("line", "5"), Pair("column", "3"), - Pair("indent", "1"), Pair("spelling", ";"), - Pair("has_trailing_space", "true"))), - Yaml::Mapping(ElementsAre( - Pair("index", "4"), Pair("kind", "FileEnd"), - Pair("line", "15"), Pair("column", "1"), - Pair("indent", "1"), Pair("spelling", ""))))))))))))); + Pair("tokens", Yaml::Sequence(ElementsAre( + Yaml::Mapping(ElementsAre( + Pair("index", "0"), Pair("kind", "FileStart"), + Pair("line", "1"), Pair("column", "1"), + Pair("indent", "1"), Pair("spelling", ""))), + Yaml::Mapping(ElementsAre( + Pair("index", "1"), Pair("kind", "Semi"), + Pair("line", "2"), Pair("column", "2"), + Pair("indent", "2"), Pair("spelling", ";"), + Pair("has_leading_space", "true"))), + Yaml::Mapping(ElementsAre( + Pair("index", "2"), Pair("kind", "Semi"), + Pair("line", "5"), Pair("column", "1"), + Pair("indent", "1"), Pair("spelling", ";"), + Pair("has_leading_space", "true"))), + Yaml::Mapping(ElementsAre( + Pair("index", "3"), Pair("kind", "Semi"), + Pair("line", "5"), Pair("column", "3"), + Pair("indent", "1"), Pair("spelling", ";"), + Pair("has_leading_space", "true"))), + Yaml::Mapping(ElementsAre( + Pair("index", "4"), Pair("kind", "FileEnd"), + Pair("line", "15"), Pair("column", "1"), + Pair("indent", "1"), Pair("spelling", ""), + Pair("has_leading_space", "true"))))))))))))); } } // namespace
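Appendix for reviewers: the heart of this change is the bit-packed `TokenInfo` in `tokenized_buffer.h`, where the kind, a leading-space flag, and a 23-bit kind-specific payload share one 32-bit word while the byte offset fills the other. The standalone sketch below is not the Carbon type; the names (`DemoKind`, `DemoTokenInfo`) are invented for illustration, and it only shows how such a packing stays at 8 bytes and what the payload bound check has to guarantee.

#include <cassert>
#include <cstdint>

// Illustrative stand-in for an 8-bit token kind.
enum class DemoKind : uint8_t { Identifier, IntLiteral, OpenParen, Error };

// A sketch of a bit-packed token record in the spirit of the new TokenInfo:
// kind, leading-space flag, and a 23-bit payload share one 32-bit word, and
// the byte offset fills the other, for 8 bytes per token.
class DemoTokenInfo {
 public:
  static constexpr int PayloadBits = 23;

  DemoTokenInfo(DemoKind kind, bool has_leading_space, int payload,
                int32_t byte_offset)
      : kind_(static_cast<unsigned>(kind)),
        has_leading_space_(has_leading_space ? 1u : 0u),
        payload_(static_cast<unsigned>(payload)),
        byte_offset_(byte_offset) {
    // A 23-bit field holds values in [0, 2^23); anything larger must be
    // stored indirectly, e.g. as an index into a separate value store.
    assert(payload >= 0 && payload < (1 << PayloadBits));
  }

  auto kind() const -> DemoKind { return static_cast<DemoKind>(kind_); }
  auto has_leading_space() const -> bool { return has_leading_space_ != 0; }
  auto payload() const -> int { return static_cast<int>(payload_); }
  auto byte_offset() const -> int32_t { return byte_offset_; }

 private:
  // Using `unsigned` for every bit-field keeps them in one allocation unit
  // on common ABIs, so the static_assert below holds.
  unsigned kind_ : 8;
  unsigned has_leading_space_ : 1;
  unsigned payload_ : PayloadBits;

  // The byte offset is hot while lexing but cold afterwards, so it sits in
  // its own word rather than in the packed one.
  int32_t byte_offset_;
};

static_assert(sizeof(DemoTokenInfo) == 8, "expected an 8-byte token record");

Keeping the hot fields (kind, whitespace flag, payload) together in one word and the colder byte offset in a second word is the same trade-off the patch's own comments describe for the real `TokenInfo`.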
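The flip from `has_trailing_space` to `has_leading_space` also means trailing whitespace is no longer stored per token; it is derived from the token that follows, as the new `HasTrailingWhitespace` in `tokenized_buffer.cpp` does. A minimal sketch of that derivation over a plain vector of flags (hypothetical free functions, not the `TokenizedBuffer` API):

#include <cstddef>
#include <vector>

// Each element records whether the corresponding token had whitespace
// immediately before it -- the only whitespace fact the lexer now stores.
using LeadingSpaceFlags = std::vector<bool>;

// Leading whitespace is a direct lookup on the token itself.
auto HasLeadingWhitespace(const LeadingSpaceFlags& flags, size_t token)
    -> bool {
  return flags[token];
}

// Trailing whitespace is derived: a token has trailing whitespace exactly
// when a next token exists and that token has leading whitespace.
auto HasTrailingWhitespace(const LeadingSpaceFlags& flags, size_t token)
    -> bool {
  return token + 1 < flags.size() && flags[token + 1];
}

This is also why the testdata expectations above move the whitespace annotation onto the following token, and why `FileEnd` now typically reports `has_leading_space: true` whenever the file ends with a newline.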
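Finally, the `is_recovery` bit moves out of each token and into a side `llvm::BitVector`, so files with no recovery tokens pay nothing per token. A rough sketch of that lookup pattern, with invented names (`DemoBuffer`, `MarkRecoveryToken`) standing in for whatever the recovery pass will actually use:

#include <llvm/ADT/BitVector.h>

// Recovery flags live in a side table rather than in each token record, so
// the common case of "no recovery tokens" stays an empty vector.
class DemoBuffer {
 public:
  auto IsRecoveryToken(int token_index) const -> bool {
    // An empty bit vector means no recovery tokens were ever inserted.
    if (recovery_tokens_.empty()) {
      return false;
    }
    return recovery_tokens_[token_index];
  }

  auto MarkRecoveryToken(int token_index, int num_tokens) -> void {
    // Grow lazily so storage is only allocated when recovery happens, and
    // only up to the number of tokens in the buffer.
    if (recovery_tokens_.size() < static_cast<unsigned>(num_tokens)) {
      recovery_tokens_.resize(num_tokens);
    }
    recovery_tokens_.set(token_index);
  }

 private:
  llvm::BitVector recovery_tokens_;
};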