Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std.zig.tokenizer: simplification and spec conformance #20885

Merged
merged 3 commits into from
Aug 1, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lib/std/crypto/ml_kem.zig
Original file line number Diff line number Diff line change
@@ -677,10 +677,10 @@ fn montReduce(x: i32) i16 {
// Note gcd(2¹⁶, q) = 1 as q is prime. Write q' := 62209 = q⁻¹ mod R.
// First we compute
//
// m := ((x mod R) q') mod R
// m := ((x mod R) q') mod R
// = x q' mod R
// = int16(x q')
// = int16(int32(x) * int32(q'))
// = int16(x q')
// = int16(int32(x) * int32(q'))
//
// Note that x q' might be as big as 2³² and could overflow the int32
// multiplication in the last line. However for any int32s a and b,
9 changes: 4 additions & 5 deletions lib/std/macho.zig
Original file line number Diff line number Diff line change
@@ -203,8 +203,7 @@ pub const symtab_command = extern struct {
/// local symbols (static and debugging symbols) - grouped by module
/// defined external symbols - grouped by module (sorted by name if not lib)
/// undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
/// and in order they were seen by the static
/// linker if MH_BINDATLOAD is set)
/// and in order they were seen by the static linker if MH_BINDATLOAD is set)
/// In this load command there are offsets and counts to each of the three groups
/// of symbols.
///
@@ -219,9 +218,9 @@ pub const symtab_command = extern struct {
/// shared library. For executable and object modules, which are files
/// containing only one module, the information that would be in these three
/// tables is determined as follows:
/// table of contents - the defined external symbols are sorted by name
/// module table - the file contains only one module so everything in the
/// file is part of the module.
/// table of contents - the defined external symbols are sorted by name
/// module table - the file contains only one module so everything in the file
/// is part of the module.
/// reference symbol table - is the defined and undefined external symbols
///
/// For dynamically linked shared library files this load command also contains
33 changes: 14 additions & 19 deletions lib/std/unicode.zig
Original file line number Diff line number Diff line change
@@ -95,16 +95,13 @@ pub inline fn utf8EncodeComptime(comptime c: u21) [

const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error;

/// Decodes the UTF-8 codepoint encoded in the given slice of bytes.
/// bytes.len must be equal to utf8ByteSequenceLength(bytes[0]) catch unreachable.
/// If you already know the length at comptime, you can call one of
/// utf8Decode2,utf8Decode3,utf8Decode4 directly instead of this function.
/// Deprecated. This function has an awkward API that is too easy to use incorrectly.
pub fn utf8Decode(bytes: []const u8) Utf8DecodeError!u21 {
return switch (bytes.len) {
1 => @as(u21, bytes[0]),
2 => utf8Decode2(bytes),
3 => utf8Decode3(bytes),
4 => utf8Decode4(bytes),
1 => bytes[0],
2 => utf8Decode2(bytes[0..2].*),
3 => utf8Decode3(bytes[0..3].*),
4 => utf8Decode4(bytes[0..4].*),
else => unreachable,
};
}
@@ -113,8 +110,7 @@ const Utf8Decode2Error = error{
Utf8ExpectedContinuation,
Utf8OverlongEncoding,
};
pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
assert(bytes.len == 2);
pub fn utf8Decode2(bytes: [2]u8) Utf8Decode2Error!u21 {
assert(bytes[0] & 0b11100000 == 0b11000000);
var value: u21 = bytes[0] & 0b00011111;

@@ -130,7 +126,7 @@ pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
const Utf8Decode3Error = Utf8Decode3AllowSurrogateHalfError || error{
Utf8EncodesSurrogateHalf,
};
pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
pub fn utf8Decode3(bytes: [3]u8) Utf8Decode3Error!u21 {
const value = try utf8Decode3AllowSurrogateHalf(bytes);

if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
@@ -142,8 +138,7 @@ const Utf8Decode3AllowSurrogateHalfError = error{
Utf8ExpectedContinuation,
Utf8OverlongEncoding,
};
pub fn utf8Decode3AllowSurrogateHalf(bytes: []const u8) Utf8Decode3AllowSurrogateHalfError!u21 {
assert(bytes.len == 3);
pub fn utf8Decode3AllowSurrogateHalf(bytes: [3]u8) Utf8Decode3AllowSurrogateHalfError!u21 {
assert(bytes[0] & 0b11110000 == 0b11100000);
var value: u21 = bytes[0] & 0b00001111;

@@ -165,8 +160,7 @@ const Utf8Decode4Error = error{
Utf8OverlongEncoding,
Utf8CodepointTooLarge,
};
pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u21 {
assert(bytes.len == 4);
pub fn utf8Decode4(bytes: [4]u8) Utf8Decode4Error!u21 {
assert(bytes[0] & 0b11111000 == 0b11110000);
var value: u21 = bytes[0] & 0b00000111;

@@ -1637,12 +1631,13 @@ pub fn wtf8Encode(c: u21, out: []u8) error{CodepointTooLarge}!u3 {

const Wtf8DecodeError = Utf8Decode2Error || Utf8Decode3AllowSurrogateHalfError || Utf8Decode4Error;

/// Deprecated. This function has an awkward API that is too easy to use incorrectly.
pub fn wtf8Decode(bytes: []const u8) Wtf8DecodeError!u21 {
return switch (bytes.len) {
1 => @as(u21, bytes[0]),
2 => utf8Decode2(bytes),
3 => utf8Decode3AllowSurrogateHalf(bytes),
4 => utf8Decode4(bytes),
1 => bytes[0],
2 => utf8Decode2(bytes[0..2].*),
3 => utf8Decode3AllowSurrogateHalf(bytes[0..3].*),
4 => utf8Decode4(bytes[0..4].*),
else => unreachable,
};
}
2 changes: 1 addition & 1 deletion lib/std/zig/Ast.zig
Original file line number Diff line number Diff line change
@@ -69,7 +69,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!A
const token = tokenizer.next();
try tokens.append(gpa, .{
.tag = token.tag,
.start = @as(u32, @intCast(token.loc.start)),
.start = @intCast(token.loc.start),
});
if (token.tag == .eof) break;
}
15 changes: 3 additions & 12 deletions lib/std/zig/AstGen.zig
Original file line number Diff line number Diff line change
@@ -11351,6 +11351,9 @@ fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token
.{raw_string[bad_index]},
);
},
.empty_char_literal => {
return astgen.failOff(token, offset, "empty character literal", .{});
},
}
}

@@ -13820,21 +13823,9 @@ fn lowerAstErrors(astgen: *AstGen) !void {
var msg: std.ArrayListUnmanaged(u8) = .{};
defer msg.deinit(gpa);

const token_starts = tree.tokens.items(.start);
const token_tags = tree.tokens.items(.tag);

var notes: std.ArrayListUnmanaged(u32) = .{};
defer notes.deinit(gpa);

const tok = parse_err.token + @intFromBool(parse_err.token_is_prev);
if (token_tags[tok] == .invalid) {
const bad_off: u32 = @intCast(tree.tokenSlice(tok).len);
const byte_abs = token_starts[tok] + bad_off;
try notes.append(gpa, try astgen.errNoteTokOff(tok, bad_off, "invalid byte: '{'}'", .{
std.zig.fmtEscapes(tree.source[byte_abs..][0..1]),
}));
}

for (tree.errors[1..]) |note| {
if (!note.is_note) break;

1 change: 0 additions & 1 deletion lib/std/zig/parser_test.zig
Original file line number Diff line number Diff line change
@@ -6061,7 +6061,6 @@ test "recovery: invalid container members" {
, &[_]Error{
.expected_expr,
.expected_comma_after_field,
.expected_type_expr,
.expected_semi_after_stmt,
});
}
24 changes: 19 additions & 5 deletions lib/std/zig/string_literal.zig
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const std = @import("../std.zig");
const assert = std.debug.assert;
const utf8Decode = std.unicode.utf8Decode;
const utf8Encode = std.unicode.utf8Encode;

pub const ParseError = error{
@@ -37,12 +36,16 @@ pub const Error = union(enum) {
expected_single_quote: usize,
/// The character at this index cannot be represented without an escape sequence.
invalid_character: usize,
/// `''`. Not returned for string literals.
empty_char_literal,
};

/// Only validates escape sequence characters.
/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
/// Asserts the slice starts and ends with single-quotes.
/// Returns an error if there is not exactly one UTF-8 codepoint in between.
pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
if (slice.len < 3) return .{ .failure = .empty_char_literal };
assert(slice[0] == '\'');
assert(slice[slice.len - 1] == '\'');

switch (slice[1]) {
'\\' => {
@@ -55,7 +58,18 @@ pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
},
0 => return .{ .failure = .{ .invalid_character = 1 } },
else => {
const codepoint = utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
const inner = slice[1 .. slice.len - 1];
const n = std.unicode.utf8ByteSequenceLength(inner[0]) catch return .{
.failure = .{ .invalid_unicode_codepoint = 1 },
};
if (inner.len > n) return .{ .failure = .{ .expected_single_quote = 1 + n } };
const codepoint = switch (n) {
1 => inner[0],
2 => std.unicode.utf8Decode2(inner[0..2].*),
3 => std.unicode.utf8Decode3(inner[0..3].*),
4 => std.unicode.utf8Decode4(inner[0..4].*),
else => unreachable,
} catch return .{ .failure = .{ .invalid_unicode_codepoint = 1 } };
return .{ .success = codepoint };
},
}
92 changes: 46 additions & 46 deletions lib/std/zig/system/darwin/macos.zig
Original file line number Diff line number Diff line change
@@ -303,16 +303,16 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>7W98</string>
\\ <key>ProductCopyright</key>
\\ <string>Apple Computer, Inc. 1983-2004</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.3.9</string>
\\ <key>ProductVersion</key>
\\ <string>10.3.9</string>
\\ <key>ProductBuildVersion</key>
\\ <string>7W98</string>
\\ <key>ProductCopyright</key>
\\ <string>Apple Computer, Inc. 1983-2004</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.3.9</string>
\\ <key>ProductVersion</key>
\\ <string>10.3.9</string>
\\</dict>
\\</plist>
,
@@ -323,18 +323,18 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>19G68</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.15.6</string>
\\ <key>ProductVersion</key>
\\ <string>10.15.6</string>
\\ <key>iOSSupportVersion</key>
\\ <string>13.6</string>
\\ <key>ProductBuildVersion</key>
\\ <string>19G68</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.15.6</string>
\\ <key>ProductVersion</key>
\\ <string>10.15.6</string>
\\ <key>iOSSupportVersion</key>
\\ <string>13.6</string>
\\</dict>
\\</plist>
,
@@ -345,18 +345,18 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>20A2408</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.0</string>
\\ <key>ProductVersion</key>
\\ <string>11.0</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.2</string>
\\ <key>ProductBuildVersion</key>
\\ <string>20A2408</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.0</string>
\\ <key>ProductVersion</key>
\\ <string>11.0</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.2</string>
\\</dict>
\\</plist>
,
@@ -367,18 +367,18 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>20C63</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.1</string>
\\ <key>ProductVersion</key>
\\ <string>11.1</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.3</string>
\\ <key>ProductBuildVersion</key>
\\ <string>20C63</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.1</string>
\\ <key>ProductVersion</key>
\\ <string>11.1</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.3</string>
\\</dict>
\\</plist>
,
Loading