Skip to content

Commit

Permalink
Merge pull request #19174 from squeek502/lazy-resinator
Browse files Browse the repository at this point in the history
Lazily compile the `zig rc` subcommand and use it during `zig build-exe`, etc
  • Loading branch information
andrewrk authored Mar 12, 2024
2 parents f60c24c + 3f92cbe commit 0c61466
Show file tree
Hide file tree
Showing 25 changed files with 1,797 additions and 1,091 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
142 changes: 105 additions & 37 deletions src/resinator/cli.zig → lib/compiler/resinator/cli.zig

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ pub const CodePage = enum(u16) {
pub const Utf8 = struct {
/// Implements decoding with rejection of ill-formed UTF-8 sequences based on section
/// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically).
///
/// Note: This does not match "U+FFFD Substitution of Maximal Subparts", but instead
/// matches the behavior of the Windows RC compiler.
pub const WellFormedDecoder = struct {
/// Like std.unicode.utf8ByteSequenceLength, but:
/// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
Expand Down Expand Up @@ -347,9 +350,6 @@ pub const Utf8 = struct {
// Only include the byte in the invalid sequence if it's in the range
// of a continuation byte. All other values should not be included in the
// invalid sequence.
//
// Note: This is how the Windows RC compiler handles this; it may not be
// the correct way to do it according to the Unicode standard.
if (isContinuationByte(byte)) len += 1;
return .{ .value = Codepoint.invalid, .byte_len = len };
}
Expand Down Expand Up @@ -437,6 +437,19 @@ test "codepointAt invalid utf8" {
}, CodePage.utf8.codepointAt(1, invalid_utf8).?);
try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8));
}

{
// encoded high surrogate
const invalid_utf8 = "\xED\xA0\xBD";
try std.testing.expectEqual(Codepoint{
.value = Codepoint.invalid,
.byte_len = 2,
}, CodePage.utf8.codepointAt(0, invalid_utf8).?);
try std.testing.expectEqual(Codepoint{
.value = Codepoint.invalid,
.byte_len = 1,
}, CodePage.utf8.codepointAt(2, invalid_utf8).?);
}
}

test "codepointAt utf8 encoded" {
Expand Down
48 changes: 33 additions & 15 deletions src/resinator/comments.zig → lib/compiler/resinator/comments.zig
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair;

/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 {
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) ![]u8 {
std.debug.assert(buf.len >= source.len);
var result = UncheckedSliceWriter{ .slice = buf };
const State = enum {
Expand Down Expand Up @@ -85,7 +85,7 @@ pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMa
else => {},
},
.multiline_comment => switch (c) {
'\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
'\r' => try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
'\n' => {
_ = line_handler.incrementLineNumber(index);
result.write(c);
Expand All @@ -95,7 +95,7 @@ pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMa
},
.multiline_comment_end => switch (c) {
'\r' => {
handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
// We only want to treat this as a newline if it's part of a CRLF pair. If it's
// not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
// functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
Expand Down Expand Up @@ -184,13 +184,21 @@ inline fn handleMultilineCarriageReturn(
index: usize,
result: *UncheckedSliceWriter,
source_mappings: ?*SourceMappings,
) void {
) !void {
// This is a dumb way to go about this, but basically we want to determine
// if this is part of a distinct CRLF or LFCR pair. This function call will detect
// LFCR pairs correctly since the function we're in will only be called on CR,
// but will not detect CRLF pairs since it only looks at the line ending before the
// CR. So, we do a second (forward) check if the first fails to detect CRLF that is
// not part of another pair.
const is_lfcr_pair = line_handler.currentIndexFormsLineEndingPair(index);
const is_crlf_pair = !is_lfcr_pair and formsLineEndingPair(source, '\r', index + 1);
// Note: Bare \r within a multiline comment should *not* be treated as a line ending for the
// purposes of removing comments, but *should* be treated as a line ending for the
// purposes of line counting/source mapping
_ = line_handler.incrementLineNumber(index);
// So only write the \r if it's part of a CRLF pair
if (formsLineEndingPair(source, '\r', index + 1)) {
// So only write the \r if it's part of a CRLF/LFCR pair
if (is_lfcr_pair or is_crlf_pair) {
result.write('\r');
}
// And otherwise, we want to collapse the source mapping so that we can still know which
Expand All @@ -200,15 +208,15 @@ inline fn handleMultilineCarriageReturn(
// the next collapse acts on the first of the collapsed line numbers
line_handler.line_number -= 1;
if (source_mappings) |mappings| {
mappings.collapse(line_handler.line_number, 1);
try mappings.collapse(line_handler.line_number, 1);
}
}
}

pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
const buf = try allocator.alloc(u8, source.len);
errdefer allocator.free(buf);
const result = removeComments(source, buf, source_mappings);
const result = try removeComments(source, buf, source_mappings);
return allocator.realloc(buf, result.len);
}

Expand Down Expand Up @@ -252,6 +260,16 @@ test "line comments retain newlines" {
try testRemoveComments("\r\n", "//comment\r\n");
}

test "unfinished multiline comment" {
    // The source opens a `/*` comment that is never closed before the end
    // of input. The expected output is the same text with the `/*` gone
    // and the trailing newline kept.
    const source = "unfinished/*\n";
    const expected = "unfinished\n";
    try testRemoveComments(expected, source);
}

test "crazy" {
try testRemoveComments(
\\blah"/*som*/\""BLAH
Expand Down Expand Up @@ -321,20 +339,20 @@ test "remove comments with mappings" {
var mut_source = "blah/*\rcommented line*\r/blah".*;
var mappings = SourceMappings{};
_ = try mappings.files.put(allocator, "test.rc");
try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 });
try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 });
try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 });
try mappings.set(1, 1, 0);
try mappings.set(2, 2, 0);
try mappings.set(3, 3, 0);
defer mappings.deinit(allocator);

const result = removeComments(&mut_source, &mut_source, &mappings);
const result = try removeComments(&mut_source, &mut_source, &mappings);

try std.testing.expectEqualStrings("blahblah", result);
try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len);
try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line);
try std.testing.expectEqual(@as(usize, 1), mappings.end_line);
try std.testing.expectEqual(@as(usize, 3), mappings.getCorrespondingSpan(1).?.end_line);
}

test "in place" {
var mut_source = "blah /* comment */ blah".*;
const result = removeComments(&mut_source, &mut_source, null);
const result = try removeComments(&mut_source, &mut_source, null);
try std.testing.expectEqualStrings("blah blah", result);
}
107 changes: 78 additions & 29 deletions src/resinator/compile.zig → lib/compiler/resinator/compile.zig
Original file line number Diff line number Diff line change
Expand Up @@ -321,10 +321,7 @@ pub const Compiler = struct {

return buf.toOwnedSlice();
},
else => {
std.debug.print("unexpected filename token type: {}\n", .{literal_node.token});
unreachable; // no other token types should be in a filename literal node
},
else => unreachable, // no other token types should be in a filename literal node
}
},
.binary_expression => {
Expand Down Expand Up @@ -404,6 +401,72 @@ pub const Compiler = struct {
return first_error orelse error.FileNotFound;
}

/// Parses the quoted string literal in `token` into the byte string that is
/// written as the data of a DLGINCLUDE resource.
///
/// A leading `L`/`l` prefix is dropped before parsing, but whether it was
/// present changes how each parsed codepoint is turned into output bytes:
///
/// - With the prefix: codepoints 0x00-0x7F and 0xA0-0xFF are written as the
///   single byte of the same value. Codepoints 0x80-0x9F are written as the
///   same byte only when `windows1252.bestFitFromCodepoint` yields a result,
///   and as `?` otherwise. Everything else goes through the best-fit
///   fallback described below.
/// - Without the prefix: a codepoint that came from an escaped integer is
///   truncated to one byte. Everything else goes through the best-fit
///   fallback.
///
/// Best-fit fallback: write the Windows-1252 best-fit byte if there is one;
/// otherwise write `?` when the codepoint is below 0x10000 or is
/// `Codepoint.invalid`, and `??` for any other codepoint.
///
/// The reasoning behind these rules is not known; they were chosen to match
/// the behavior of rc.exe.
///
/// The returned slice is allocated with `self.allocator`. Errors from
/// allocation and from the iterative string parser are propagated.
pub fn parseDlgIncludeString(self: *Compiler, token: Token) ![]u8 {
    // The L prefix is stripped for parsing purposes so that escaped integers
    // stay limited to their ascii string range. Its presence is remembered
    // because it affects the output conversion further down.
    var bytes = self.sourceBytesForToken(token);
    const was_wide_string = switch (bytes.slice[0]) {
        'L', 'l' => true,
        else => false,
    };
    if (was_wide_string) bytes.slice = bytes.slice[1..];

    var buf = try std.ArrayList(u8).initCapacity(self.allocator, bytes.slice.len);
    errdefer buf.deinit();

    var iterative_parser = literals.IterativeStringParser.init(bytes, .{
        .start_column = token.calculateColumn(self.source, 8, null),
        .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
    });

    // No real idea what's going on here, but this matches the rc.exe behavior
    while (try iterative_parser.next()) |parsed| {
        const c = parsed.codepoint;

        if (was_wide_string) {
            // Codepoints that pass through as a byte of the same value.
            if (c <= 0x7F or (c >= 0xA0 and c <= 0xFF)) {
                try buf.append(@intCast(c));
                continue;
            }
            // 0x80-0x9F: kept as-is only if a best fit exists at all.
            if (c >= 0x80 and c <= 0x9F) {
                if (windows1252.bestFitFromCodepoint(c)) |_| {
                    try buf.append(@intCast(c));
                } else {
                    try buf.append('?');
                }
                continue;
            }
        } else if (parsed.from_escaped_integer) {
            // Escaped integers in non-wide strings are cut down to one byte.
            try buf.append(@truncate(c));
            continue;
        }

        // Shared fallback for every codepoint not handled above.
        if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
            try buf.append(best_fit);
        } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
            try buf.append('?');
        } else {
            try buf.appendSlice("??");
        }
    }

    return buf.toOwnedSlice();
}

pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
// Init header with data size zero for now, will need to fill it in later
var header = try self.resourceHeader(node.id, node.type, .{});
Expand All @@ -414,13 +477,16 @@ pub const Compiler = struct {
// DLGINCLUDE has special handling that doesn't actually need the file to exist
if (maybe_predefined_type != null and maybe_predefined_type.? == .DLGINCLUDE) {
const filename_token = node.filename.cast(.literal).?.token;
const parsed_filename = try self.parseQuotedStringAsAsciiString(filename_token);
const parsed_filename = try self.parseDlgIncludeString(filename_token);
defer self.allocator.free(parsed_filename);

// NUL within the parsed string acts as a terminator
const parsed_filename_terminated = std.mem.sliceTo(parsed_filename, 0);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
header.data_size = @intCast(parsed_filename.len + 1);
header.data_size = @intCast(parsed_filename_terminated.len + 1);
try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
try writer.writeAll(parsed_filename);
try writer.writeAll(parsed_filename_terminated);
try writer.writeByte(0);
try writeDataPadding(writer, header.data_size);
return;
Expand Down Expand Up @@ -1141,21 +1207,15 @@ pub const Compiler = struct {
errdefer self.allocator.free(parsed_string);
return .{ .wide_string = parsed_string };
},
else => {
std.debug.print("unexpected token in literal node: {}\n", .{literal_node.token});
unreachable; // no other token types should be in a data literal node
},
else => unreachable, // no other token types should be in a data literal node
}
},
.binary_expression, .grouped_expression => {
const result = evaluateNumberExpression(expression_node, self.source, self.input_code_pages);
return .{ .number = result };
},
.not_expression => unreachable,
else => {
std.debug.print("{}\n", .{expression_node.id});
@panic("TODO: evaluateDataExpression");
},
else => unreachable,
}
}

Expand Down Expand Up @@ -1669,6 +1729,7 @@ pub const Compiler = struct {
};
}

// We know the data_buffer len is limited to u32 max.
const data_size: u32 = @intCast(data_buffer.items.len);
var header = try self.resourceHeader(node.id, node.type, .{
.data_size = data_size,
Expand Down Expand Up @@ -1966,6 +2027,7 @@ pub const Compiler = struct {
try data_writer.writeInt(u16, 1, .little);
try data_writer.writeInt(u16, button_width.asWord(), .little);
try data_writer.writeInt(u16, button_height.asWord(), .little);
// Number of buttons is guaranteed by the parser to be within maxInt(u16).
try data_writer.writeInt(u16, @as(u16, @intCast(node.buttons.len)), .little);

for (node.buttons) |button_or_sep| {
Expand Down Expand Up @@ -2806,19 +2868,6 @@ pub const Compiler = struct {
);
}

/// Convenience wrapper around `literals.parseQuotedStringAsAsciiString` that
/// fills in the compiler's allocator, the source bytes of `token`, the
/// token's start column, and the diagnostics context.
/// The returned slice is allocated by `self.allocator`.
pub fn parseQuotedStringAsAsciiString(self: *Compiler, token: Token) ![]u8 {
    const token_bytes = self.sourceBytesForToken(token);
    const start_column = token.calculateColumn(self.source, 8, null);
    return literals.parseQuotedStringAsAsciiString(self.allocator, token_bytes, .{
        .start_column = start_column,
        .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
    });
}

/// Appends `details` to the compiler's diagnostics list.
/// Any error from the append is returned to the caller.
fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void {
    return self.diagnostics.append(details);
}
Expand Down Expand Up @@ -3356,7 +3405,7 @@ test "StringTable" {
}
break :ids buf;
};
var prng = std.Random.DefaultPrng.init(0);
var prng = std.rand.DefaultPrng.init(0);
var random = prng.random();
random.shuffle(u16, &ids);

Expand Down
Loading

0 comments on commit 0c61466

Please sign in to comment.