From 9272d16db0f3a44c002e5f595b1d59380f75bdfb Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Wed, 20 Nov 2024 00:47:00 -0500 Subject: [PATCH] zig fetch: support SHA-256 Git repositories Closes #21888 WIP: needs more testing --- src/Package/Fetch.zig | 29 +- src/Package/Fetch/git.zig | 306 +++++++++++++----- .../{testrepo.idx => testrepo-sha1.idx} | Bin .../{testrepo.pack => testrepo-sha1.pack} | Bin .../Fetch/git/testdata/testrepo-sha256.idx | Bin 0 -> 4536 bytes .../Fetch/git/testdata/testrepo-sha256.pack | Bin 0 -> 8429 bytes src/main.zig | 4 +- 7 files changed, 242 insertions(+), 97 deletions(-) rename src/Package/Fetch/git/testdata/{testrepo.idx => testrepo-sha1.idx} (100%) rename src/Package/Fetch/git/testdata/{testrepo.pack => testrepo-sha1.pack} (100%) create mode 100644 src/Package/Fetch/git/testdata/testrepo-sha256.idx create mode 100644 src/Package/Fetch/git/testdata/testrepo-sha256.pack diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 95aa41a44434..d2c0214f5efa 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -814,7 +814,7 @@ const Resource = union(enum) { const Git = struct { session: git.Session, fetch_stream: git.Session.FetchStream, - want_oid: [git.oid_length]u8, + want_oid: git.Oid, }; fn deinit(resource: *Resource) void { @@ -976,7 +976,7 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re const want_oid = want_oid: { const want_ref = if (uri.fragment) |fragment| try fragment.toRawMaybeAlloc(arena) else "HEAD"; - if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {} + if (git.Oid.parseAny(want_ref)) |oid| break :want_oid oid else |_| {} const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref}); const want_ref_tag = try std.fmt.allocPrint(arena, "refs/tags/{s}", .{want_ref}); @@ -1018,17 +1018,13 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("try .url = \"{;+/}#{}\",", .{ - uri, std.fmt.fmtSliceHexLower(&want_oid), - }), + .msg = try eb.printString("try .url = \"{;+/}#{}\",", .{ uri, want_oid }), })); return error.FetchFailed; } - var want_oid_buf: [git.fmt_oid_length]u8 = undefined; - _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{ - std.fmt.fmtSliceHexLower(&want_oid), - }) catch unreachable; + var want_oid_buf: [git.Oid.max_formatted_length]u8 = undefined; + _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{want_oid}) catch unreachable; var fetch_stream = session.fetch(&.{&want_oid_buf}, server_header_buffer) catch |err| { return f.fail(f.location_tok, try eb.printString( "unable to create fetch stream: {s}", @@ -1163,7 +1159,7 @@ fn unpackResource( }); return try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, - .git_pack => return unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) { + .git_pack => return unpackGitPack(f, tmp_directory.handle, &resource.git) catch |err| switch (err) { error.FetchFailed => return error.FetchFailed, error.OutOfMemory => return error.OutOfMemory, else => |e| return f.fail(f.location_tok, try eb.printString( @@ -1298,11 +1294,10 @@ fn unzip(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackResult { return res; } -fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!UnpackResult { +fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource.Git) anyerror!UnpackResult { const arena = f.arena.allocator(); const gpa = f.arena.child_allocator; - const want_oid = resource.git.want_oid; - const reader = resource.git.fetch_stream.reader(); + const object_format: git.Oid.Format = resource.want_oid; var res: UnpackResult = .{}; // The .git directory is used to store the packfile and associated index, but @@ -1314,7 +1309,7 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!Unpac var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); defer pack_file.close(); var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(reader, pack_file.writer()); + try fifo.pump(resource.fetch_stream.reader(), pack_file.writer()); try pack_file.sync(); var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); @@ -1323,7 +1318,7 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!Unpac const index_prog_node = f.prog_node.start("Index pack", 0); defer index_prog_node.end(); var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); - try git.indexPack(gpa, pack_file, index_buffered_writer.writer()); + try git.indexPack(gpa, object_format, pack_file, index_buffered_writer.writer()); try index_buffered_writer.flush(); try index_file.sync(); } @@ -1331,10 +1326,10 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!Unpac { const checkout_prog_node = f.prog_node.start("Checkout", 0); defer checkout_prog_node.end(); - var repository = try git.Repository.init(gpa, pack_file, index_file); + var repository = try git.Repository.init(gpa, object_format, pack_file, index_file); defer repository.deinit(); var diagnostics: git.Diagnostics = .{ .allocator = arena }; - try repository.checkout(out_dir, want_oid, &diagnostics); + try repository.checkout(out_dir, resource.want_oid, &diagnostics); if (diagnostics.errors.items.len > 0) { try res.allocErrors(arena, diagnostics.errors.items.len, "unable to unpack packfile"); diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index b4ca3a98761b..d1ba3446c777 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -9,32 +9,132 @@ const mem = std.mem; const testing = std.testing; const Allocator = mem.Allocator; const Sha1 = std.crypto.hash.Sha1; +const Sha256 = std.crypto.hash.sha2.Sha256; const assert = std.debug.assert; -pub const oid_length = Sha1.digest_length; -pub const fmt_oid_length = 2 * oid_length; /// The ID of a Git object (an SHA-1 hash). -pub const Oid = [oid_length]u8; +pub const Oid = union(Format) { + sha1: [Sha1.digest_length]u8, + sha256: [Sha256.digest_length]u8, + + pub const max_formatted_length = len: { + var max: usize = 0; + for (std.enums.values(Format)) |f| { + max = @max(max, f.formattedLength()); + } + break :len max; + }; + + pub const Format = enum { + sha1, + sha256, + + pub fn byteLength(f: Format) usize { + return switch (f) { + .sha1 => Sha1.digest_length, + .sha256 => Sha256.digest_length, + }; + } -pub fn parseOid(s: []const u8) !Oid { - if (s.len != fmt_oid_length) return error.InvalidOid; - var oid: Oid = undefined; - for (&oid, 0..) |*b, i| { - b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid; + pub fn formattedLength(f: Format) usize { + return 2 * f.byteLength(); + } + }; + + const Hasher = union(Format) { + sha1: Sha1, + sha256: Sha256, + + fn init(oid_format: Format) Hasher { + return switch (oid_format) { + .sha1 => .{ .sha1 = Sha1.init(.{}) }, + .sha256 => .{ .sha256 = Sha256.init(.{}) }, + }; + } + + pub fn update(hasher: *Hasher, b: []const u8) void { + switch (hasher.*) { + inline else => |*inner| inner.update(b), + } + } + + fn finalResult(hasher: *Hasher) Oid { + return switch (hasher.*) { + inline else => |*inner, tag| @unionInit(Oid, @tagName(tag), inner.finalResult()), + }; + } + }; + + pub fn fromBytes(oid_format: Format, bytes: []const u8) Oid { + assert(bytes.len == oid_format.byteLength()); + return switch (oid_format) { + inline else => |tag| @unionInit(Oid, @tagName(tag), bytes[0..comptime tag.byteLength()].*), + }; } - return oid; -} -test parseOid { - try testing.expectEqualSlices( - u8, - &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 }, - &try parseOid("ce919ccf45951856a762ffdb8ef850301cd8c588"), - ); - try testing.expectError(error.InvalidOid, parseOid("ce919ccf")); - try testing.expectError(error.InvalidOid, parseOid("master")); - try testing.expectError(error.InvalidOid, parseOid("HEAD")); -} + pub fn readBytes(oid_format: Format, reader: anytype) @TypeOf(reader).NoEofError!Oid { + return switch (oid_format) { + inline else => |tag| @unionInit(Oid, @tagName(tag), try reader.readBytesNoEof(tag.byteLength())), + }; + } + + pub fn parse(oid_format: Format, s: []const u8) error{InvalidOid}!Oid { + switch (oid_format) { + inline else => |tag| { + if (s.len != tag.formattedLength()) return error.InvalidOid; + var bytes: [tag.byteLength()]u8 = undefined; + for (&bytes, 0..) |*b, i| { + b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid; + } + return @unionInit(Oid, @tagName(tag), bytes); + }, + } + } + + test parse { + try testing.expectEqualSlices( + u8, + &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 }, + &(try parse(.sha1, "ce919ccf45951856a762ffdb8ef850301cd8c588")).sha1, + ); + try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf45951856a762ffdb8ef850301cd8c588")); + try testing.expectError(error.InvalidOid, parse(.sha1, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a")); + try testing.expectEqualSlices( + u8, + &.{ 0x7F, 0x44, 0x4A, 0x92, 0xBD, 0x45, 0x72, 0xEE, 0x4A, 0x28, 0xB2, 0xC6, 0x30, 0x59, 0x92, 0x4A, 0x9C, 0xA1, 0x82, 0x91, 0x38, 0x55, 0x3E, 0xF3, 0xE7, 0xC4, 0x1E, 0xE1, 0x59, 0xAF, 0xAE, 0x7A }, + &(try parse(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a")).sha256, + ); + try testing.expectError(error.InvalidOid, parse(.sha1, "ce919ccf")); + try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf")); + try testing.expectError(error.InvalidOid, parse(.sha1, "master")); + try testing.expectError(error.InvalidOid, parse(.sha256, "master")); + try testing.expectError(error.InvalidOid, parse(.sha1, "HEAD")); + try testing.expectError(error.InvalidOid, parse(.sha256, "HEAD")); + } + + pub fn parseAny(s: []const u8) error{InvalidOid}!Oid { + return for (std.enums.values(Format)) |f| { + if (s.len == f.formattedLength()) break parse(f, s); + } else error.InvalidOid; + } + + pub fn format( + oid: Oid, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = fmt; + _ = options; + try writer.print("{}", .{std.fmt.fmtSliceHexLower(oid.slice())}); + } + + pub fn slice(oid: *const Oid) []const u8 { + return switch (oid.*) { + inline else => |*bytes| bytes, + }; + } +}; pub const Diagnostics = struct { allocator: Allocator, @@ -72,8 +172,8 @@ pub const Diagnostics = struct { pub const Repository = struct { odb: Odb, - pub fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Repository { - return .{ .odb = try Odb.init(allocator, pack_file, index_file) }; + pub fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Repository { + return .{ .odb = try Odb.init(allocator, format, pack_file, index_file) }; } pub fn deinit(repository: *Repository) void { @@ -92,7 +192,7 @@ pub const Repository = struct { const tree_oid = tree_oid: { const commit_object = try repository.odb.readObject(); if (commit_object.type != .commit) return error.NotACommit; - break :tree_oid try getCommitTree(commit_object.data); + break :tree_oid try getCommitTree(repository.odb.format, commit_object.data); }; try repository.checkoutTree(worktree, tree_oid, "", diagnostics); } @@ -114,7 +214,11 @@ pub const Repository = struct { const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data); defer repository.odb.allocator.free(tree_data); - var tree_iter: TreeIterator = .{ .data = tree_data }; + var tree_iter: TreeIterator = .{ + .format = repository.odb.format, + .data = tree_data, + .pos = 0, + }; while (try tree_iter.next()) |entry| { switch (entry.type) { .directory => { @@ -170,19 +274,20 @@ pub const Repository = struct { /// Returns the ID of the tree associated with the given commit (provided as /// raw object data). - fn getCommitTree(commit_data: []const u8) !Oid { + fn getCommitTree(format: Oid.Format, commit_data: []const u8) !Oid { if (!mem.startsWith(u8, commit_data, "tree ") or - commit_data.len < "tree ".len + fmt_oid_length + "\n".len or - commit_data["tree ".len + fmt_oid_length] != '\n') + commit_data.len < "tree ".len + format.formattedLength() + "\n".len or + commit_data["tree ".len + format.formattedLength()] != '\n') { return error.InvalidCommit; } - return try parseOid(commit_data["tree ".len..][0..fmt_oid_length]); + return try .parse(format, commit_data["tree ".len..][0..format.formattedLength()]); } const TreeIterator = struct { + format: Oid.Format, data: []const u8, - pos: usize = 0, + pos: usize, const Entry = struct { type: Type, @@ -220,8 +325,9 @@ pub const Repository = struct { const name = iterator.data[iterator.pos..name_end :0]; iterator.pos = name_end + 1; + const oid_length = iterator.format.byteLength(); if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree; - const oid = iterator.data[iterator.pos..][0..oid_length].*; + const oid: Oid = .fromBytes(iterator.format, iterator.data[iterator.pos..][0..oid_length]); iterator.pos += oid_length; return .{ .type = @"type", .executable = executable, .name = name, .oid = oid }; @@ -235,6 +341,7 @@ pub const Repository = struct { /// The format of the packfile and its associated index are documented in /// [pack-format](https://git-scm.com/docs/pack-format). const Odb = struct { + format: Oid.Format, pack_file: std.fs.File, index_header: IndexHeader, index_file: std.fs.File, @@ -242,11 +349,12 @@ const Odb = struct { allocator: Allocator, /// Initializes the database from open pack and index files. - fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Odb { + fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Odb { try pack_file.seekTo(0); try index_file.seekTo(0); const index_header = try IndexHeader.read(index_file.reader()); return .{ + .format = format, .pack_file = pack_file, .index_header = index_header, .index_file = index_file, @@ -268,7 +376,7 @@ const Odb = struct { const base_object = while (true) { if (odb.cache.get(base_offset)) |base_object| break base_object; - base_header = try EntryHeader.read(odb.pack_file.reader()); + base_header = try EntryHeader.read(odb.format, odb.pack_file.reader()); switch (base_header) { .ofs_delta => |ofs_delta| { try delta_offsets.append(odb.allocator, base_offset); @@ -292,6 +400,7 @@ const Odb = struct { const base_data = try resolveDeltaChain( odb.allocator, + odb.format, odb.pack_file, base_object, delta_offsets.items, @@ -302,15 +411,18 @@ const Odb = struct { } /// Seeks to the beginning of the object with the given ID. + /// Asserts that the format of `oid` matches that of `odb`. fn seekOid(odb: *Odb, oid: Oid) !void { - const key = oid[0]; + assert(oid == odb.format); + const oid_length = odb.format.byteLength(); + const key = oid.slice()[0]; var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0; var end_index = odb.index_header.fan_out_table[key]; const found_index = while (start_index < end_index) { const mid_index = start_index + (end_index - start_index) / 2; try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length); - const mid_oid = try odb.index_file.reader().readBytesNoEof(oid_length); - switch (mem.order(u8, &mid_oid, &oid)) { + const mid_oid = try Oid.readBytes(odb.format, odb.index_file.reader()); + switch (mem.order(u8, mid_oid.slice(), oid.slice())) { .lt => start_index = mid_index + 1, .gt => end_index = mid_index, .eq => break mid_index, @@ -495,6 +607,7 @@ pub const Session = struct { location: Location, supports_agent: bool, supports_shallow: bool, + object_format: Oid.Format, allocator: Allocator, const agent = "zig/" ++ @import("builtin").zig_version_string; @@ -513,6 +626,7 @@ pub const Session = struct { .location = try .init(allocator, uri), .supports_agent = false, .supports_shallow = false, + .object_format = .sha1, .allocator = allocator, }; errdefer session.deinit(); @@ -528,6 +642,10 @@ pub const Session = struct { session.supports_shallow = true; } } + } else if (mem.eql(u8, capability.key, "object-format")) { + if (std.meta.stringToEnum(Oid.Format, capability.value orelse continue)) |format| { + session.object_format = format; + } } } return session; @@ -708,6 +826,11 @@ pub const Session = struct { if (session.supports_agent) { try Packet.write(.{ .data = agent_capability }, body_writer); } + { + const object_format_packet = try std.fmt.allocPrint(session.allocator, "object-format={s}\n", .{@tagName(session.object_format)}); + defer session.allocator.free(object_format_packet); + try Packet.write(.{ .data = object_format_packet }, body_writer); + } try Packet.write(.delimiter, body_writer); for (options.ref_prefixes) |ref_prefix| { const ref_prefix_packet = try std.fmt.allocPrint(session.allocator, "ref-prefix {s}\n", .{ref_prefix}); @@ -739,10 +862,14 @@ pub const Session = struct { try request.wait(); if (request.response.status != .ok) return error.ProtocolError; - return .{ .request = request }; + return .{ + .format = session.object_format, + .request = request, + }; } pub const RefIterator = struct { + format: Oid.Format, request: std.http.Client.Request, buf: [Packet.max_data_length]u8 = undefined, @@ -764,7 +891,7 @@ pub const Session = struct { .data => |data| { const ref_data = Packet.normalizeText(data); const oid_sep_pos = mem.indexOfScalar(u8, ref_data, ' ') orelse return error.InvalidRefPacket; - const oid = parseOid(data[0..oid_sep_pos]) catch return error.InvalidRefPacket; + const oid = Oid.parse(iterator.format, data[0..oid_sep_pos]) catch return error.InvalidRefPacket; const name_sep_pos = mem.indexOfScalarPos(u8, ref_data, oid_sep_pos + 1, ' ') orelse ref_data.len; const name = ref_data[oid_sep_pos + 1 .. name_sep_pos]; @@ -778,7 +905,7 @@ pub const Session = struct { if (mem.startsWith(u8, attribute, "symref-target:")) { symref_target = attribute["symref-target:".len..]; } else if (mem.startsWith(u8, attribute, "peeled:")) { - peeled = parseOid(attribute["peeled:".len..]) catch return error.InvalidRefPacket; + peeled = Oid.parse(iterator.format, attribute["peeled:".len..]) catch return error.InvalidRefPacket; } last_sep_pos = next_sep_pos; } @@ -814,6 +941,11 @@ pub const Session = struct { if (session.supports_agent) { try Packet.write(.{ .data = agent_capability }, body_writer); } + { + const object_format_packet = try std.fmt.allocPrint(session.allocator, "object-format={s}\n", .{@tagName(session.object_format)}); + defer session.allocator.free(object_format_packet); + try Packet.write(.{ .data = object_format_packet }, body_writer); + } try Packet.write(.delimiter, body_writer); // Our packfile parser supports the OFS_DELTA object type try Packet.write(.{ .data = "ofs-delta\n" }, body_writer); @@ -997,7 +1129,7 @@ const EntryHeader = union(Type) { }; } - fn read(reader: anytype) !EntryHeader { + fn read(format: Oid.Format, reader: anytype) !EntryHeader { const InitialByte = packed struct { len: u4, type: u3, has_next: bool }; const initial: InitialByte = @bitCast(reader.readByte() catch |e| switch (e) { error.EndOfStream => return error.InvalidFormat, @@ -1016,7 +1148,7 @@ const EntryHeader = union(Type) { .uncompressed_length = uncompressed_length, } }, .ref_delta => .{ .ref_delta = .{ - .base_object = reader.readBytesNoEof(oid_length) catch |e| switch (e) { + .base_object = Oid.readBytes(format, reader) catch |e| switch (e) { error.EndOfStream => return error.InvalidFormat, else => |other| return other, }, @@ -1081,7 +1213,7 @@ const IndexEntry = struct { /// Writes out a version 2 index for the given packfile, as documented in /// [pack-format](https://git-scm.com/docs/pack-format). -pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) !void { +pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, index_writer: anytype) !void { try pack.seekTo(0); var index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry) = .empty; @@ -1089,7 +1221,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) var pending_deltas: std.ArrayListUnmanaged(IndexEntry) = .empty; defer pending_deltas.deinit(allocator); - const pack_checksum = try indexPackFirstPass(allocator, pack, &index_entries, &pending_deltas); + const pack_checksum = try indexPackFirstPass(allocator, format, pack, &index_entries, &pending_deltas); var cache: ObjectCache = .{}; defer cache.deinit(allocator); @@ -1099,7 +1231,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) while (i > 0) { i -= 1; const delta = pending_deltas.items[i]; - if (try indexPackHashDelta(allocator, pack, delta, index_entries, &cache)) |oid| { + if (try indexPackHashDelta(allocator, format, pack, delta, index_entries, &cache)) |oid| { try index_entries.put(allocator, oid, delta); _ = pending_deltas.swapRemove(i); } @@ -1117,7 +1249,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) } mem.sortUnstable(Oid, oids.items, {}, struct { fn lessThan(_: void, o1: Oid, o2: Oid) bool { - return mem.lessThan(u8, &o1, &o2); + return mem.lessThan(u8, o1.slice(), o2.slice()); } }.lessThan); @@ -1125,15 +1257,16 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) var count: u32 = 0; var fan_out_index: u8 = 0; for (oids.items) |oid| { - if (oid[0] > fan_out_index) { - @memset(fan_out_table[fan_out_index..oid[0]], count); - fan_out_index = oid[0]; + const fan_out_byte = oid.slice()[0]; + if (fan_out_byte > fan_out_index) { + @memset(fan_out_table[fan_out_index..fan_out_byte], count); + fan_out_index = fan_out_byte; } count += 1; } @memset(fan_out_table[fan_out_index..], count); - var index_hashed_writer = hashedWriter(index_writer, Sha1.init(.{})); + var index_hashed_writer = hashedWriter(index_writer, Oid.Hasher.init(format)); const writer = index_hashed_writer.writer(); try writer.writeAll(IndexHeader.signature); try writer.writeInt(u32, IndexHeader.supported_version, .big); @@ -1142,7 +1275,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) } for (oids.items) |oid| { - try writer.writeAll(&oid); + try writer.writeAll(oid.slice()); } for (oids.items) |oid| { @@ -1165,9 +1298,9 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) try writer.writeInt(u64, offset, .big); } - try writer.writeAll(&pack_checksum); + try writer.writeAll(pack_checksum.slice()); const index_checksum = index_hashed_writer.hasher.finalResult(); - try index_writer.writeAll(&index_checksum); + try index_writer.writeAll(index_checksum.slice()); } /// Performs the first pass over the packfile data for index construction. @@ -1176,13 +1309,14 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) /// format). fn indexPackFirstPass( allocator: Allocator, + format: Oid.Format, pack: std.fs.File, index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry), pending_deltas: *std.ArrayListUnmanaged(IndexEntry), -) ![Sha1.digest_length]u8 { +) !Oid { var pack_buffered_reader = std.io.bufferedReader(pack.reader()); var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader()); - var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Sha1.init(.{})); + var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format)); const pack_reader = pack_hashed_reader.reader(); const pack_header = try PackHeader.read(pack_reader); @@ -1191,12 +1325,12 @@ fn indexPackFirstPass( while (current_entry < pack_header.total_objects) : (current_entry += 1) { const entry_offset = pack_counting_reader.bytes_read; var entry_crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init()); - const entry_header = try EntryHeader.read(entry_crc32_reader.reader()); + const entry_header = try EntryHeader.read(format, entry_crc32_reader.reader()); switch (entry_header) { .commit, .tree, .blob, .tag => |object| { var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader()); var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader()); - var entry_hashed_writer = hashedWriter(std.io.null_writer, Sha1.init(.{})); + var entry_hashed_writer = hashedWriter(std.io.null_writer, Oid.Hasher.init(format)); const entry_writer = entry_hashed_writer.writer(); // The object header is not included in the pack data but is // part of the object's ID @@ -1229,8 +1363,8 @@ fn indexPackFirstPass( } const pack_checksum = pack_hashed_reader.hasher.finalResult(); - const recorded_checksum = try pack_buffered_reader.reader().readBytesNoEof(Sha1.digest_length); - if (!mem.eql(u8, &pack_checksum, &recorded_checksum)) { + const recorded_checksum = try Oid.readBytes(format, pack_buffered_reader.reader()); + if (!mem.eql(u8, pack_checksum.slice(), recorded_checksum.slice())) { return error.CorruptedPack; } _ = pack_reader.readByte() catch |e| switch (e) { @@ -1245,6 +1379,7 @@ fn indexPackFirstPass( /// delta and we do not yet know the offset of the base object). fn indexPackHashDelta( allocator: Allocator, + format: Oid.Format, pack: std.fs.File, delta: IndexEntry, index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry), @@ -1259,7 +1394,7 @@ fn indexPackHashDelta( if (cache.get(base_offset)) |base_object| break base_object; try pack.seekTo(base_offset); - base_header = try EntryHeader.read(pack.reader()); + base_header = try EntryHeader.read(format, pack.reader()); switch (base_header) { .ofs_delta => |ofs_delta| { try delta_offsets.append(allocator, base_offset); @@ -1279,9 +1414,9 @@ fn indexPackHashDelta( } }; - const base_data = try resolveDeltaChain(allocator, pack, base_object, delta_offsets.items, cache); + const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache); - var entry_hasher = Sha1.init(.{}); + var entry_hasher: Oid.Hasher = .init(format); var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher); try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len }); entry_hasher.update(base_data); @@ -1294,6 +1429,7 @@ fn indexPackHashDelta( /// to obtain the final object. fn resolveDeltaChain( allocator: Allocator, + format: Oid.Format, pack: std.fs.File, base_object: Object, delta_offsets: []const u64, @@ -1306,7 +1442,7 @@ fn resolveDeltaChain( const delta_offset = delta_offsets[i]; try pack.seekTo(delta_offset); - const delta_header = try EntryHeader.read(pack.reader()); + const delta_header = try EntryHeader.read(format, pack.reader()); const delta_data = try readObjectRaw(allocator, pack.reader(), delta_header.uncompressedLength()); defer allocator.free(delta_data); var delta_stream = std.io.fixedBufferStream(delta_data); @@ -1424,16 +1560,22 @@ fn hashedWriter( return .{ .child_writer = writer, .hasher = hasher }; } -test "packfile indexing and checkout" { - // To verify the contents of this packfile without using the code in this - // file: - // - // 1. Create a new empty Git repository (`git init`) - // 2. `git unpack-objects note the "dangling commit" ID (which matches the commit - // checked out below) - // 4. `git checkout dd582c0720819ab7130b103635bd7271b9fd4feb` - const testrepo_pack = @embedFile("git/testdata/testrepo.pack"); +/// Runs the packfile indexing and checkout test. +/// +/// The two testrepo repositories under testdata contain identical commit +/// histories and contents. +/// +/// To verify the contents of the packfiles using Git alone, run the +/// following commands in an empty directory: +/// +/// 1. `git init --object-format=(sha1|sha256)` +/// 2. `git unpack-objects rRY$dUa8a*+QU6kzWclt5FRgBs}HL4e#Z zXn;P0_7Bj3+@H7%@@FudgAwR6n1165h?)O|1+4!cY@p^BSAjm`+BvQR&G8$YAU=cZ zFK~nWFL;3F1@Qd=evlLR3xXgo^c%t;K92~9MFHafBNCuq5`g@d8(@|C3(_Dj^Isqf zYR)4EV);Ly0CHz2{fi1%&rtm@xCLs?qXy#NQ3tv6Xn^=US|HXw$8Dg`&^?D9(EkTk z#aHqrw4Jbr>IB6}PZwNieCuuo3vhdCP zhD(MIKiaa#bk}K`_&Yh$v$~I*#7xQ?TOck)bl(3quNNRgUg@E$Ls1=WD;JeG-<;!r zu#5MMo1nhUuRd~3q?afhF@#Pfln`1|7i-gsNIk034;*H}I3b3qzev60R0zL7mmPpV z*55yv2V>;juod-e7K>mo!dt)B=&#OkZ&0g-VY3%rzqMO&8ZC6f=TvK$Ok*U{@~l<+ zWU`x~SG4OgBXqD*9Z@Rk&mt|{azIqyCr9w~$Af(N+W4yznbcZ(WQw%4L_aLOu9SqdC$j6} z##2d$Z&veHs&e@uBMPhjL}fq7!zy36Lf@6m9%{z&1}ciWb|ype zB0t2-ss8*YATVHJNKez}C8sz_=(UbP+{mE;xkB`A8lj}B=h3wpJLe?+id7l*J(n2a z?jO4D&DTY|(%^`%`h~~&3w(8n2>)_@)#VH)D;{)HiKyX&w*JI+%2BM|6>DT1N^^kPXielggBIfdT%*5P^`DxQo`)_7w*O!ZnflCCdTRw^AnoqKm+#`*o3@&9J zX{gQamAYDH$4gnKxX6`@Y;_g9zNwEGQ{Si`k|;5tVqZQcF`%@5hyTE*Q844~@SSMi zEe$EAzz)w=jeyNKQ(lEHD~_D@A$Skw8cc}tA@BY2p-QZwf2_^$#oPG9&4K?Xn1VQ*>Wy6dKLPTGMiL2 zZ3Rsc@d}UM;%)melsm>jdXtqp6e~6t%a<##WHo*~Tkvu2O5j#z(0%_)dUWE`VwIP^ z+gt0xqy1?-jP22CVgge`92TZm3%u6! zNhKpEG*MLd#_sA{`gw!$GT&fz;!VZu_@~BboresfHG}zFl9e`e9*fiIOAd5QhIfNE zv24Yc-$`Ru$A?D+ehLw&Uc^cZ*jYde@{luA+?EgDS^vyF6j~h86>+scq=lY*JMSGs zzX{$fB_{cn^5STunp++fg=7A$zRkomSjY=jqbkWKPrQ>alTSGDDLfR3Ba5hmMbV<4 zbtL$r!)@fp+fc!TisV(d4mUnn)($hqGanvTKNvUv-d!2d|B;f`9!Vv__KJcJOZMvH z=)PLAgx$%G9TVHZy!_j9EMMS*tY(Nt(E z6=fuiv38*|{ONDIXGP?OBkr>8UU@>jfAQN4G0P%*T48OUCVbakVyPu2bAz zGg|-gH zPfB|}iPu_svhT0Z__AHRU7#AH=TrK)GpkG=NqpbmkE3o#1wW7vHj`s~wH^B= z^{t(E1~rM?e9NSlUjFipJ z3dXOlgj=`XTfCkYF49n5NwpA@2ZM^-#OaD4oC|7+lj*IjXQ>d`DnGeW#oyEU8crZ@ zkw2*9*eLq_P=f4zfFA9~0AU*jyjI0-Q5V4{9sH(Q4voCt^rqJ=`wqSPxN)(ggD`Gv zaBT_t&FLu`Sv5*4nj7>-hLUj3fQ)2jNvXdXrt)&;x?2ZCnOF$t%AaK&yGRTc|oJ{?Fhb0pv*Y{Do1pXD%up8OE?UgF2 z4wAFU2N`_heaBK$JI63;XU4(xhZR=djpV!H5euFx92}lp+)a^_^A1}36mm%CAy>}1 zWClcpioGAi`tn5nD<$FShc;#l=Js1{S)>qw-U1%vb3wdb`_Hj&1pEAbdf(n_$i%K) z5lNYJiBA;9F$)tu+*@MK@1KF^+gWb#XB#SbW9*_gIRprYIX^9j{JgEg?u?;U=G**! zgLuO?~^5@|)3R#33`; z0M6bHlXm~T`wZ$VA#@c3(@jc+`)qZB2h~j1bAFJ9jYW{lCy}w+P1xwq){#ZCSm7D} z@R19iy|7L+?Rj+$PrngIcw=8-EZ)Q{uVdGa?6j4vK61B+Z`78r+azk}d8NP$S3*Y^ zYE3rPY5U|Ko5{Cb><|57=1S^X_ax9kSwaVbQ{s)%D8VR9fW2JbozfYh1^2-#8rIKT zWh$$tFth{^o{|1XZAibaVv!H6i#zO(z9sv?VqTd{OVN6Dy>+2Ls;>O>CH==ZJP76} zp6;GgYwcSf(h`ckwDsoIMTb8M8AGXG>P`cXK(L$OAp%n_fV zk!6t=O>l+K@+56os(yk`nVa}^)>1Y}>R5WA4+|w+@b7j|^O0Am2p)tMRKr+^pP5=B zE=?heJ-M>|MCyQOC-bl;AuYc5tET-5jl)a4!DY1V>nk{?5uS|4k7r{b5Zq#5k%5;b z1%8*16=>l3po9>}rAUyQ29_82rc1!z(Heti0pQhW_JNIU19jcN%ANw-3_LK!BCz1V zOVa^Q1_vI5(hRJ`;Ou+wOfUrMLct2W^F`pT$bfgD0A7+2_;>{H_Ot;I2(cTekp^b~ z*nuE!2KhK(|I>hn0e69Pf|+DsHaJT+0rt^gzBL3wPz`zoLm)5*a2^AC0>4A446Hlw z!la-t;U}PhPor@L+y{5X2XP;O0L%h@m{=16p$`E)bHLMx4pC(03EmkAkNVQ}F+EfVp5kRX%v`hyiy4-*^es6Q_W4L7-E$d-D` F{RezNZ=wJI literal 0 HcmV?d00001 diff --git a/src/Package/Fetch/git/testdata/testrepo-sha256.pack b/src/Package/Fetch/git/testdata/testrepo-sha256.pack new file mode 100644 index 0000000000000000000000000000000000000000..3801c14451f2554c8202eff9bf15293a910cc5ff GIT binary patch literal 8429 zcmb`MbySpF+s2=PA%`xda{%cQ1q7r+x}=*CX+#w1E=A(dB`w`8C7?qLAxJ17AOaGS z3K9d~pvUvcIezQB@ACU&{@H7uXaDBf_kCUWUZWu;a{~YXG}K2glgJ}I3L)i$&WLyY zmH5*+v?2#9K1JOGM=uDve2TI~F;4G6NwLWFM8EA5QN%GjSG%D(Rx74(6Mzc{v|_KUX%j29aiDq*lVy<@|-hDGlQtr!=1!V3fyEkJ|k}1%_ zid=X_9#6kkY<%F{WBbMuP6M&wOlCr3&0*rHbMIH2GEslRiOqs_|Av8-{N;^e8aWqP z{*9~%PX~E}%7hP9MrE0Iy5xKM97ikM#RpwpQND{_JH7_A$OG10T%Nx!Pb65N<* zcC|j2N2_LxSFB^>j>s~yNgSb#zi8}=eXj)P4T7JLUX@ex zcCHhlh);P|C;_-gm`>tiRR@ELc=P==R!sreLmlP#53j;vSV98&#^b&J*mM^gbV;C; zcszA13iFt;Y1OqmhfJ_2Qg zXoA0ki=n`YfMH+tc?dWPo*4}dw(a6}7}7V$^T>Vi;h4+?=)2j10xzY-&gZujj7kTI zM|?J<_IymMa7XElV_`{?kQ{juG3mwZx=)%Y!8dj|Q$pF2*#_??q|g{u6uvz7QQue}~*T z0~SC^btP`(HD_%qz^xp~&?+kZB4=Ee)DUxZBUH3GC~L8tc6877=>WNkQc5;qjszkv zE3JF75PAhp!7@z&uC;edNAH%FE6hU0 zXVD5c;_L^>c!)stzSEmWhI>!aQW#|NsdyO96t6}C#=9Eu@8CDoy7*9v`>4%B$fA}P zC{Z29-EeV=^s=IRF_EV)g$t1AygkUw)RZ955*D_fG92jS}&*+23T8~Qq=E3HweKZ*f1Kj<&w zl4Rph>-fQaR^r3RbfydJ)w?y^jNSFqovt+QF{wqN!@D*9=#}0N8`Spwi{$$92K z5h=#I1fZcR<@yxiXA^bcJGhh@n2`tr<#2OoRuaP)x)J$lb)l^(kJUho8x zoVUksFUgd!)8;S321M-w z=x^{?6WTRwTESe!`yf=iPu3icZJ`~7K~C*Coq7j|t0YRRPeq5b8_vk6r}a}q6GuWc z!-8dd$n2GctgZX`Z##{Pg~GHr3NCR(E_IG!-X?!KyLK=Aq6~cK|C+OTRd#odLa^cK@g?5xIOP(j29?6lmIP543@W)1g-Rom)z+8x@5eUS_d` zT-ATPJrkU#-9^jDC{Oh!bp1%A1O4it#nMV<_YSMms+JXLP_sxyWC+Mw1@k}<9oG|r9@lodW1J$1{sCK@hXL0RGP zVsPUzaM+~&<_GwNY&=I zWZ#x^qFVQwc`Rt^_%eDyxcxDu+5NQ5X9Fk?zw6ibLMPrp2j@KoHiN^Tof$|87P4;v z(1Y7)vEm);HB^xC{&(Q22CR>%9|j^%T<1Wvu42z+L0VzLN6LxsDxO$|alnpu2l*pN#_ zd`j7t>B30Ei3Pk18S}`-!6B>AB$MT-^4*1rp)R@#MwkT;6U#gfw)xsw*rOT~gOn`h zvm2%PQ9tARJ$%fU>doJ{5#I{s`YRvxSO{J3UBz6|h>adHhH@W`EnKvhFB>~YHkREbM~u=|eA)*qdq?sCDjMlmj$ZzJzb4c45(6IC=idPdGXybI^S0VZp;YTS3l#njFQtYKeNYjW+d zJtoCrG;EY>xmO!hn=(dp^+#$F7hS?uex>DhnLt6@tK4%D}&Yt++s z=&6ZR8Me>NQ~&%4dm%hg6sI zXcYUHh+;IQDMg3W99YjTJ4%j>$sbNSFWA$sxYumAz4?0lI}M&B#a%w?j>aVn@bmx%d^Pp=A1htV1pfZm=g_(vlCt za>pGItYS>PVDZ|xj8%8$tT#tZ9Gk!O=8EZ$-mE?Gc9SSp#FU-J08Or*nfRB}>F!`K z)cpc>CDLYjbPt{Ef?DGb#sAk+-b7gaU!J0%1`EU&_oBFu=C-Y>6YB=LjIJ8TnUtuY zS-RR~tFdVnCIq%Db^ddZq)k1`c{q> zj(s+Yvqwd_RZvFBPNqWPW#n|hahH^y=ZuLxqo6ksO|u&;=E6}9WcUWo@fUc%_pHsI z;01l*;0U{x4osr;@8Asyc=%V{S^zYZ8u`R4{!a!{@F>}HAq75eB9$B9(P1DW$~rxp zpM<*CL59T&kGl!rx+joYgZA^2^C)_J{Mg9?V_<<1(sOPvp|ic({zfIVh+2FJ2F`zT z0kzlz2AyAAMlJS%(efBQ(gpeX`1mvwd$}yYh2><>H}|nU1k1RH zKgcnPIO)@a*6Eb#xc2GfX(P|QWZ%!c@3@%vjAc&NQt_YS;o~7xRLDjI+_Wg^ptssZ zgv?@|e?@5dOpUQR+^U|vw<#=Qr2Sw?aXRw>Vk+h}6~Bj3`BeAY?s{+O(I?NmxuYle zk*^u&gAIi3C25HWwe|O+F%fv4nTijFSi7ZTO8DZS^8@G~|>b z!Fv6{P<;0mtEAyg#_;fsOg`D9dS+4tMVe1r4Xki4d5t@hc)hMmPV1cxT9oA+ z3i6Z*He?Rn)GTFsM6RbDrOI0?ndBUMjPSOQm?Y`Lq4jf>-nz^tPRm>nR zVD#K?ctjG?2nnMu&Ug{ySsZLDJJFf!k0(!BqjIo1W>Y`H;PjSM`tMlmW_r*SJOX4>322Gq?qWef9cFS zoqSm19~3f+G%x0g2u+KYNT!fS)8$mOTliUHthKu>yHqaqM>)|JbkT*+@r4nr zBIDq%{mZ7kc*%^^N*xX)_x= ziqcy4-IeZwdl-N?ntgkIzS z$;tFiu{ z;g2A?=I5U%!JGJB9bjb@)5b64Ay&~w$%BO0wER*!@Mt^)upTGmKd;neqG7=oWQtA# z)vX?_&GO2=Iv@B_#ly;k-7P(0@lEOiM_Y4qm}O2(V#*(Vm9||=@iHf-pQH*(?TpNf zTY5aZw8q6!%miGw%fmYpJCqddqAu2}#U@3+C&+Y$p1jmwuU;*Qw^HDiHWo;^&vPTK zZtt@=CvX=-z~J|VqF+N0Hm~nio<>|o@;lWV##0$x>BxAlg?!UTkGR@HI~NRn`gaTX zT0VO0SJars9kf)EE6C!r)L8d9)>brM9Z-FRsvfOk0QT+H3u;iOX^P?dX0f(AO(+bQ%y8;cib_Ps?V8oA2bKmzEq>~h z7~k~P+B+4!>B%?dGjyGgKasCJPNe-@;WP7x`P|`#K{-JjYBnm<{;-(a7_@8@(ault ztn`p$aZ>XkQIg71M*j$`Ox$M(7dT`Q%Kc@hFrW=rt5KF+^=7#)-0>B^c|Zm??btm} zwmy1G3TA^oc+OJ(ty7>YY>ch{v<;M{pl%=j5a6x{p_z367fPI{bRrx8><#M3g8ZBm zE(Y}8TbiexjpSkUPk7#mcja`%eF{L72+Ou0tqGx9~sOw2PZN+uJugLQ8yS*0Nfc*>g zShYo$MXy=&7#}1$9>}u%M-x1tL3Cs9_ciSG(W9RB5w@krd@kR2)kP@vg@E}JhgLoh zI&*fLfdh|mb7teeyN&HGw;>m6+_UcG^|gr6ChCbodFT3+%O#ePrCU+gWF`pQ0HZ>p zSHF<{+dXtR1|_{Sp?Sl(f#my5uL*!1VfFsbDFo4_O{knq7(FWYvlqMCuwCulzn?&j zI3`VG8*JP*Vr!HnRVpLyYErHU@_MwWCZdaYHI;djaqFaW&tJGsJ}e9o!Z};b{UkhZ zGTCBz*2pNOWqG!NIM&BfOvdI#jRT01dNyd3i}0)C%#pGb8BjK$^4>bc6;(v488Ns3 zxD+(`JNR7zba276dqoa>el;f*Be2|2_`MysraZ2uxy)x1@2Ax58P8&SnWL3pgFjJh zQHNi_*~>Jf;|MZ^{`NH$EuFxHa_|0nZ>Bg)tI!NTXGfczOut!q*}aHQcnRb~)fmHU7|;{KHW<)BN4lL0 zryn%KoF(6kS5aDcT~#UuTG8RU4|7DBbB}kDYo`f-y@@t=>&&csS~6}2i7*4W9B9JJ z=>8fgoGU;N3a149M1U$~7D#JS(vNG-OvB2CrE@#`& za**}^(i;A&>+`=^L-8t;EiA|OENUiuuV#}1SO#dK1=Lbq4;Y!}>>@##Hba^{uh*++ RVxek!R|_1GlKqCO{{yzYcZUD~ literal 0 HcmV?d00001 diff --git a/src/main.zig b/src/main.zig index 291820cb7523..354704c97276 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6987,8 +6987,8 @@ fn cmdFetch( var saved_path_or_url = path_or_url; - if (fetch.latest_commit) |*latest_commit| resolved: { - const latest_commit_hex = try std.fmt.allocPrint(arena, "{}", .{std.fmt.fmtSliceHexLower(latest_commit)}); + if (fetch.latest_commit) |latest_commit| resolved: { + const latest_commit_hex = try std.fmt.allocPrint(arena, "{}", .{latest_commit}); var uri = try std.Uri.parse(path_or_url);