//! Represents one independent job whose responsibility is to:
//!
//! 1. Check the global zig package cache to see if the hash already exists.
//! If so, load, parse, and validate the build.zig.zon file therein, and
//! goto step 8. Likewise if the location is a relative path, treat this
//! the same as a cache hit. Otherwise, proceed.
//! 2. Fetch and unpack a URL into a temporary directory.
//! 3. Load, parse, and validate the build.zig.zon file therein. It is allowed
//! for the file to be missing, in which case this fetched package is considered
//! to be a "naked" package.
//! 4. Apply inclusion rules of the build.zig.zon to the temporary directory by
//! deleting excluded files. If any errors occurred for files that were
//! ultimately excluded, those errors should be ignored, such as failure to
//! create symlinks that weren't supposed to be included anyway.
//! 5. Compute the package hash based on the remaining files in the temporary
//! directory.
//! 6. Rename the temporary directory into the global zig package cache
//! directory. If the hash already exists, delete the temporary directory and
//! leave the zig package cache directory untouched as it may be in use by the
//! system. This is done even if the hash is invalid, in case the package with
//! the different hash is used in the future.
//! 7. Validate the computed hash against the expected hash. If invalid,
//! this job is done.
//! 8. Spawn a new fetch job for each dependency in the manifest file. Use
//! a mutex and a hash map so that redundant jobs do not get queued up.
//!
//! All of this must be done while referring only to the state inside this struct
//! because this work will be done in a dedicated thread.
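//
// A hedged usage sketch (not the CLI's exact code; `gpa`, `http_client`,
// `thread_pool`, and `global_cache_directory` are assumed to be provided by
// the caller). The driver sets up the shared JobQueue, seeds one root Fetch
// with the input fields below, and waits for the pool to drain:
//
//     var job_queue: Fetch.JobQueue = .{
//         .http_client = &http_client,
//         .thread_pool = &thread_pool,
//         .global_cache = global_cache_directory,
//         .recursive = true,
//         .debug_hash = false,
//         .work_around_btrfs_bug = false,
//     };
//     defer job_queue.deinit();
//     // ...populate a root `Fetch`, append it to `job_queue.all_fetches`,
//     // spawn `workerRun` on the thread pool, then `wait_group.wait()`.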
arena: std.heap.ArenaAllocator,
location: Location,
location_tok: std.zig.Ast.TokenIndex,
hash_tok: std.zig.Ast.TokenIndex,
parent_package_root: Package.Path,
parent_manifest_ast: ?*const std.zig.Ast,
prog_node: *std.Progress.Node,
job_queue: *JobQueue,
/// If true, don't add an error for a missing hash. This flag is not passed
/// down to recursive dependencies. It's intended to be used only by the CLI.
omit_missing_hash_error: bool,
/// If true, don't fail when a manifest file is missing the `paths` field,
/// which specifies inclusion rules. This is intended to be true for the first
/// fetch task and false for the recursive dependencies.
allow_missing_paths_field: bool,
// Above this are fields provided as inputs to `run`.
// Below this are fields populated by `run`.
/// This will either be relative to `global_cache`, or to the build root of
/// the root package.
package_root: Package.Path,
error_bundle: ErrorBundle.Wip,
manifest: ?Manifest,
manifest_ast: std.zig.Ast,
actual_hash: Manifest.Digest,
/// Fetch logic notices whether a package has a build.zig file and sets this flag.
has_build_zig: bool,
/// Indicates whether the task aborted due to an out-of-memory condition.
oom_flag: bool,
// This field is used by the CLI only, untouched by this file.
/// The module for this `Fetch` task's package, which exposes `build.zig` as
/// the root source file.
module: ?*Package.Module,
/// Contains shared state among all `Fetch` tasks.
pub const JobQueue = struct {
mutex: std.Thread.Mutex = .{},
/// It's an array hash map so that it can be sorted before rendering the
/// dependencies.zig source file.
/// Protected by `mutex`.
table: Table = .{},
/// `table` may be missing some tasks such as ones that failed, so this
/// field contains references to all of them.
/// Protected by `mutex`.
all_fetches: std.ArrayListUnmanaged(*Fetch) = .{},
http_client: *std.http.Client,
thread_pool: *ThreadPool,
wait_group: WaitGroup = .{},
global_cache: Cache.Directory,
recursive: bool,
/// Dumps hash information to stdout which can be used to troubleshoot why
/// two hashes of the same package do not match.
/// If this is true, `recursive` must be false.
debug_hash: bool,
work_around_btrfs_bug: bool,
pub const Table = std.AutoArrayHashMapUnmanaged(Manifest.MultiHashHexDigest, *Fetch);
pub fn deinit(jq: *JobQueue) void {
if (jq.all_fetches.items.len == 0) return;
const gpa = jq.all_fetches.items[0].arena.child_allocator;
jq.table.deinit(gpa);
// These must be deinitialized in reverse order because subsequent
// `Fetch` instances are allocated in prior ones' arenas.
// Sorry, I know it's a bit weird, but it slightly simplifies the
// critical section.
while (jq.all_fetches.popOrNull()) |f| f.deinit();
jq.all_fetches.deinit(gpa);
jq.* = undefined;
}
/// Dumps all subsequent error bundles into the first one.
pub fn consolidateErrors(jq: *JobQueue) !void {
const root = &jq.all_fetches.items[0].error_bundle;
const gpa = root.gpa;
for (jq.all_fetches.items[1..]) |fetch| {
if (fetch.error_bundle.root_list.items.len > 0) {
var bundle = try fetch.error_bundle.toOwnedBundle("");
defer bundle.deinit(gpa);
try root.addBundleAsRoots(bundle);
}
}
}
/// Creates the dependencies.zig source code for the build runner to obtain
/// via `@import("@dependencies")`.
pub fn createDependenciesSource(jq: *JobQueue, buf: *std.ArrayList(u8)) Allocator.Error!void {
const keys = jq.table.keys();
assert(keys.len != 0); // caller should have added the first one
if (keys.len == 1) {
// This is the first one. It must have no dependencies.
return createEmptyDependenciesSource(buf);
}
try buf.appendSlice("pub const packages = struct {\n");
// Ensure the generated .zig file is deterministic.
jq.table.sortUnstable(@as(struct {
keys: []const Manifest.MultiHashHexDigest,
pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
return std.mem.lessThan(u8, &ctx.keys[a_index], &ctx.keys[b_index]);
}
}, .{ .keys = keys }));
for (keys, jq.table.values()) |hash, fetch| {
if (fetch == jq.all_fetches.items[0]) {
// The first one is a dummy package for the current project.
continue;
}
try buf.writer().print(
\\ pub const {} = struct {{
\\ pub const build_root = "{q}";
\\
, .{ std.zig.fmtId(&hash), fetch.package_root });
if (fetch.has_build_zig) {
try buf.writer().print(
\\ pub const build_zig = @import("{}");
\\
, .{std.zig.fmtEscapes(&hash)});
}
if (fetch.manifest) |*manifest| {
try buf.appendSlice(
\\ pub const deps: []const struct { []const u8, []const u8 } = &.{
\\
);
for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| {
const h = depDigest(fetch.package_root, jq.global_cache, dep) orelse continue;
try buf.writer().print(
" .{{ \"{}\", \"{}\" }},\n",
.{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(&h) },
);
}
try buf.appendSlice(
\\ };
\\ };
\\
);
} else {
try buf.appendSlice(
\\ pub const deps: []const struct { []const u8, []const u8 } = &.{};
\\ };
\\
);
}
}
try buf.appendSlice(
\\};
\\
\\pub const root_deps: []const struct { []const u8, []const u8 } = &.{
\\
);
const root_fetch = jq.all_fetches.items[0];
const root_manifest = &root_fetch.manifest.?;
for (root_manifest.dependencies.keys(), root_manifest.dependencies.values()) |name, dep| {
const h = depDigest(root_fetch.package_root, jq.global_cache, dep) orelse continue;
try buf.writer().print(
" .{{ \"{}\", \"{}\" }},\n",
.{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(&h) },
);
}
try buf.appendSlice("};\n");
}
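// For illustration, the generated dependencies.zig has this shape (the
// multihash digests and names here are hypothetical placeholders):
//
//     pub const packages = struct {
//         pub const @"1220aaaa..." = struct {
//             pub const build_root = "/path/to/global/cache/p/1220aaaa...";
//             pub const build_zig = @import("1220aaaa...");
//             pub const deps: []const struct { []const u8, []const u8 } = &.{
//                 .{ "some_dep", "1220bbbb..." },
//             };
//         };
//     };
//     pub const root_deps: []const struct { []const u8, []const u8 } = &.{
//         .{ "some_dep", "1220aaaa..." },
//     };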
pub fn createEmptyDependenciesSource(buf: *std.ArrayList(u8)) Allocator.Error!void {
try buf.appendSlice(
\\pub const packages = struct {};
\\pub const root_deps: []const struct { []const u8, []const u8 } = &.{};
\\
);
}
};
pub const Location = union(enum) {
remote: Remote,
/// A directory found inside the parent package.
relative_path: Package.Path,
/// Recursive Fetch tasks will never use this Location, but it may be
/// passed in by the CLI. Indicates the file contents here should be copied
/// into the global package cache. It may be a file relative to the cwd or
/// absolute, in which case it should be treated exactly like a `file://`
/// URL, or a directory, in which case it should be treated as an
/// already-unpacked directory (but still needs to be copied into the
/// global package cache and have inclusion rules applied).
path_or_url: []const u8,
pub const Remote = struct {
url: []const u8,
/// If this is null it means the user omitted the hash field from a dependency.
/// It will be an error but the logic should still fetch and print the discovered hash.
hash: ?Manifest.MultiHashHexDigest,
};
};
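// In a build.zig.zon manifest these map to dependency entries such as the
// following (hypothetical values):
//
//     .some_dep = .{
//         .url = "https://example.com/some_dep.tar.gz",
//         .hash = "1220aaaa...", // a Manifest.MultiHashHexDigest
//     },
//     .local_dep = .{
//         .path = "../local_dep",
//     },
//
// `path_or_url` has no manifest form; it is passed in directly by the CLI.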
pub const RunError = error{
OutOfMemory,
/// This error code is intended to be handled by inspecting the
/// `error_bundle` field.
FetchFailed,
};
pub fn run(f: *Fetch) RunError!void {
const eb = &f.error_bundle;
const arena = f.arena.allocator();
const gpa = f.arena.child_allocator;
const cache_root = f.job_queue.global_cache;
try eb.init(gpa);
// Check the global zig package cache to see if the hash already exists. If
// so, load, parse, and validate the build.zig.zon file therein, and skip
// ahead to queuing up jobs for dependencies. Likewise if the location is a
// relative path, treat this the same as a cache hit. Otherwise, proceed.
const remote = switch (f.location) {
.relative_path => |pkg_root| {
if (fs.path.isAbsolute(pkg_root.sub_path)) return f.fail(
f.location_tok,
try eb.addString("expected path relative to build root; found absolute path"),
);
if (f.hash_tok != 0) return f.fail(
f.hash_tok,
try eb.addString("path-based dependencies are not hashed"),
);
if ((std.mem.startsWith(u8, pkg_root.sub_path, "../") or
std.mem.eql(u8, pkg_root.sub_path, "..")) and
pkg_root.root_dir.eql(cache_root))
{
return f.fail(
f.location_tok,
try eb.printString("dependency path outside project: '{}{s}'", .{
pkg_root.root_dir, pkg_root.sub_path,
}),
);
}
f.package_root = pkg_root;
try loadManifest(f, pkg_root);
if (!f.has_build_zig) try checkBuildFileExistence(f);
if (!f.job_queue.recursive) return;
return queueJobsForDeps(f);
},
.remote => |remote| remote,
.path_or_url => |path_or_url| {
if (fs.cwd().openIterableDir(path_or_url, .{})) |dir| {
var resource: Resource = .{ .dir = dir };
return runResource(f, path_or_url, &resource, null);
} else |dir_err| {
const file_err = if (dir_err == error.NotDir) e: {
if (fs.cwd().openFile(path_or_url, .{})) |file| {
var resource: Resource = .{ .file = file };
return runResource(f, path_or_url, &resource, null);
} else |err| break :e err;
} else dir_err;
const uri = std.Uri.parse(path_or_url) catch |uri_err| {
return f.fail(0, try eb.printString(
"'{s}' could not be recognized as a file path ({s}) or an URL ({s})",
.{ path_or_url, @errorName(file_err), @errorName(uri_err) },
));
};
var resource = try f.initResource(uri);
return runResource(f, uri.path, &resource, null);
}
},
};
const s = fs.path.sep_str;
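// Fully fetched packages live directly under "p/<multihash>" in the global
// cache, so a successful access of that path is a cache hit.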
if (remote.hash) |expected_hash| {
const pkg_sub_path = "p" ++ s ++ expected_hash;
if (cache_root.handle.access(pkg_sub_path, .{})) |_| {
f.package_root = .{
.root_dir = cache_root,
.sub_path = try arena.dupe(u8, pkg_sub_path),
};
try loadManifest(f, f.package_root);
try checkBuildFileExistence(f);
if (!f.job_queue.recursive) return;
return queueJobsForDeps(f);
} else |err| switch (err) {
error.FileNotFound => {},
else => |e| {
try eb.addRootErrorMessage(.{
.msg = try eb.printString("unable to open global package cache directory '{}{s}': {s}", .{
cache_root, pkg_sub_path, @errorName(e),
}),
});
return error.FetchFailed;
},
}
}
// Fetch and unpack the remote into a temporary directory.
const uri = std.Uri.parse(remote.url) catch |err| return f.fail(
f.location_tok,
try eb.printString("invalid URI: {s}", .{@errorName(err)}),
);
var resource = try f.initResource(uri);
return runResource(f, uri.path, &resource, remote.hash);
}
pub fn deinit(f: *Fetch) void {
f.error_bundle.deinit();
f.arena.deinit();
}
/// Consumes `resource`, even if an error is returned.
fn runResource(
f: *Fetch,
uri_path: []const u8,
resource: *Resource,
remote_hash: ?Manifest.MultiHashHexDigest,
) RunError!void {
defer resource.deinit();
const arena = f.arena.allocator();
const eb = &f.error_bundle;
const s = fs.path.sep_str;
const cache_root = f.job_queue.global_cache;
const rand_int = std.crypto.random.int(u64);
const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
{
const tmp_directory_path = try cache_root.join(arena, &.{tmp_dir_sub_path});
var tmp_directory: Cache.Directory = .{
.path = tmp_directory_path,
.handle = handle: {
const dir = cache_root.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}) catch |err| {
try eb.addRootErrorMessage(.{
.msg = try eb.printString("unable to create temporary directory '{s}': {s}", .{
tmp_directory_path, @errorName(err),
}),
});
return error.FetchFailed;
};
break :handle dir.dir;
},
};
defer tmp_directory.handle.close();
try unpackResource(f, resource, uri_path, tmp_directory);
// Load, parse, and validate the unpacked build.zig.zon file. It is allowed
// for the file to be missing, in which case this fetched package is
// considered to be a "naked" package.
try loadManifest(f, .{ .root_dir = tmp_directory });
// Apply the manifest's inclusion rules to the temporary directory by
// deleting excluded files. If any error occurred for files that were
// ultimately excluded, those errors should be ignored, such as failure to
// create symlinks that weren't supposed to be included anyway.
// Empty directories have already been omitted by `unpackResource`.
const filter: Filter = .{
.include_paths = if (f.manifest) |m| m.paths else .{},
};
// Compute the package hash based on the remaining files in the temporary
// directory.
if (builtin.os.tag == .linux and f.job_queue.work_around_btrfs_bug) {
// https://github.com/ziglang/zig/issues/17095
tmp_directory.handle.close();
const iterable_dir = cache_root.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}) catch
@panic("btrfs workaround failed");
tmp_directory.handle = iterable_dir.dir;
}
f.actual_hash = try computeHash(f, tmp_directory, filter);
}
// Rename the temporary directory into the global zig package cache
// directory. If the hash already exists, delete the temporary directory
// and leave the zig package cache directory untouched as it may be in use
// by the system. This is done even if the hash is invalid, in case the
// package with the different hash is used in the future.
f.package_root = .{
.root_dir = cache_root,
.sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)),
};
renameTmpIntoCache(cache_root.handle, tmp_dir_sub_path, f.package_root.sub_path) catch |err| {
const src = try cache_root.join(arena, &.{tmp_dir_sub_path});
const dest = try cache_root.join(arena, &.{f.package_root.sub_path});
try eb.addRootErrorMessage(.{ .msg = try eb.printString(
"unable to rename temporary directory '{s}' into package cache directory '{s}': {s}",
.{ src, dest, @errorName(err) },
) });
return error.FetchFailed;
};
// Validate the computed hash against the expected hash. If invalid, this
// job is done.
const actual_hex = Manifest.hexDigest(f.actual_hash);
if (remote_hash) |declared_hash| {
if (!std.mem.eql(u8, &declared_hash, &actual_hex)) {
return f.fail(f.hash_tok, try eb.printString(
"hash mismatch: manifest declares {s} but the fetched package has {s}",
.{ declared_hash, actual_hex },
));
}
} else if (!f.omit_missing_hash_error) {
const notes_len = 1;
try eb.addRootErrorMessage(.{
.msg = try eb.addString("dependency is missing hash field"),
.src_loc = try f.srcLoc(f.location_tok),
.notes_len = notes_len,
});
const notes_start = try eb.reserveNotes(notes_len);
eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
.msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}),
}));
return error.FetchFailed;
}
// Spawn a new fetch job for each dependency in the manifest file. Use
// a mutex and a hash map so that redundant jobs do not get queued up.
if (!f.job_queue.recursive) return;
return queueJobsForDeps(f);
}
/// `computeHash` gets a free check for the existence of `build.zig`, but when
/// not computing a hash, we need to do a syscall to check for it.
fn checkBuildFileExistence(f: *Fetch) RunError!void {
const eb = &f.error_bundle;
if (f.package_root.access(Package.build_zig_basename, .{})) |_| {
f.has_build_zig = true;
} else |err| switch (err) {
error.FileNotFound => {},
else => |e| {
try eb.addRootErrorMessage(.{
.msg = try eb.printString("unable to access '{}{s}': {s}", .{
f.package_root, Package.build_zig_basename, @errorName(e),
}),
});
return error.FetchFailed;
},
}
}
/// This function populates `f.manifest` or leaves it `null`.
fn loadManifest(f: *Fetch, pkg_root: Package.Path) RunError!void {
const eb = &f.error_bundle;
const arena = f.arena.allocator();
const manifest_bytes = pkg_root.root_dir.handle.readFileAllocOptions(
arena,
try fs.path.join(arena, &.{ pkg_root.sub_path, Manifest.basename }),
Manifest.max_bytes,
null, // no size hint
1, // no special alignment
0, // null terminator, required by std.zig.Ast.parse
) catch |err| switch (err) {
error.FileNotFound => return,
else => |e| {
const file_path = try pkg_root.join(arena, Manifest.basename);
try eb.addRootErrorMessage(.{
.msg = try eb.printString("unable to load package manifest '{}': {s}", .{
file_path, @errorName(e),
}),
});
return error.FetchFailed;
},
};
const ast = &f.manifest_ast;
ast.* = try std.zig.Ast.parse(arena, manifest_bytes, .zon);
if (ast.errors.len > 0) {
const file_path = try std.fmt.allocPrint(arena, "{}" ++ Manifest.basename, .{pkg_root});
try main.putAstErrorsIntoBundle(arena, ast.*, file_path, eb);
return error.FetchFailed;
}
f.manifest = try Manifest.parse(arena, ast.*, .{
.allow_missing_paths_field = f.allow_missing_paths_field,
});
const manifest = &f.manifest.?;
if (manifest.errors.len > 0) {
const src_path = try eb.printString("{}{s}", .{ pkg_root, Manifest.basename });
const token_starts = ast.tokens.items(.start);
for (manifest.errors) |msg| {
const start_loc = ast.tokenLocation(0, msg.tok);
try eb.addRootErrorMessage(.{
.msg = try eb.addString(msg.msg),
.src_loc = try eb.addSourceLocation(.{
.src_path = src_path,
.span_start = token_starts[msg.tok],
.span_end = @intCast(token_starts[msg.tok] + ast.tokenSlice(msg.tok).len),
.span_main = token_starts[msg.tok] + msg.off,
.line = @intCast(start_loc.line),
.column = @intCast(start_loc.column),
.source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
}),
});
}
return error.FetchFailed;
}
}
fn queueJobsForDeps(f: *Fetch) RunError!void {
assert(f.job_queue.recursive);
// If the package does not have a build.zig.zon file then there are no dependencies.
const manifest = f.manifest orelse return;
const new_fetches, const prog_names = nf: {
const parent_arena = f.arena.allocator();
const gpa = f.arena.child_allocator;
const cache_root = f.job_queue.global_cache;
const dep_names = manifest.dependencies.keys();
const deps = manifest.dependencies.values();
// Grab the new tasks into a temporary buffer so we can unlock that mutex
// as fast as possible.
// This overallocates by the number of fetches that get skipped by the
// `continue` in the loop below.
const new_fetches = try parent_arena.alloc(Fetch, deps.len);
const prog_names = try parent_arena.alloc([]const u8, deps.len);
var new_fetch_index: usize = 0;
f.job_queue.mutex.lock();
defer f.job_queue.mutex.unlock();
try f.job_queue.all_fetches.ensureUnusedCapacity(gpa, new_fetches.len);
try f.job_queue.table.ensureUnusedCapacity(gpa, @intCast(new_fetches.len));
// There are four cases here:
// * Correct hash is provided by manifest.
// - Hash map already has the entry, no need to add it again.
// * Incorrect hash is provided by manifest.
// - Hash mismatch error emitted; `queueJobsForDeps` is not called.
// * Hash is not provided by manifest.
// - Hash missing error emitted; `queueJobsForDeps` is not called.
// * Path-based location is used without a hash.
// - Hash is added to the table based on the path alone before
// calling run(); no need to add it again.
for (dep_names, deps) |dep_name, dep| {
const new_fetch = &new_fetches[new_fetch_index];
const location: Location = switch (dep.location) {
.url => |url| .{ .remote = .{
.url = url,
.hash = h: {
const h = dep.hash orelse break :h null;
const digest_len = @typeInfo(Manifest.MultiHashHexDigest).Array.len;
const multihash_digest = h[0..digest_len].*;
const gop = f.job_queue.table.getOrPutAssumeCapacity(multihash_digest);
if (gop.found_existing) continue;
gop.value_ptr.* = new_fetch;
break :h multihash_digest;
},
} },
.path => |rel_path| l: {
// This might produce an invalid path, which is checked for
// at the beginning of run().
const new_root = try f.package_root.resolvePosix(parent_arena, rel_path);
const multihash_digest = relativePathDigest(new_root, cache_root);
const gop = f.job_queue.table.getOrPutAssumeCapacity(multihash_digest);
if (gop.found_existing) continue;
gop.value_ptr.* = new_fetch;
break :l .{ .relative_path = new_root };
},
};
prog_names[new_fetch_index] = dep_name;
new_fetch_index += 1;
f.job_queue.all_fetches.appendAssumeCapacity(new_fetch);
new_fetch.* = .{
.arena = std.heap.ArenaAllocator.init(gpa),
.location = location,
.location_tok = dep.location_tok,
.hash_tok = dep.hash_tok,
.parent_package_root = f.package_root,
.parent_manifest_ast = &f.manifest_ast,
.prog_node = f.prog_node,
.job_queue = f.job_queue,
.omit_missing_hash_error = false,
.allow_missing_paths_field = true,
.package_root = undefined,
.error_bundle = undefined,
.manifest = null,
.manifest_ast = undefined,
.actual_hash = undefined,
.has_build_zig = false,
.oom_flag = false,
.module = null,
};
}
// job_queue mutex is locked so this is OK.
f.prog_node.unprotected_estimated_total_items += new_fetch_index;
break :nf .{ new_fetches[0..new_fetch_index], prog_names[0..new_fetch_index] };
};
// Now it's time to give tasks to the thread pool.
const thread_pool = f.job_queue.thread_pool;
for (new_fetches, prog_names) |*new_fetch, prog_name| {
f.job_queue.wait_group.start();
thread_pool.spawn(workerRun, .{ new_fetch, prog_name }) catch |err| switch (err) {
error.OutOfMemory => {
new_fetch.oom_flag = true;
f.job_queue.wait_group.finish();
continue;
},
};
}
}
pub fn relativePathDigest(
pkg_root: Package.Path,
cache_root: Cache.Directory,
) Manifest.MultiHashHexDigest {
var hasher = Manifest.Hash.init(.{});
// This hash is a tuple of:
// * whether it is relative to the global cache directory or to the root package
// * the relative file path from there to the build root of the package
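// The two distinct prefixes ensure a cache-relative sub_path can never
// collide with an identical project-relative sub_path.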
hasher.update(if (pkg_root.root_dir.eql(cache_root))
&package_hash_prefix_cached
else
&package_hash_prefix_project);
hasher.update(pkg_root.sub_path);
return Manifest.hexDigest(hasher.finalResult());
}
pub fn workerRun(f: *Fetch, prog_name: []const u8) void {
defer f.job_queue.wait_group.finish();
var prog_node = f.prog_node.start(prog_name, 0);
defer prog_node.end();
prog_node.activate();
run(f) catch |err| switch (err) {
error.OutOfMemory => f.oom_flag = true,
error.FetchFailed => {
// Nothing to do because the errors are already reported in `error_bundle`,
// and a reference is kept to the `Fetch` task inside `all_fetches`.
},
};
}
fn srcLoc(
f: *Fetch,
tok: std.zig.Ast.TokenIndex,
) Allocator.Error!ErrorBundle.SourceLocationIndex {
const ast = f.parent_manifest_ast orelse return .none;
const eb = &f.error_bundle;
const token_starts = ast.tokens.items(.start);
const start_loc = ast.tokenLocation(0, tok);
const src_path = try eb.printString("{}" ++ Manifest.basename, .{f.parent_package_root});
const msg_off = 0;
return eb.addSourceLocation(.{
.src_path = src_path,
.span_start = token_starts[tok],
.span_end = @intCast(token_starts[tok] + ast.tokenSlice(tok).len),
.span_main = token_starts[tok] + msg_off,
.line = @intCast(start_loc.line),
.column = @intCast(start_loc.column),
.source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
});
}
fn fail(f: *Fetch, msg_tok: std.zig.Ast.TokenIndex, msg_str: u32) RunError {
const eb = &f.error_bundle;
try eb.addRootErrorMessage(.{
.msg = msg_str,
.src_loc = try f.srcLoc(msg_tok),
});
return error.FetchFailed;
}
const Resource = union(enum) {
file: fs.File,
http_request: std.http.Client.Request,
git: Git,
dir: fs.IterableDir,
const Git = struct {
fetch_stream: git.Session.FetchStream,
want_oid: [git.oid_length]u8,
};
fn deinit(resource: *Resource) void {
switch (resource.*) {
.file => |*file| file.close(),
.http_request => |*req| req.deinit(),
.git => |*git_resource| git_resource.fetch_stream.deinit(),
.dir => |*dir| dir.close(),
}
resource.* = undefined;
}
fn reader(resource: *Resource) std.io.AnyReader {
return .{
.context = resource,
.readFn = read,
};
}
fn read(context: *const anyopaque, buffer: []u8) anyerror!usize {
const resource: *Resource = @constCast(@ptrCast(@alignCast(context)));
switch (resource.*) {
.file => |*f| return f.read(buffer),
.http_request => |*r| return r.read(buffer),
.git => |*g| return g.fetch_stream.read(buffer),
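// Directory resources are consumed by copying, never streamed, so
// reaching this case would be a logic error.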
.dir => unreachable,
}
}
};
const FileType = enum {
tar,
@"tar.gz",
@"tar.xz",
@"tar.zst",
git_pack,
fn fromPath(file_path: []const u8) ?FileType {
if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar;
if (ascii.endsWithIgnoreCase(file_path, ".tgz")) return .@"tar.gz";
if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz";
if (ascii.endsWithIgnoreCase(file_path, ".txz")) return .@"tar.xz";
if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz";
if (ascii.endsWithIgnoreCase(file_path, ".tzst")) return .@"tar.zst";
if (ascii.endsWithIgnoreCase(file_path, ".tar.zst")) return .@"tar.zst";
return null;
}
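// A few spot checks for the extension table above; an added illustration in
// the same style as the `fromContentDisposition` test below.
test fromPath {
    try std.testing.expectEqual(@as(?FileType, .tar), fromPath("foo.tar"));
    try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromPath("FOO.TGZ"));
    try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromPath("foo.tar.xz"));
    try std.testing.expectEqual(@as(?FileType, .@"tar.zst"), fromPath("foo.TAR.ZST"));
    try std.testing.expect(fromPath("foo.zip") == null);
    try std.testing.expect(fromPath("foo.tar.gz.bak") == null);
}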
/// Parameter is a content-disposition header value.
fn fromContentDisposition(cd_header: []const u8) ?FileType {
const attach_end = ascii.indexOfIgnoreCase(cd_header, "attachment;") orelse
return null;
var value_start = ascii.indexOfIgnoreCasePos(cd_header, attach_end + 1, "filename") orelse
return null;
value_start += "filename".len;
if (cd_header[value_start] == '*') {
value_start += 1;
}
if (cd_header[value_start] != '=') return null;
value_start += 1;
var value_end = std.mem.indexOfPos(u8, cd_header, value_start, ";") orelse cd_header.len;
if (cd_header[value_end - 1] == '\"') {
value_end -= 1;
}
return fromPath(cd_header[value_start..value_end]);
}
test fromContentDisposition {
try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attaChment; FILENAME=\"stuff.tar.gz\"; size=42"));
try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; filename*=\"stuff.tar.gz\""));
try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\""));
try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\""));
try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz"));
try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null);
try std.testing.expect(fromContentDisposition("attachment; FileName=\"stuff.tar\"") == null);
try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null);
try std.testing.expect(fromContentDisposition("attachment; size=42") == null);
try std.testing.expect(fromContentDisposition("inline; size=42") == null);
try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\"; attachment;") == null);
try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\";") == null);
}
};
fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource {
const gpa = f.arena.child_allocator;
const arena = f.arena.allocator();
const eb = &f.error_bundle;
if (ascii.eqlIgnoreCase(uri.scheme, "file")) return .{
.file = f.parent_package_root.openFile(uri.path, .{}) catch |err| {
return f.fail(f.location_tok, try eb.printString("unable to open '{}{s}': {s}", .{
f.parent_package_root, uri.path, @errorName(err),
}));
},
};
const http_client = f.job_queue.http_client;
if (ascii.eqlIgnoreCase(uri.scheme, "http") or
ascii.eqlIgnoreCase(uri.scheme, "https"))
{
var h = std.http.Headers{ .allocator = gpa };
defer h.deinit();
var req = http_client.open(.GET, uri, h, .{}) catch |err| {
return f.fail(f.location_tok, try eb.printString(
"unable to connect to server: {s}",
.{@errorName(err)},
));
};
errdefer req.deinit(); // releases more than memory
req.send(.{}) catch |err| {
return f.fail(f.location_tok, try eb.printString(
"HTTP request failed: {s}",
.{@errorName(err)},
));
};
req.wait() catch |err| {
return f.fail(f.location_tok, try eb.printString(
"invalid HTTP response: {s}",
.{@errorName(err)},
));
};
if (req.response.status != .ok) {
return f.fail(f.location_tok, try eb.printString(
"bad HTTP response code: '{d} {s}'",
.{ @intFromEnum(req.response.status), req.response.status.phrase() orelse "" },
));
}
return .{ .http_request = req };
}
if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or
ascii.eqlIgnoreCase(uri.scheme, "git+https"))
{
var transport_uri = uri;
transport_uri.scheme = uri.scheme["git+".len..];
var redirect_uri: []u8 = undefined;
var session: git.Session = .{ .transport = http_client, .uri = transport_uri };
session.discoverCapabilities(gpa, &redirect_uri) catch |err| switch (err) {
error.Redirected => {
defer gpa.free(redirect_uri);
return f.fail(f.location_tok, try eb.printString(
"repository moved to {s}",
.{redirect_uri},
));
},
else => |e| {
return f.fail(f.location_tok, try eb.printString(
"unable to discover remote git server capabilities: {s}",
.{@errorName(e)},
));
},
};
const want_oid = want_oid: {
const want_ref = uri.fragment orelse "HEAD";
if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref});
const want_ref_tag = try std.fmt.allocPrint(arena, "refs/tags/{s}", .{want_ref});
var ref_iterator = session.listRefs(gpa, .{
.ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
.include_peeled = true,
}) catch |err| {
return f.fail(f.location_tok, try eb.printString(
"unable to list refs: {s}",
.{@errorName(err)},
));
};
defer ref_iterator.deinit();
while (ref_iterator.next() catch |err| {
return f.fail(f.location_tok, try eb.printString(
"unable to iterate refs: {s}",
.{@errorName(err)},
));
}) |ref| {
if (std.mem.eql(u8, ref.name, want_ref) or
std.mem.eql(u8, ref.name, want_ref_head) or
std.mem.eql(u8, ref.name, want_ref_tag))
{
break :want_oid ref.peeled orelse ref.oid;
}
}
return f.fail(f.location_tok, try eb.printString("ref not found: {s}", .{want_ref}));
};
if (uri.fragment == null) {
const notes_len = 1;
try eb.addRootErrorMessage(.{
.msg = try eb.addString("url field is missing an explicit ref"),
.src_loc = try f.srcLoc(f.location_tok),
.notes_len = notes_len,
});
const notes_start = try eb.reserveNotes(notes_len);
eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
.msg = try eb.printString("try .url = \"{+/}#{}\",", .{
uri, std.fmt.fmtSliceHexLower(&want_oid),
}),
}));
return error.FetchFailed;
}
var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
_ = std.fmt.bufPrint(&want_oid_buf, "{}", .{
std.fmt.fmtSliceHexLower(&want_oid),
}) catch unreachable;
var fetch_stream = session.fetch(gpa, &.{&want_oid_buf}) catch |err| {
return f.fail(f.location_tok, try eb.printString(
"unable to create fetch stream: {s}",
.{@errorName(err)},
));
};
errdefer fetch_stream.deinit();
return .{ .git = .{
.fetch_stream = fetch_stream,
.want_oid = want_oid,
} };
}
return f.fail(f.location_tok, try eb.printString(
"unsupported URL scheme: {s}",
.{uri.scheme},
));
}
fn unpackResource(
f: *Fetch,
resource: *Resource,
uri_path: []const u8,
tmp_directory: Cache.Directory,
) RunError!void {
const eb = &f.error_bundle;
const file_type = switch (resource.*) {
.file => FileType.fromPath(uri_path) orelse
return f.fail(f.location_tok, try eb.printString("unknown file type: '{s}'", .{uri_path})),
.http_request => |req| ft: {
// Content-Type takes highest precedence.
const content_type = req.response.headers.getFirstValue("Content-Type") orelse
return f.fail(f.location_tok, try eb.addString("missing 'Content-Type' header"));
if (ascii.eqlIgnoreCase(content_type, "application/x-tar"))
break :ft .tar;
if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
{
break :ft .@"tar.gz";
}
if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
break :ft .@"tar.xz";
if (ascii.eqlIgnoreCase(content_type, "application/zstd"))
break :ft .@"tar.zst";
if (!ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
return f.fail(f.location_tok, try eb.printString(
"unrecognized 'Content-Type' header: '{s}'",
.{content_type},
));
}
// Next in precedence is the filename from a 'Content-Disposition: attachment' header.
if (req.response.headers.getFirstValue("Content-Disposition")) |cd_header| {
break :ft FileType.fromContentDisposition(cd_header) orelse {
return f.fail(f.location_tok, try eb.printString(