From bb00b11418577545d5e877d5eb647a4079762270 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 21 Jul 2024 18:12:22 -0700 Subject: [PATCH] initial support for integrated fuzzing * Add the `-ffuzz` and `-fno-fuzz` CLI arguments. * Detect fuzz testing flags from zig cc. * Set the correct clang flags when fuzz testing is requested. It can be combined with TSAN and UBSAN. * Compilation: build fuzzer library when needed which is currently an empty zig file. * Add optforfuzzing to every function in the llvm backend for modules that have requested fuzzing. * In ZigLLVMTargetMachineEmitToFile, add the optimization passes for sanitizer coverage. * std.mem.eql uses a naive implementation optimized for fuzzing when builtin.fuzz is true. Tracked by #20702 --- lib/fuzzer.zig | 0 lib/std/mem.zig | 12 ++--- src/Builtin.zig | 3 ++ src/Compilation.zig | 93 ++++++++++++++++++++++------------- src/Compilation/Config.zig | 3 ++ src/Package/Module.zig | 13 +++++ src/codegen/llvm.zig | 6 +++ src/codegen/llvm/bindings.zig | 1 + src/main.zig | 34 ++++++++++--- src/zig_llvm.cpp | 19 ++++--- src/zig_llvm.h | 2 +- 11 files changed, 133 insertions(+), 53 deletions(-) create mode 100644 lib/fuzzer.zig diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 20d68b893791..4cd5ee841d27 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -636,18 +636,20 @@ test lessThan { try testing.expect(lessThan(u8, "", "a")); } -const backend_can_use_eql_bytes = switch (builtin.zig_backend) { +const eqlBytes_allowed = switch (builtin.zig_backend) { // The SPIR-V backend does not support the optimized path yet. .stage2_spirv64 => false, // The RISC-V does not support vectors. .stage2_riscv64 => false, - else => true, + // The naive memory comparison implementation is more useful for fuzzers to + // find interesting inputs. + else => !builtin.fuzz, }; /// Compares two slices and returns whether they are equal. pub fn eql(comptime T: type, a: []const T, b: []const T) bool { if (@sizeOf(T) == 0) return true; - if (!@inComptime() and std.meta.hasUniqueRepresentation(T) and backend_can_use_eql_bytes) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b)); + if (!@inComptime() and std.meta.hasUniqueRepresentation(T) and eqlBytes_allowed) return eqlBytes(sliceAsBytes(a), sliceAsBytes(b)); if (a.len != b.len) return false; if (a.len == 0 or a.ptr == b.ptr) return true; @@ -660,9 +662,7 @@ pub fn eql(comptime T: type, a: []const T, b: []const T) bool { /// std.mem.eql heavily optimized for slices of bytes. fn eqlBytes(a: []const u8, b: []const u8) bool { - if (!backend_can_use_eql_bytes) { - return eql(u8, a, b); - } + comptime assert(eqlBytes_allowed); if (a.len != b.len) return false; if (a.len == 0 or a.ptr == b.ptr) return true; diff --git a/src/Builtin.zig b/src/Builtin.zig index dbdd746bcd3c..6e573d843f5f 100644 --- a/src/Builtin.zig +++ b/src/Builtin.zig @@ -10,6 +10,7 @@ optimize_mode: std.builtin.OptimizeMode, error_tracing: bool, valgrind: bool, sanitize_thread: bool, +fuzz: bool, pic: bool, pie: bool, strip: bool, @@ -185,6 +186,7 @@ pub fn append(opts: @This(), buffer: *std.ArrayList(u8)) Allocator.Error!void { \\pub const have_error_return_tracing = {}; \\pub const valgrind_support = {}; \\pub const sanitize_thread = {}; + \\pub const fuzz = {}; \\pub const position_independent_code = {}; \\pub const position_independent_executable = {}; \\pub const strip_debug_info = {}; @@ -199,6 +201,7 @@ pub fn append(opts: @This(), buffer: *std.ArrayList(u8)) Allocator.Error!void { opts.error_tracing, opts.valgrind, opts.sanitize_thread, + opts.fuzz, opts.pic, opts.pie, opts.strip, diff --git a/src/Compilation.zig b/src/Compilation.zig index 98d2fd552a10..8ef8adfa5303 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -190,6 +190,7 @@ debug_compile_errors: bool, incremental: bool, job_queued_compiler_rt_lib: bool = false, job_queued_compiler_rt_obj: bool = false, +job_queued_fuzzer_lib: bool = false, job_queued_update_builtin_zig: bool, alloc_failure_occurred: bool = false, formatted_panics: bool = false, @@ -231,6 +232,10 @@ compiler_rt_lib: ?CRTFile = null, /// Populated when we build the compiler_rt_obj object. A Job to build this is indicated /// by setting `job_queued_compiler_rt_obj` and resolved before calling linker.flush(). compiler_rt_obj: ?CRTFile = null, +/// Populated when we build the libfuzzer static library. A Job to build this +/// is indicated by setting `job_queued_fuzzer_lib` and resolved before +/// calling linker.flush(). +fuzzer_lib: ?CRTFile = null, glibc_so_files: ?glibc.BuiltSharedObjects = null, wasi_emulated_libs: []const wasi_libc.CRTFile, @@ -799,6 +804,7 @@ pub const MiscTask = enum { libcxx, libcxxabi, libtsan, + libfuzzer, wasi_libc_crt_file, compiler_rt, zig_libc, @@ -887,6 +893,7 @@ pub const cache_helpers = struct { hh.add(mod.red_zone); hh.add(mod.sanitize_c); hh.add(mod.sanitize_thread); + hh.add(mod.fuzz); hh.add(mod.unwind_tables); hh.add(mod.structured_cfg); hh.addListOfBytes(mod.cc_argv); @@ -1302,6 +1309,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil const any_unwind_tables = options.config.any_unwind_tables or options.root_mod.unwind_tables; const any_non_single_threaded = options.config.any_non_single_threaded or !options.root_mod.single_threaded; const any_sanitize_thread = options.config.any_sanitize_thread or options.root_mod.sanitize_thread; + const any_fuzz = options.config.any_fuzz or options.root_mod.fuzz; const link_eh_frame_hdr = options.link_eh_frame_hdr or any_unwind_tables; const build_id = options.build_id orelse .none; @@ -1563,6 +1571,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil comp.config.any_unwind_tables = any_unwind_tables; comp.config.any_non_single_threaded = any_non_single_threaded; comp.config.any_sanitize_thread = any_sanitize_thread; + comp.config.any_fuzz = any_fuzz; const lf_open_opts: link.File.OpenOptions = .{ .linker_script = options.linker_script, @@ -1908,6 +1917,13 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil } } + if (comp.config.any_fuzz and capable_of_building_compiler_rt) { + if (is_exe_or_dyn_lib) { + log.debug("queuing a job to build libfuzzer", .{}); + comp.job_queued_fuzzer_lib = true; + } + } + if (!comp.skip_linker_dependencies and is_exe_or_dyn_lib and !comp.config.link_libc and capable_of_building_zig_libc) { @@ -1956,6 +1972,9 @@ pub fn destroy(comp: *Compilation) void { if (comp.compiler_rt_obj) |*crt_file| { crt_file.deinit(gpa); } + if (comp.fuzzer_lib) |*crt_file| { + crt_file.deinit(gpa); + } if (comp.libc_static_lib) |*crt_file| { crt_file.deinit(gpa); } @@ -2721,6 +2740,7 @@ pub fn emitLlvmObject( .is_small = comp.root_mod.optimize_mode == .ReleaseSmall, .time_report = comp.time_report, .sanitize_thread = comp.config.any_sanitize_thread, + .fuzz = comp.config.any_fuzz, .lto = comp.config.lto, }); } @@ -3641,15 +3661,9 @@ fn performAllTheWorkInner( break; } - if (comp.job_queued_compiler_rt_lib) { - comp.job_queued_compiler_rt_lib = false; - buildCompilerRtOneShot(comp, .Lib, &comp.compiler_rt_lib, main_progress_node); - } - - if (comp.job_queued_compiler_rt_obj) { - comp.job_queued_compiler_rt_obj = false; - buildCompilerRtOneShot(comp, .Obj, &comp.compiler_rt_obj, main_progress_node); - } + buildCompilerRtOneShot(comp, &comp.job_queued_compiler_rt_lib, "compiler_rt.zig", .compiler_rt, .Lib, &comp.compiler_rt_lib, main_progress_node); + buildCompilerRtOneShot(comp, &comp.job_queued_compiler_rt_obj, "compiler_rt.zig", .compiler_rt, .Obj, &comp.compiler_rt_obj, main_progress_node); + buildCompilerRtOneShot(comp, &comp.job_queued_fuzzer_lib, "fuzzer.zig", .libfuzzer, .Lib, &comp.fuzzer_lib, main_progress_node); } const JobError = Allocator.Error; @@ -4655,23 +4669,27 @@ fn workerUpdateWin32Resource( fn buildCompilerRtOneShot( comp: *Compilation, + job_queued: *bool, + root_source_name: []const u8, + misc_task: MiscTask, output_mode: std.builtin.OutputMode, out: *?CRTFile, prog_node: std.Progress.Node, ) void { + if (!job_queued.*) return; + job_queued.* = false; + comp.buildOutputFromZig( - "compiler_rt.zig", + root_source_name, output_mode, out, - .compiler_rt, + misc_task, prog_node, ) catch |err| switch (err) { error.SubCompilationFailed => return, // error reported already - else => comp.lockAndSetMiscFailure( - .compiler_rt, - "unable to build compiler_rt: {s}", - .{@errorName(err)}, - ), + else => comp.lockAndSetMiscFailure(misc_task, "unable to build {s}: {s}", .{ + @tagName(misc_task), @errorName(err), + }), }; } @@ -5602,23 +5620,32 @@ pub fn addCCArgs( try argv.append("-mthumb"); } - if (mod.sanitize_c and !mod.sanitize_thread) { - try argv.append("-fsanitize=undefined"); - try argv.append("-fsanitize-trap=undefined"); - // It is very common, and well-defined, for a pointer on one side of a C ABI - // to have a different but compatible element type. Examples include: - // `char*` vs `uint8_t*` on a system with 8-bit bytes - // `const char*` vs `char*` - // `char*` vs `unsigned char*` - // Without this flag, Clang would invoke UBSAN when such an extern - // function was called. - try argv.append("-fno-sanitize=function"); - } else if (mod.sanitize_c and mod.sanitize_thread) { - try argv.append("-fsanitize=undefined,thread"); - try argv.append("-fsanitize-trap=undefined"); - try argv.append("-fno-sanitize=function"); - } else if (!mod.sanitize_c and mod.sanitize_thread) { - try argv.append("-fsanitize=thread"); + { + var san_arg: std.ArrayListUnmanaged(u8) = .{}; + const prefix = "-fsanitize="; + if (mod.sanitize_c) { + if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix); + try san_arg.appendSlice(arena, "undefined,"); + try argv.append("-fsanitize-trap=undefined"); + // It is very common, and well-defined, for a pointer on one side of a C ABI + // to have a different but compatible element type. Examples include: + // `char*` vs `uint8_t*` on a system with 8-bit bytes + // `const char*` vs `char*` + // `char*` vs `unsigned char*` + // Without this flag, Clang would invoke UBSAN when such an extern + // function was called. + try argv.append("-fno-sanitize=function"); + } + if (mod.sanitize_thread) { + if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix); + try san_arg.appendSlice(arena, "thread,"); + } + if (mod.fuzz) { + if (san_arg.items.len == 0) try san_arg.appendSlice(arena, prefix); + try san_arg.appendSlice(arena, "fuzzer-no-link,"); + } + // Chop off the trailing comma and append to argv. + if (san_arg.popOrNull()) |_| try argv.append(san_arg.items); } if (mod.red_zone) { diff --git a/src/Compilation/Config.zig b/src/Compilation/Config.zig index 6e28f5028c02..63bd4c6fa203 100644 --- a/src/Compilation/Config.zig +++ b/src/Compilation/Config.zig @@ -32,6 +32,7 @@ any_non_single_threaded: bool, /// per-Module setting. any_error_tracing: bool, any_sanitize_thread: bool, +any_fuzz: bool, pie: bool, /// If this is true then linker code is responsible for making an LLVM IR /// Module, outputting it to an object file, and then linking that together @@ -82,6 +83,7 @@ pub const Options = struct { ensure_libcpp_on_non_freestanding: bool = false, any_non_single_threaded: bool = false, any_sanitize_thread: bool = false, + any_fuzz: bool = false, any_unwind_tables: bool = false, any_dyn_libs: bool = false, any_c_source_files: bool = false, @@ -486,6 +488,7 @@ pub fn resolve(options: Options) ResolveError!Config { .any_non_single_threaded = options.any_non_single_threaded, .any_error_tracing = any_error_tracing, .any_sanitize_thread = options.any_sanitize_thread, + .any_fuzz = options.any_fuzz, .root_error_tracing = root_error_tracing, .pie = pie, .lto = lto, diff --git a/src/Package/Module.zig b/src/Package/Module.zig index 61b7d2ac4dc8..02d9921016d4 100644 --- a/src/Package/Module.zig +++ b/src/Package/Module.zig @@ -26,6 +26,7 @@ stack_protector: u32, red_zone: bool, sanitize_c: bool, sanitize_thread: bool, +fuzz: bool, unwind_tables: bool, cc_argv: []const []const u8, /// (SPIR-V) whether to generate a structured control flow graph or not @@ -92,6 +93,7 @@ pub const CreateOptions = struct { unwind_tables: ?bool = null, sanitize_c: ?bool = null, sanitize_thread: ?bool = null, + fuzz: ?bool = null, structured_cfg: ?bool = null, }; }; @@ -106,6 +108,7 @@ pub const ResolvedTarget = struct { /// At least one of `parent` and `resolved_target` must be non-null. pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module { if (options.inherited.sanitize_thread == true) assert(options.global.any_sanitize_thread); + if (options.inherited.fuzz == true) assert(options.global.any_fuzz); if (options.inherited.single_threaded == false) assert(options.global.any_non_single_threaded); if (options.inherited.unwind_tables == true) assert(options.global.any_unwind_tables); if (options.inherited.error_tracing == true) assert(options.global.any_error_tracing); @@ -210,6 +213,12 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module { break :b false; }; + const fuzz = b: { + if (options.inherited.fuzz) |x| break :b x; + if (options.parent) |p| break :b p.fuzz; + break :b false; + }; + const code_model = b: { if (options.inherited.code_model) |x| break :b x; if (options.parent) |p| break :b p.code_model; @@ -337,6 +346,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module { .red_zone = red_zone, .sanitize_c = sanitize_c, .sanitize_thread = sanitize_thread, + .fuzz = fuzz, .unwind_tables = unwind_tables, .cc_argv = options.cc_argv, .structured_cfg = structured_cfg, @@ -359,6 +369,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module { .error_tracing = error_tracing, .valgrind = valgrind, .sanitize_thread = sanitize_thread, + .fuzz = fuzz, .pic = pic, .pie = options.global.pie, .strip = strip, @@ -427,6 +438,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module { .red_zone = red_zone, .sanitize_c = sanitize_c, .sanitize_thread = sanitize_thread, + .fuzz = fuzz, .unwind_tables = unwind_tables, .cc_argv = &.{}, .structured_cfg = structured_cfg, @@ -485,6 +497,7 @@ pub fn createLimited(gpa: Allocator, options: LimitedOptions) Allocator.Error!*P .red_zone = undefined, .sanitize_c = undefined, .sanitize_thread = undefined, + .fuzz = undefined, .unwind_tables = undefined, .cc_argv = undefined, .structured_cfg = undefined, diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index a12689761464..1d63d66b4496 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1121,6 +1121,7 @@ pub const Object = struct { is_small: bool, time_report: bool, sanitize_thread: bool, + fuzz: bool, lto: bool, }; @@ -1307,6 +1308,7 @@ pub const Object = struct { options.is_small, options.time_report, options.sanitize_thread, + options.fuzz, options.lto, null, emit_bin_path, @@ -1331,6 +1333,7 @@ pub const Object = struct { options.is_small, options.time_report, options.sanitize_thread, + options.fuzz, options.lto, options.asm_path, emit_bin_path, @@ -3002,6 +3005,9 @@ pub const Object = struct { if (owner_mod.sanitize_thread) { try attributes.addFnAttr(.sanitize_thread, &o.builder); } + if (owner_mod.fuzz) { + try attributes.addFnAttr(.optforfuzzing, &o.builder); + } const target = owner_mod.resolved_target.result; if (target.cpu.model.llvm_name) |s| { try attributes.addFnAttr(.{ .string = .{ diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index a32f1d74bc53..f49214b6608c 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -93,6 +93,7 @@ pub const TargetMachine = opaque { is_small: bool, time_report: bool, tsan: bool, + sancov: bool, lto: bool, asm_filename: ?[*:0]const u8, bin_filename: ?[*:0]const u8, diff --git a/src/main.zig b/src/main.zig index 561dd2f28e2a..1d9d8d94f9a6 100644 --- a/src/main.zig +++ b/src/main.zig @@ -499,12 +499,14 @@ const usage_build_generic = \\ -fno-stack-check Disable stack probing in safe builds \\ -fstack-protector Enable stack protection in unsafe builds \\ -fno-stack-protector Disable stack protection in safe builds - \\ -fsanitize-c Enable C undefined behavior detection in unsafe builds - \\ -fno-sanitize-c Disable C undefined behavior detection in safe builds \\ -fvalgrind Include valgrind client requests in release builds \\ -fno-valgrind Omit valgrind client requests in debug builds + \\ -fsanitize-c Enable C undefined behavior detection in unsafe builds + \\ -fno-sanitize-c Disable C undefined behavior detection in safe builds \\ -fsanitize-thread Enable Thread Sanitizer \\ -fno-sanitize-thread Disable Thread Sanitizer + \\ -ffuzz Enable fuzz testing instrumentation + \\ -fno-fuzz Disable fuzz testing instrumentation \\ -funwind-tables Always produce unwind table entries for all functions \\ -fno-unwind-tables Never produce unwind table entries \\ -ferror-tracing Enable error tracing in ReleaseFast mode @@ -1429,6 +1431,10 @@ fn buildOutputType( mod_opts.sanitize_thread = true; } else if (mem.eql(u8, arg, "-fno-sanitize-thread")) { mod_opts.sanitize_thread = false; + } else if (mem.eql(u8, arg, "-ffuzz")) { + mod_opts.fuzz = true; + } else if (mem.eql(u8, arg, "-fno-fuzz")) { + mod_opts.fuzz = false; } else if (mem.eql(u8, arg, "-fllvm")) { create_module.opts.use_llvm = true; } else if (mem.eql(u8, arg, "-fno-llvm")) { @@ -2060,11 +2066,21 @@ fn buildOutputType( create_module.opts.debug_format = .{ .dwarf = .@"64" }; }, .sanitize => { - if (mem.eql(u8, it.only_arg, "undefined")) { - mod_opts.sanitize_c = true; - } else if (mem.eql(u8, it.only_arg, "thread")) { - mod_opts.sanitize_thread = true; - } else { + var san_it = std.mem.splitScalar(u8, it.only_arg, ','); + var recognized_any = false; + while (san_it.next()) |sub_arg| { + if (mem.eql(u8, sub_arg, "undefined")) { + mod_opts.sanitize_c = true; + recognized_any = true; + } else if (mem.eql(u8, sub_arg, "thread")) { + mod_opts.sanitize_thread = true; + recognized_any = true; + } else if (mem.eql(u8, sub_arg, "fuzzer") or mem.eql(u8, sub_arg, "fuzzer-no-link")) { + mod_opts.fuzz = true; + recognized_any = true; + } + } + if (!recognized_any) { try cc_argv.appendSlice(arena, it.other_args); } }, @@ -2642,6 +2658,8 @@ fn buildOutputType( create_module.opts.any_non_single_threaded = true; if (mod_opts.sanitize_thread == true) create_module.opts.any_sanitize_thread = true; + if (mod_opts.fuzz == true) + create_module.opts.any_fuzz = true; if (mod_opts.unwind_tables == true) create_module.opts.any_unwind_tables = true; if (mod_opts.strip == false) @@ -7491,6 +7509,8 @@ fn handleModArg( create_module.opts.any_non_single_threaded = true; if (mod_opts.sanitize_thread == true) create_module.opts.any_sanitize_thread = true; + if (mod_opts.fuzz == true) + create_module.opts.any_fuzz = true; if (mod_opts.unwind_tables == true) create_module.opts.any_unwind_tables = true; if (mod_opts.strip == false) diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index 72f1026617fa..86861ef42723 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -188,9 +189,10 @@ struct TimeTracerRAII { }; } // end anonymous namespace + bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMModuleRef module_ref, char **error_message, bool is_debug, - bool is_small, bool time_report, bool tsan, bool lto, + bool is_small, bool time_report, bool tsan, bool sancov, bool lto, const char *asm_filename, const char *bin_filename, const char *llvm_ir_filename, const char *bitcode_filename) { @@ -303,13 +305,18 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM }); } - // Thread sanitizer - if (tsan) { - pass_builder.registerOptimizerLastEPCallback([](ModulePassManager &module_pm, OptimizationLevel level) { + pass_builder.registerOptimizerLastEPCallback([&](ModulePassManager &module_pm, OptimizationLevel level) { + // Code coverage instrumentation. + if (sancov) { + module_pm.addPass(SanitizerCoveragePass()); + } + + // Thread sanitizer + if (tsan) { module_pm.addPass(ModuleThreadSanitizerPass()); module_pm.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); - }); - } + } + }); ModulePassManager module_pm; OptimizationLevel opt_level; diff --git a/src/zig_llvm.h b/src/zig_llvm.h index 89b53a802f59..7ac632fe02f1 100644 --- a/src/zig_llvm.h +++ b/src/zig_llvm.h @@ -26,7 +26,7 @@ ZIG_EXTERN_C bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMModuleRef module_ref, char **error_message, bool is_debug, - bool is_small, bool time_report, bool tsan, bool lto, + bool is_small, bool time_report, bool tsan, bool sancov, bool lto, const char *asm_filename, const char *bin_filename, const char *llvm_ir_filename, const char *bitcode_filename);