-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
-fsanitize=alignment: check memcpy/memmove arguments #67766
Conversation
@llvm/pr-subscribers-compiler-rt-sanitizer @llvm/pr-subscribers-clang ChangesSimilar to https://reviews.llvm.org/D9673, emit -fsanitize=alignment check for
For a reference parameter, we emit a -fsanitize=alignment check as well, which
Technically builtin memset functions can be checked for -fsanitize=alignment as Full diff: https://github.com/llvm/llvm-project/pull/67766.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h
index 4659e45c7883419..16d241cbf809bbe 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -170,6 +170,8 @@ struct SanitizerSet {
Mask = Value ? (Mask | K) : (Mask & ~K);
}
+ void set(SanitizerMask K) { Mask = K; }
+
/// Disable the sanitizers specified in \p K.
void clear(SanitizerMask K = SanitizerKind::All) { Mask &= ~K; }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b0fd38408806566..2102d0aaff9d620 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2730,6 +2730,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
}
+ auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+ unsigned ParmNum) {
+ Value *Val = A.getPointer();
+ EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
+ ParmNum);
+
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::All);
+ SkippedChecks.clear(SanitizerKind::Alignment);
+ EmitTypeCheck(Kind, Arg->getExprLoc(), Val, Arg->getType(),
+ A.getAlignment(), SkippedChecks);
+ };
+
switch (BuiltinIDIfNoAsmLabel) {
default: break;
case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -3720,10 +3733,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Dest = EmitPointerWithAlignment(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
+ EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+ EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
if (BuiltinID == Builtin::BImempcpy ||
BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3738,10 +3749,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Src = EmitPointerWithAlignment(E->getArg(1));
uint64_t Size =
E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
+ EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+ EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
Builder.CreateMemCpyInline(Dest, Src, Size);
return RValue::get(nullptr);
}
@@ -3798,10 +3807,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Dest = EmitPointerWithAlignment(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
+ EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+ EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
Builder.CreateMemMove(Dest, Src, SizeVal, false);
return RValue::get(Dest.getPointer());
}
diff --git a/clang/test/CodeGen/catch-undef-behavior.c b/clang/test/CodeGen/catch-undef-behavior.c
index ca0df0f002f8912..3956a475f319ea9 100644
--- a/clang/test/CodeGen/catch-undef-behavior.c
+++ b/clang/test/CodeGen/catch-undef-behavior.c
@@ -354,21 +354,63 @@ void call_decl_nonnull(int *a) {
decl_nonnull(a);
}
-extern void *memcpy (void *, const void *, unsigned) __attribute__((nonnull(1, 2)));
+extern void *memcpy(void *, const void *, unsigned long) __attribute__((nonnull(1, 2)));
// CHECK-COMMON-LABEL: @call_memcpy_nonnull
void call_memcpy_nonnull(void *p, void *q, int sz) {
// CHECK-COMMON: icmp ne ptr {{.*}}, null
// CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg
// CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+ // CHECK-COMMON-NOT: call
// CHECK-COMMON: icmp ne ptr {{.*}}, null
// CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg
// CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+ // CHECK-COMMON-NOT: call
+
+ // CHECK-COMMON: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 1 %1, i64 %conv, i1 false)
memcpy(p, q, sz);
}
-extern void *memmove (void *, const void *, unsigned) __attribute__((nonnull(1, 2)));
+// CHECK-COMMON-LABEL: define{{.*}} void @call_memcpy(
+void call_memcpy(long *p, short *q, int sz) {
+ // CHECK-COMMON: icmp ne ptr {{.*}}, null
+ // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+ // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
+ // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+ // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+ // CHECK-COMMON: icmp ne ptr {{.*}}, null
+ // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+ // CHECK-COMMON: and i64 %[[#]], 1, !nosanitize
+ // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+ // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+ // CHECK-COMMON: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 %conv, i1 false)
+ memcpy(p, q, sz);
+}
+
+// CHECK-COMMON-LABEL: define{{.*}} void @call_memcpy_inline(
+void call_memcpy_inline(long *p, short *q) {
+ // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
+ // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+ // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+ // CHECK-COMMON: and i64 %[[#]], 1, !nosanitize
+ // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+ // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+ // CHECK-COMMON: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 2, i1 false)
+ __builtin_memcpy_inline(p, q, 2);
+}
+
+extern void *memmove(void *, const void *, unsigned long) __attribute__((nonnull(1, 2)));
// CHECK-COMMON-LABEL: @call_memmove_nonnull
void call_memmove_nonnull(void *p, void *q, int sz) {
@@ -382,6 +424,28 @@ void call_memmove_nonnull(void *p, void *q, int sz) {
memmove(p, q, sz);
}
+// CHECK-COMMON-LABEL: define{{.*}} void @call_memmove(
+void call_memmove(long *p, short *q, int sz) {
+ // CHECK-COMMON: icmp ne ptr {{.*}}, null
+ // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+ // CHECK-COMMON: and i64 %[[#]], 7, !nosanitize
+ // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+ // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+ // CHECK-COMMON: icmp ne ptr {{.*}}, null
+ // CHECK-UBSAN: call void @__ubsan_handle_nonnull_arg(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 16)
+ // CHECK-COMMON: and i64 %[[#]], 1, !nosanitize
+ // CHECK-COMMON: icmp eq i64 %[[#]], 0, !nosanitize
+ // CHECK-UBSAN: call void @__ubsan_handle_type_mismatch_v1(
+ // CHECK-TRAP: call void @llvm.ubsantrap(i8 22)
+
+ // CHECK-COMMON: call void @llvm.memmove.p0.p0.i64(ptr align 8 %0, ptr align 2 %1, i64 %conv, i1 false)
+ memmove(p, q, sz);
+}
+
// CHECK-COMMON-LABEL: @call_nonnull_variadic
__attribute__((nonnull)) void nonnull_variadic(int a, ...);
void call_nonnull_variadic(int a, int *b) {
|
17d767e
to
b1201f3
Compare
@@ -47,6 +50,16 @@ int main(int, char **argv) { | |||
return *p && 0; | |||
// CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp | |||
|
|||
case 'L': { | |||
int x; | |||
// CHECK-MEMCPY-LOAD: misaligned.cpp:[[#@LINE+4]]{{(:16)?}}: runtime error: load of misaligned address [[PTR:0x[0-9a-f]*]] for type 'const void *', which requires 4 byte alignment |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
'const void *' is wrong here :(
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe not wrong, but unhelpful.
Issue is that 'int*' was misaligned, not that we pass it into void*.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the issue was mentioned in the description
The error message refers to void * instead of int *, which is not perfect
but should be acceptable.
runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'void *'
Anyhow, BI__builtin_HEXAGON_V6_vmaskedstoreq
strips an implicit cast. I added a similar mechanism in 9b705127b17513ee17e76fb903e85500c832d8aa
bba6a45
to
9b70512
Compare
Uh, why are we allowed to assume that memcpy pointer arguments are aligned? This looks like a miscompile to me. A plain |
This is definitely a bit weird, but...
... a plain Clang generally chooses to define the semantics of misaligned pointer values as doing the "obvious" thing (@rjmccall circulated an RFC on this a few years ago). In short: we allow them to be formed, and we allow pointer arithmetic on them, but don't allow them to be used to actually access misaligned memory. But I think the choice we're making here is probably worth it, though we should probably document it better. I think you can remove the alignment assumption by explicitly casting the operands to |
Thanks for the comment.
Yes. The correct implementation correctly drops the alignment check on the dst parameter for
@zygoloid
|
I predict we'll get a bug report saying "I didn't do a store, I used |
Thanks. Let's wait until someone complains :) |
@zygoloid Thanks for the explanation! I wasn't aware this fell under unspecified behavior. It's weird that alignment information can survive a What seems worrying here is that apparently GCC and Clang do not agree on semantics in this case (https://godbolt.org/z/1r9df5a4a). GCC does not assume that the pointers are aligned. The perils of unspecified behavior.... |
Yes,
Yes... I am out of town and will merge this PR later. |
9b70512
to
c5d2254
Compare
The -fsanitize=alignment implementation follows the model that we allow forming unaligned pointers but disallow accessing unaligned pointers. See [RFC: Enforcing pointer type alignment in Clang](https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html) for detail. memcpy is a memory access and we require an `int *` argument to be aligned. Similar to https://reviews.llvm.org/D9673 , emit -fsanitize=alignment check for arguments of builtin memcpy and memmove functions to catch misaligned load like: ``` // Check the alignment of a but ignore the alignment of b void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); } ``` For a reference parameter, we emit a -fsanitize=alignment check as well, which can be optimized out by InstCombinePass. We rely on the call site `TCK_ReferenceBinding` check instead. ``` // The alignment check of a will be optimized out. void unaligned_load(int &a, void *b) { memcpy(&a, b, sizeof(a)); } ``` The diagnostic message looks like ``` runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *' ``` We could use a better message for memcpy, but we don't do it for now as it would require a new check name like misaligned-pointer-use, which is probably not necessary. *RFC: Enforcing pointer type alignment in Clang* is not well documented, but this patch does not intend to change the that. Technically builtin memset functions can be checked for -fsanitize=alignment as well, but it does not seem too useful.
c5d2254
to
ca81007
Compare
Add the test to
|
The -fsanitize=alignment implementation follows the model that we allow forming unaligned pointers but disallow accessing unaligned pointers. See [RFC: Enforcing pointer type alignment in Clang](https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html) for detail. memcpy is a memory access and we require an `int *` argument to be aligned. Similar to https://reviews.llvm.org/D9673 , emit -fsanitize=alignment check for arguments of builtin memcpy and memmove functions to catch misaligned load like: ``` // Check the alignment of a but ignore the alignment of b void unaligned_load(int *a, void *b) { memcpy(a, b, sizeof(*a)); } ``` For a reference parameter, we emit a -fsanitize=alignment check as well, which can be optimized out by InstCombinePass. We rely on the call site `TCK_ReferenceBinding` check instead. ``` // The alignment check of a will be optimized out. void unaligned_load(int &a, void *b) { memcpy(&a, b, sizeof(a)); } ``` The diagnostic message looks like ``` runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int *' ``` We could use a better message for memcpy, but we don't do it for now as it would require a new check name like misaligned-pointer-use, which is probably not necessary. *RFC: Enforcing pointer type alignment in Clang* is not well documented, but this patch does not intend to change the that. Technically builtin memset functions can be checked for -fsanitize=alignment as well, but it does not seem too useful.
…n to disable memcpy instrumentation Deploying llvm#67766 to a large internal codebase uncovers many bugs (many are probably benign but need cleaning up). There are also issues in high-profile open-source projects like v8. Add a cl::opt to disable builtin instrumentation for -fsanitize=alignment to help large codebase users. In the long term, this cl::opt option may still be useful to debug -fsanitize=alignment instrumentation on builtins, so we probably want to keep it around.
…n to disable memcpy instrumentation (#69240) Deploying #67766 to a large internal codebase uncovers many bugs (many are probably benign but need cleaning up). There are also issues in high-profile open-source projects like v8. Add a cl::opt to disable builtin instrumentation for -fsanitize=alignment to help large codebase users. In the long term, this cl::opt option may still be useful to debug -fsanitize=alignment instrumentation on builtins, so we probably want to keep it around.
See llvm/llvm-project#67766 (comment) and https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html . An unaligned pointer has an unspecified value per C++ expr.static.cast, in which the compiler is permitted to disallow it being passed to memcpy. Clang -fsanitize=alignment recently becomes stricter (memcpy is now instrumented) in part to complement AddressSanitizer detection. There are quite a few LoadUnaligned overloads in the code base that tickles this unspecified behavior, but they are benign for Clang. PiperOrigin-RevId: 578755824
See llvm/llvm-project#67766 (comment) and https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html . An unaligned pointer has an unspecified value per C++ expr.static.cast, in which the compiler is permitted to disallow it being passed to memcpy. Clang -fsanitize=alignment recently becomes stricter (memcpy is now instrumented) in part to complement AddressSanitizer detection. PiperOrigin-RevId: 579015567
…es.c.src OBJECT_nonzero may be called with misaligned pointers, manifesting as a -fsanitize=alignment failure. This is UB per C11 6.3.2.3 > A pointer to an object type may be converted to a pointer to a different object type. If the resulting pointer is not correctly aligned) for the referenced type, the behavior is undefined. Nevertheless, Clang only checks alignment when the unaligned pointer is accessed. https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html Call memcpy with unaligned arguments instead to work with new Clang (llvm/llvm-project#67766).
…es.c.src OBJECT_nonzero may be called with misaligned pointers, manifesting as a -fsanitize=alignment failure. This is UB per C11 6.3.2.3 > A pointer to an object type may be converted to a pointer to a different object type. If the resulting pointer is not correctly aligned) for the referenced type, the behavior is undefined. Nevertheless, Clang only checks alignment when the unaligned pointer is accessed. https://lists.llvm.org/pipermail/llvm-dev/2016-January/094012.html Call memcpy with unaligned arguments instead to work with new Clang (llvm/llvm-project#67766).
Judging by the list of notifications at the bottom of this PR, this does seem to have hit a number of projects (including our own). |
…ng-18 CLang 18 complains about undefined behavior when memcpy is called on a T-unaligned `T*` pointer. Workaround by casting to `void*`. This is due to this upstream change: llvm/llvm-project#67766
…44468) ### Rationale for this change CLang 18 complains about undefined behavior when memcpy is called on a T-unaligned `T*` pointer. This is due to this upstream change: llvm/llvm-project#67766 ### What changes are included in this PR? Workaround by casting to `void*`. ### Are these changes tested? By existing CI tests. ### Are there any user-facing changes? No. * GitHub Issue: #44372 Lead-authored-by: Antoine Pitrou <pitrou@free.fr> Co-authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
The -fsanitize=alignment implementation uses the model that we allow
forming unaligned pointers but disallow accessing unaligned pointers.
RFC: Enforcing pointer type alignment in Clang
memcpy is a memory access and given
int *
we require it to be aligned. Similarto https://reviews.llvm.org/D9673 , emit -fsanitize=alignment check for
arguments of builtin memcpy and memmove functions to catch misaligned load like:
For a reference parameter, we emit a -fsanitize=alignment check as well, which
can be optimized out by InstCombinePass. We rely on the call site
TCK_ReferenceBinding
check instead.Technically builtin memset functions can be checked for -fsanitize=alignment as
well, but it does not seem too useful.