[Utils][Local] Preserve !nosanitize in combineMetadata when merging instructions (#148376)

The `combineMetadata` helper currently drops `!nosanitize` metadata when
merging two instructions, even if both originally carried `!nosanitize`.

This is problematic because `!nosanitize` is a key mechanism used by
sanitizers (e.g., ASan) to suppress instrumentation. Removing it can lead
to unintended sanitizer behavior.

This patch adds `nosanitize` to the whitelist in combineMetadata,
preserving it only if both instructions carry `!nosanitize`; otherwise,
it is dropped. This patch also adds corresponding tests in a test file
and regenerates it.

---
### Details

**Example** (see [Godbolt](https://godbolt.org/z/83P5eWczx) for
details):

```llvm
%v1 = load i32, ptr %p, !nosanitize
%v2 = load i32, ptr %p, !nosanitize
```
When merged via `combineMetadata(%v1, %v2, ...)`, the resulting
instruction loses its `!nosanitize` metadata.

Tools such as UBSan and AFL rely on `!nosanitize` to prevent unwanted
transformations or checks. However, the current implementation of
`combineMetadata` mistakenly drops `!nosanitize`, which may lead to
unintended behavior during optimization.

For example, under `-fsanitize=address,undefined -O2`, IR emitted by
UBSan may lose its `!nosanitize` metadata due to the incorrect metadata
merging in optimization. As a result, ASan could unexpectedly instrument
those instructions.
> Note: due to the current UBSan handlers having relatively
coarse-grained attributes, this specific case is difficult to reproduce
end-to-end from source code—UBSan currently inhibits such optimizations
(refer to #135135 for details).

Still, I believe it's necessary to fix this now, to support future
versions of UBSan that might allow such optimizations, and to support
third-party tools (such as AFL-based fuzzers) that rely on the presence
of `!nosanitize`.
This commit is contained in:
Kunqiu Chen 2025-07-14 15:45:08 +08:00 committed by GitHub
parent cad62df49a
commit a6e1700fa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 89 additions and 9 deletions

View File

@ -3381,7 +3381,11 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericNoaliasAddrspace(JMD, KMD));
break;
}
case LLVMContext::MD_nosanitize:
// Preserve !nosanitize if both K and J have it.
K->setMetadata(Kind, JMD);
break;
}
}
// Set !invariant.group from J if J has it. If both instructions have it
// then we will just pick it from J - even when they are different.

View File

@ -112,7 +112,7 @@ define i32 @test8(ptr %p) {
define i32 @load_noundef_load(ptr %p) {
; CHECK-LABEL: define i32 @load_noundef_load
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef !6
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef [[META6:![0-9]+]]
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]]
; CHECK-NEXT: ret i32 [[C]]
;
@ -138,7 +138,7 @@ define i32 @load_load_noundef(ptr %p) {
define void @load_dereferenceable_dominating(ptr %p) {
; CHECK-LABEL: define void @load_dereferenceable_dominating
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7
; CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable [[META7:![0-9]+]]
; CHECK-NEXT: call void @use.ptr(ptr [[A]])
; CHECK-NEXT: call void @use.ptr(ptr [[A]])
; CHECK-NEXT: ret void
@ -185,7 +185,7 @@ define void @load_ptr_nonnull_to_i64(ptr %p) {
define void @load_ptr_nonnull_noundef_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_nonnull_noundef_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !nonnull !6, !noundef !6
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META6]], !noundef [[META6]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@ -202,7 +202,7 @@ define void @load_ptr_nonnull_noundef_to_i64(ptr %p) {
define void @load_ptr_invariant_load_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_invariant_load_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !invariant.load !6
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !invariant.load [[META6]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@ -219,7 +219,7 @@ define void @load_ptr_invariant_load_to_i64(ptr %p) {
define void @load_ptr_dereferenceable_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_dereferenceable_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !7
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable [[META7]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@ -236,7 +236,7 @@ define void @load_ptr_dereferenceable_to_i64(ptr %p) {
define void @load_ptr_dereferenceable_or_null_to_i64(ptr %p) {
; CHECK-LABEL: define void @load_ptr_dereferenceable_or_null_to_i64
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null !7
; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable_or_null [[META7]]
; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]])
@ -409,6 +409,82 @@ join:
ret void
}
; Both loads of %p carry !nosanitize, so the single load that remains after
; the redundant one in the 'if' block is eliminated must keep !nosanitize.
define void @test_nosanitize1(ptr %p) {
; CHECK-LABEL: define void @test_nosanitize1
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !nosanitize [[META6]]
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[V1]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
; CHECK: if:
; CHECK-NEXT: call void @use.i32(i32 0)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: ret void
;
%v1 = load i32, ptr %p, !nosanitize !11
%cond = icmp eq i32 %v1, 0
br i1 %cond, label %if, label %join
if:
%v2 = load i32, ptr %p, !nosanitize !11
call void @use.i32(i32 %v2)
br label %join
join:
ret void
}
; Only the first load carries !nosanitize; the second load does not. The
; load that remains after merging is expected to have no !nosanitize.
define void @test_nosanitize2(ptr %p) {
; CHECK-LABEL: define void @test_nosanitize2
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[V1]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
; CHECK: if:
; CHECK-NEXT: call void @use.i32(i32 0)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: ret void
;
%v1 = load i32, ptr %p, !nosanitize !11
%cond = icmp eq i32 %v1, 0
br i1 %cond, label %if, label %join
if:
%v2 = load i32, ptr %p
call void @use.i32(i32 %v2)
br label %join
join:
ret void
}
; Only the second load (in the 'if' block) carries !nosanitize; the first
; does not. The load that remains after merging is expected to have no
; !nosanitize.
define void @test_nosanitize3(ptr %p) {
; CHECK-LABEL: define void @test_nosanitize3
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[V1]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[JOIN:%.*]]
; CHECK: if:
; CHECK-NEXT: call void @use.i32(i32 0)
; CHECK-NEXT: br label [[JOIN]]
; CHECK: join:
; CHECK-NEXT: ret void
;
%v1 = load i32, ptr %p
%cond = icmp eq i32 %v1, 0
br i1 %cond, label %if, label %join
if:
%v2 = load i32, ptr %p, !nosanitize !11
call void @use.i32(i32 %v2)
br label %join
join:
ret void
}
!0 = !{i32 0, i32 2}
!1 = !{i32 3, i32 5}
!2 = !{i32 2, i32 5}
@ -430,8 +506,8 @@ join:
; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5}
; CHECK: [[RNG4]] = !{i32 10, i32 1}
; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2}
; CHECK: [[META6:![0-9]+]] = !{}
; CHECK: [[META7:![0-9]+]] = !{i64 10}
; CHECK: [[META6]] = !{}
; CHECK: [[META7]] = !{i64 10}
; CHECK: [[RNG8]] = !{i64 0, i64 10}
; CHECK: [[RNG9]] = !{i64 0, i64 10, i64 20, i64 30}
; CHECK: [[RNG10]] = !{i64 10, i64 30}