[msan] Automatically print shadow for failing outlined checks (#145107)

A commonly used aid for debugging MSan reports is `__msan_print_shadow()`, which requires manual app code annotations (typically of the variable in the UUM report or nearby). This is in contrast to ASan, which automatically prints out the shadow map when a check fails.

This patch changes MSan to print the shadow that failed an outlined check (checks are outlined per function after the `-msan-instrumentation-with-call-threshold` is exceeded) if verbosity >= 1. Note that we do not print out the shadow map of "neighboring" variables because this is technically infeasible; see "Caveat" below.

This patch can be easier to use than `__msan_print_shadow()` because this does not require manual app code annotations. Additionally, due to optimizations, `__msan_print_shadow()` calls can sometimes spuriously affect whether a variable is initialized.

As a side effect, this patch also enables outlined checks for arbitrary-sized shadows (vs. the current hardcoded handlers for {1,2,4,8}-byte shadows).

Caveat: the shadow does not necessarily correspond to an individual user variable, because MSan instrumentation may combine and/or truncate multiple shadows prior to emitting a check that the mangled shadow is zero (e.g., `convertShadowToScalar()`, `handleSSEVectorConvertIntrinsic()`, `materializeInstructionChecks()`). On the other hand, it is arguably a strength that this feature emits the shadow that directly matters for the MSan check, which cannot be obtained using the MSan API.
This commit is contained in:
Thurston Dang 2025-06-24 15:09:44 -07:00 committed by GitHub
parent a93eb14e85
commit 1b71ea411a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 142 additions and 22 deletions

View File

@ -352,10 +352,32 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(
using namespace __msan;
// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
// an MSan shadow region.
static void print_shadow_value(void *shadow, u64 size) {
Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s");
for (unsigned int i = 0; i < size; i++) {
if (i % 4 == 0)
Printf(" ");
unsigned char x = ((unsigned char *)shadow)[i];
Printf("%x%x", x >> 4, x & 0xf);
}
Printf("\n");
Printf(
"Caveat: the shadow value does not necessarily directly correspond to a "
"single user variable. The correspondence is stronger, but not always "
"perfect, when origin tracking is enabled.\n");
Printf("\n");
}
#define MSAN_MAYBE_WARNING(type, size) \
void __msan_maybe_warning_##size(type s, u32 o) { \
GET_CALLER_PC_BP; \
\
if (UNLIKELY(s)) { \
if (Verbosity() >= 1) \
print_shadow_value((void *)(&s), sizeof(s)); \
PrintWarningWithOrigin(pc, bp, o); \
if (__msan::flags()->halt_on_error) { \
Printf("Exiting\n"); \
@ -369,6 +391,30 @@ MSAN_MAYBE_WARNING(u16, 2)
MSAN_MAYBE_WARNING(u32, 4)
MSAN_MAYBE_WARNING(u64, 8)
// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
// an MSan shadow region.
void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) {
GET_CALLER_PC_BP;
bool allZero = true;
for (unsigned int i = 0; i < size; i++) {
if (((char *)shadow)[i]) {
allZero = false;
break;
}
}
if (UNLIKELY(!allZero)) {
if (Verbosity() >= 1)
print_shadow_value(shadow, size);
PrintWarningWithOrigin(pc, bp, o);
if (__msan::flags()->halt_on_error) {
Printf("Exiting\n");
Die();
}
}
}
#define MSAN_MAYBE_STORE_ORIGIN(type, size) \
void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \
if (UNLIKELY(s)) { \

View File

@ -60,6 +60,8 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_4(u32 s, u32 o);
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_8(u64 s, u32 o);
// Variable-size counterpart of the __msan_maybe_warning_{1,2,4,8} handlers:
// `shadow` points to a copy of the shadow value (not into an MSan shadow
// region), `size` is its length in bytes, and `o` is the origin id.
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_N(void *shadow, u64 size, u32 o);
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_store_origin_1(u8 s, void *p, u32 o);

View File

@ -0,0 +1,39 @@
// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \
// RUN: && not env MSAN_OPTIONS=verbosity=1 %run %t 2>&1 | FileCheck %s
#include <ctype.h>
#include <stdio.h>
#include <sanitizer/msan_interface.h>
// Passes several uninitialized locals to printf so that MSan reports them.
// The program output is piped to FileCheck, which matches the expected
// "Shadow value" lines written as directives below.
int main(int argc, char *argv[]) {
  long double a;
  printf("a: %Lf\n", a);
  // CHECK: Shadow value (16 bytes): ffffffff ffffffff ffff0000 00000000

  unsigned long long b;
  printf("b: %llu\n", b);
  // CHECK: Shadow value (8 bytes): ffffffff ffffffff

  // Initialize only the third byte of b; the expected shadow has 00 there.
  char *p = (char *)(&b);
  p[2] = 36;
  // b is unsigned long long, so it is printed with %llu.
  printf("b: %llu\n", b);
  // CHECK: Shadow value (8 bytes): ffff00ff ffffffff

  // After the shift the expected shadow has a defined (00) first byte and the
  // previously defined byte has moved by one position.
  b = b << 8;
  printf("b: %llu\n", b);
  __msan_print_shadow(&b, sizeof(b));
  // CHECK: Shadow value (8 bytes): 00ffff00 ffffffff

  unsigned int c;
  printf("c: %u\n", c);
  // CHECK: Shadow value (4 bytes): ffffffff

  // Converted to boolean
  if (c) {
    // CHECK: Shadow value (1 byte): 01
    printf("Hello\n");
  }

  return 0;
}

View File

@ -665,6 +665,7 @@ private:
// These arrays are indexed by log2(AccessSize).
FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
/// Run-time helper (__msan_maybe_warning_N) for outlined checks whose shadow
/// size has no entry in MaybeWarningFn. Unlike the neighbouring arrays it is
/// a single callee, not indexed by access size; it takes a pointer to the
/// shadow, its size in bytes, and the origin.
FunctionCallee MaybeWarningVarSizeFn;
FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
/// Run-time helper that generates a new origin value for a stack
@ -939,7 +940,9 @@ void MemorySanitizer::createUserspaceApi(Module &M,
MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
MaybeWarningVarSizeFn = M.getOrInsertFunction(
"__msan_maybe_warning_N", TLI.getAttrList(C, {}, /*Signed=*/false),
IRB.getVoidTy(), PtrTy, IRB.getInt64Ty(), IRB.getInt32Ty());
FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
@ -1248,7 +1251,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Constants likely will be eliminated by follow-up passes.
if (isa<Constant>(V))
return false;
++SplittableBlocksCount;
return ClInstrumentationWithCallThreshold >= 0 &&
SplittableBlocksCount > ClInstrumentationWithCallThreshold;
@ -1447,18 +1449,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getDataLayout();
TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (instrumentWithCalls(ConvertedShadow) &&
SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
if (instrumentWithCalls(ConvertedShadow) && !MS.CompileKernel) {
// ZExt cannot convert between vector and scalar
ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
if (SizeIndex < kNumberOfAccessSizes) {
FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
CallBase *CB = IRB.CreateCall(
Fn, {ConvertedShadow2,
Fn,
{ConvertedShadow2,
MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
CB->addParamAttr(0, Attribute::ZExt);
CB->addParamAttr(1, Attribute::ZExt);
} else {
FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u);
IRB.CreateStore(ConvertedShadow2, ShadowAlloca);
unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType());
CallBase *CB = IRB.CreateCall(
Fn,
{ShadowAlloca, ConstantInt::get(IRB.getInt64Ty(), ShadowSize),
MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
CB->addParamAttr(1, Attribute::ZExt);
CB->addParamAttr(2, Attribute::ZExt);
}
} else {
Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(

View File

@ -73,13 +73,30 @@ define <4 x i32> @test64(<4 x i32> %vec, i64 %idx, i32 %x) sanitize_memory {
; CHECK: call void @__msan_maybe_warning_8(i64 zeroext %{{.*}}, i32 zeroext 0)
; CHECK: ret <4 x i32>
; Type size too large => inline check.
; Type size too large => use variable-size handler.
define <4 x i32> @test65(<4 x i32> %vec, i65 %idx, i32 %x) sanitize_memory {
%vec1 = insertelement <4 x i32> %vec, i32 %x, i65 %idx
ret <4 x i32> %vec1
}
; CHECK-LABEL: @test65(
; CHECK: call void @__msan_warning_noreturn
; CHECK: %[[A:.*]] = zext i65 %1 to i128
; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0)
; CHECK: ret <4 x i32>
; 128-bit index: the shadow check is expected to go through the variable-size
; handler with a shadow size of 16 bytes.
define <4 x i32> @test128(<4 x i32> %vec, i128 %idx, i32 %x) sanitize_memory {
%vec1 = insertelement <4 x i32> %vec, i32 %x, i128 %idx
ret <4 x i32> %vec1
}
; CHECK-LABEL: @test128(
; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0)
; CHECK: ret <4 x i32>
; 256-bit index: the shadow check is expected to go through the variable-size
; handler with a shadow size of 32 bytes.
define <4 x i32> @test256(<4 x i32> %vec, i256 %idx, i32 %x) sanitize_memory {
%vec1 = insertelement <4 x i32> %vec, i32 %x, i256 %idx
ret <4 x i32> %vec1
}
; CHECK-LABEL: @test256(
; CHECK: call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 32, i32 zeroext 0)
; CHECK: ret <4 x i32>
define <4 x i32> @testUndef(<4 x i32> %vec, i32 %x) sanitize_memory {