llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
Nikita Popov 573ca36753
[IR] Replace alignment argument with attribute on masked intrinsics (#163802)
The `masked.load`, `masked.store`, `masked.gather` and `masked.scatter`
intrinsics currently accept a separate alignment immarg. Replace this
with an `align` attribute on the pointer / vector of pointers argument.

This is the standard representation for alignment information on
intrinsics, and is already used by all other memory intrinsics. This
means the signatures now match llvm.expandload, llvm.vp.load, etc.
(Things like llvm.memcpy used to have a separate alignment argument as
well, but were already migrated a long time ago.)

It's worth noting that the masked.gather and masked.scatter intrinsics
previously accepted a zero alignment to indicate the ABI type alignment
of the element type. This special case is gone now: If the align
attribute is omitted, the implied alignment is 1, as usual. If ABI
alignment is desired, it needs to be explicitly emitted (which the
IRBuilder API already requires anyway).
2025-10-20 08:50:09 +00:00

368 lines
16 KiB
C++

//===AMDGPUAsanInstrumentation.cpp - ASAN related helper functions===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------===//
#include "AMDGPUAsanInstrumentation.h"
#define DEBUG_TYPE "amdgpu-asan-instrumentation"
using namespace llvm;
namespace llvm {
namespace AMDGPU {
static uint64_t getRedzoneSizeForScale(int AsanScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
return std::max(32U, 1U << AsanScale);
}
static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
return getRedzoneSizeForScale(AsanScale);
}
uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
constexpr uint64_t kMaxRZ = 1 << 18;
const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);
uint64_t RZ = 0;
if (SizeInBytes <= MinRZ / 2) {
// Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
// at least 32 bytes, optimize when SizeInBytes is less than or equal to
// half of MinRZ.
RZ = MinRZ - SizeInBytes;
} else {
// Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
// Round up to multiple of MinRZ.
if (SizeInBytes % MinRZ)
RZ += MinRZ - (SizeInBytes % MinRZ);
}
assert((RZ + SizeInBytes) % MinRZ == 0);
return RZ;
}
static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
size_t Res = llvm::countr_zero(TypeSize / 8);
return Res;
}
static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
Value *Cond, bool Recover) {
Value *ReportCond = Cond;
if (!Recover) {
auto *Ballot =
IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
ReportCond = IRB.CreateIsNotNull(Ballot);
}
auto *Trm = SplitBlockAndInsertIfThen(
ReportCond, &*IRB.GetInsertPoint(), false,
MDBuilder(M.getContext()).createUnlikelyBranchWeights());
Trm->getParent()->setName("asan.report");
if (Recover)
return Trm;
Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
IRB.SetInsertPoint(Trm);
return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {});
}
static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *AddrLong, Value *ShadowValue,
uint32_t TypeStoreSize, int AsanScale) {
uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
// Addr & (Granularity - 1)
Value *LastAccessedByte =
IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeStoreSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte =
IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
Type *IntptrTy, Instruction *InsertBefore,
Value *Addr, bool IsWrite,
size_t AccessSizeIndex,
Value *SizeArgument, bool Recover) {
IRB.SetInsertPoint(InsertBefore);
CallInst *Call = nullptr;
SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
SmallString<64> TypeStr{IsWrite ? "store" : "load"};
SmallString<64> EndingStr{Recover ? "_noabort" : ""};
SmallString<128> AsanErrorCallbackSizedString;
raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
<< EndingStr;
SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
AttributeList AL2;
FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
AsanErrorCallbackSizedOS.str(),
FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
SmallVector<Type *, 2> Args1{1, IntptrTy};
AttributeList AL1;
SmallString<128> AsanErrorCallbackString;
raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
<< (1ULL << AccessSizeIndex) << EndingStr;
FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
AsanErrorCallbackOS.str(),
FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
if (SizeArgument) {
Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
} else {
Call = IRB.CreateCall(AsanErrorCallback, Addr);
}
Call->setCannotMerge();
return Call;
}
static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *Shadow, int AsanScale, uint32_t AsanOffset) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, AsanScale);
if (AsanOffset == 0)
return Shadow;
// (Shadow >> scale) | offset
Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
return IRB.CreateAdd(Shadow, ShadowBase);
}
static void instrumentAddressImpl(Module &M, IRBuilder<> &IRB,
Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
Align Alignment, uint32_t TypeStoreSize,
bool IsWrite, Value *SizeArgument,
bool UseCalls, bool Recover, int AsanScale,
int AsanOffset) {
Type *AddrTy = Addr->getType();
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), AddrTy->getPointerAddressSpace());
IRB.SetInsertPoint(InsertBefore);
size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
Type *ShadowTy = IntegerType::get(M.getContext(),
std::max(8U, TypeStoreSize >> AsanScale));
Type *ShadowPtrTy = PointerType::get(M.getContext(), 0);
Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
Value *ShadowPtr =
memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
const uint64_t ShadowAlign =
std::max<uint64_t>(Alignment.value() >> AsanScale, 1);
Value *ShadowValue = IRB.CreateAlignedLoad(
ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
TypeStoreSize, AsanScale);
Cmp = IRB.CreateAnd(Cmp, Cmp2);
Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
Instruction *Crash =
generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
AccessSizeIndex, SizeArgument, Recover);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr, Align Alignment,
TypeSize TypeStoreSize, bool IsWrite,
Value *SizeArgument, bool UseCalls, bool Recover,
int AsanScale, int AsanOffset) {
if (!TypeStoreSize.isScalable()) {
unsigned Granularity = 1 << AsanScale;
const auto FixedSize = TypeStoreSize.getFixedValue();
switch (FixedSize) {
case 8:
case 16:
case 32:
case 64:
case 128:
if (Alignment.value() >= Granularity ||
Alignment.value() >= FixedSize / 8)
return instrumentAddressImpl(
M, IRB, OrigIns, InsertBefore, Addr, Alignment, FixedSize, IsWrite,
SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
}
}
// Instrument unusual size or unusual alignment.
IRB.SetInsertPoint(InsertBefore);
Type *AddrTy = Addr->getType();
Type *IntptrTy = M.getDataLayout().getIntPtrType(AddrTy);
Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize);
Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3));
Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
Value *SizeMinusOne = IRB.CreateAdd(Size, ConstantInt::get(IntptrTy, -1));
Value *LastByte =
IRB.CreateIntToPtr(IRB.CreateAdd(AddrLong, SizeMinusOne), AddrTy);
instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, Addr, {}, 8, IsWrite,
SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, LastByte, {}, 8, IsWrite,
SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
}
void getInterestingMemoryOperands(
Module &M, Instruction *I,
SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
const DataLayout &DL = M.getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), std::nullopt);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(),
std::nullopt);
} else if (auto *CI = dyn_cast<CallInst>(I)) {
switch (CI->getIntrinsicID()) {
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
bool IsWrite = CI->getType()->isVoidTy();
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = CI->getParamAlign(OpOffset);
Value *Mask = CI->getOperand(1 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
break;
}
case Intrinsic::masked_expandload:
case Intrinsic::masked_compressstore: {
bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
unsigned OpOffset = IsWrite ? 1 : 0;
auto *BasePtr = CI->getOperand(OpOffset);
MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
IRBuilder<> IB(I);
Value *Mask = CI->getOperand(1 + OpOffset);
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), BasePtr->getType()->getPointerAddressSpace());
// Use the popcount of Mask as the effective vector length.
Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
Value *EVL = IB.CreateAddReduce(ExtMask);
Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
EVL);
break;
}
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::experimental_vp_strided_load:
case Intrinsic::experimental_vp_strided_store: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = CI->getType()->isVoidTy();
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
Value *Stride = nullptr;
if (IID == Intrinsic::experimental_vp_strided_store ||
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
// mutiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
Alignment = Align(1);
}
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(), VPI->getVectorLengthParam(),
Stride);
break;
}
case Intrinsic::vp_gather:
case Intrinsic::vp_scatter: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = IID == Intrinsic::vp_scatter;
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getPointerAlignment();
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(),
VPI->getVectorLengthParam());
break;
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_global_load_tr_b64:
case Intrinsic::amdgcn_global_load_tr_b128: {
unsigned PtrOpNo = 0;
bool IsWrite = false;
Type *Ty = CI->getType();
Value *Ptr = CI->getArgOperand(PtrOpNo);
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
unsigned PtrOpNo = 1;
bool IsWrite = true;
Value *Ptr = CI->getArgOperand(PtrOpNo);
Type *Ty = Ptr->getType();
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
default:
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (Type *Ty = CI->getParamByRefType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
} else if (Type *Ty = CI->getParamByValType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
}
}
}
}
}
} // end namespace AMDGPU
} // end namespace llvm