[NVPTXLowerArgs] Add align attribute to return value of addrspace.wrap intrinsic (#153889)
If alignment inference happens after NVPTXLowerArgs, these addrspace.wrap intrinsics can prevent computeKnownBits from deriving the alignment of loads/stores from parameters. To solve this, we can insert an alignment annotation on the generated intrinsic so that computeKnownBits does not need to traverse through it to find the alignment.
This commit is contained in:
parent
67ca5dad16
commit
069ad2353c
@ -1027,9 +1027,16 @@ static inline bool isAddLike(const SDValue V) {
|
|||||||
(V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
|
(V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SDValue stripAssertAlign(SDValue N) {
|
||||||
|
if (N.getOpcode() == ISD::AssertAlign)
|
||||||
|
N = N.getOperand(0);
|
||||||
|
return N;
|
||||||
|
}
|
||||||
|
|
||||||
// selectBaseADDR - Match a dag node which will serve as the base address for an
|
// selectBaseADDR - Match a dag node which will serve as the base address for an
|
||||||
// ADDR operand pair.
|
// ADDR operand pair.
|
||||||
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
|
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
|
||||||
|
N = stripAssertAlign(N);
|
||||||
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
|
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
|
||||||
return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
|
return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
|
||||||
GA->getValueType(0), GA->getOffset(),
|
GA->getValueType(0), GA->getOffset(),
|
||||||
@ -1044,6 +1051,7 @@ static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
|
static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
|
||||||
|
Addr = stripAssertAlign(Addr);
|
||||||
APInt AccumulatedOffset(64u, 0);
|
APInt AccumulatedOffset(64u, 0);
|
||||||
while (isAddLike(Addr)) {
|
while (isAddLike(Addr)) {
|
||||||
const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
|
const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
|
||||||
@ -1055,7 +1063,7 @@ static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
AccumulatedOffset += CI;
|
AccumulatedOffset += CI;
|
||||||
Addr = Addr->getOperand(0);
|
Addr = stripAssertAlign(Addr->getOperand(0));
|
||||||
}
|
}
|
||||||
return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
|
return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
|
||||||
MVT::i32);
|
MVT::i32);
|
||||||
|
@ -412,6 +412,22 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a call to the nvvm_internal_addrspace_wrap intrinsic and set the
|
||||||
|
// alignment of the return value based on the alignment of the argument.
|
||||||
|
static CallInst *createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB,
|
||||||
|
Argument &Arg) {
|
||||||
|
CallInst *ArgInParam =
|
||||||
|
IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
|
||||||
|
{IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()},
|
||||||
|
&Arg, {}, Arg.getName() + ".param");
|
||||||
|
|
||||||
|
if (MaybeAlign ParamAlign = Arg.getParamAlign())
|
||||||
|
ArgInParam->addRetAttr(
|
||||||
|
Attribute::getWithAlignment(ArgInParam->getContext(), *ParamAlign));
|
||||||
|
|
||||||
|
return ArgInParam;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
|
struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
|
||||||
using Base = PtrUseVisitor<ArgUseChecker>;
|
using Base = PtrUseVisitor<ArgUseChecker>;
|
||||||
@ -515,10 +531,7 @@ void copyByValParam(Function &F, Argument &Arg) {
|
|||||||
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
|
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
|
||||||
Arg.replaceAllUsesWith(AllocA);
|
Arg.replaceAllUsesWith(AllocA);
|
||||||
|
|
||||||
Value *ArgInParam =
|
CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);
|
||||||
IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
|
|
||||||
{IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg.getType()},
|
|
||||||
&Arg, {}, Arg.getName());
|
|
||||||
|
|
||||||
// Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
|
// Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
|
||||||
// addrspacecast preserves alignment. Since params are constant, this load
|
// addrspacecast preserves alignment. Since params are constant, this load
|
||||||
@ -549,9 +562,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
|
|||||||
SmallVector<Use *, 16> UsesToUpdate(llvm::make_pointer_range(Arg->uses()));
|
SmallVector<Use *, 16> UsesToUpdate(llvm::make_pointer_range(Arg->uses()));
|
||||||
|
|
||||||
IRBuilder<> IRB(&*FirstInst);
|
IRBuilder<> IRB(&*FirstInst);
|
||||||
Value *ArgInParamAS = IRB.CreateIntrinsic(
|
CallInst *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg);
|
||||||
Intrinsic::nvvm_internal_addrspace_wrap,
|
|
||||||
{IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()}, {Arg});
|
|
||||||
|
|
||||||
for (Use *U : UsesToUpdate)
|
for (Use *U : UsesToUpdate)
|
||||||
convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant);
|
convertToParamAS(U, ArgInParamAS, HasCvtaParam, IsGridConstant);
|
||||||
@ -581,10 +592,7 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
|
|||||||
// argument already in the param address space, we need to use the noop
|
// argument already in the param address space, we need to use the noop
|
||||||
// intrinsic, this had the added benefit of preventing other optimizations
|
// intrinsic, this had the added benefit of preventing other optimizations
|
||||||
// from folding away this pair of addrspacecasts.
|
// from folding away this pair of addrspacecasts.
|
||||||
auto *ParamSpaceArg =
|
auto *ParamSpaceArg = createNVVMInternalAddrspaceWrap(IRB, *Arg);
|
||||||
IRB.CreateIntrinsic(Intrinsic::nvvm_internal_addrspace_wrap,
|
|
||||||
{IRB.getPtrTy(ADDRESS_SPACE_PARAM), Arg->getType()},
|
|
||||||
Arg, {}, Arg->getName() + ".param");
|
|
||||||
|
|
||||||
// Cast param address to generic address space.
|
// Cast param address to generic address space.
|
||||||
Value *GenericArg = IRB.CreateAddrSpaceCast(
|
Value *GenericArg = IRB.CreateAddrSpaceCast(
|
||||||
|
36
llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
Normal file
36
llvm/test/CodeGen/NVPTX/lower-args-alignment.ll
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||||
|
; RUN: opt < %s -passes=nvptx-lower-args,infer-alignment -S | FileCheck %s
|
||||||
|
|
||||||
|
target triple = "nvptx64-nvidia-cuda"
|
||||||
|
|
||||||
|
; ------------------------------------------------------------------------------
|
||||||
|
; Test that alignment can be inferred through llvm.nvvm.internal.addrspace.wrap.p101.p0 intrinsics
|
||||||
|
; thanks to the alignment attribute on the intrinsic
|
||||||
|
; ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
%struct.S1 = type { i32, i32, i32, i32 }
|
||||||
|
define ptx_kernel i32 @test_align8(ptr noundef readonly byval(%struct.S1) align 8 captures(none) %params) {
|
||||||
|
; CHECK-LABEL: define ptx_kernel i32 @test_align8(
|
||||||
|
; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 8 captures(none) [[PARAMS:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]])
|
||||||
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 8
|
||||||
|
; CHECK-NEXT: ret i32 [[LOAD]]
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%load = load i32, ptr %params, align 4
|
||||||
|
ret i32 %load
|
||||||
|
}
|
||||||
|
|
||||||
|
define ptx_kernel i32 @test_align1(ptr noundef readonly byval(%struct.S1) align 1 captures(none) %params) {
|
||||||
|
; CHECK-LABEL: define ptx_kernel i32 @test_align1(
|
||||||
|
; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 4 captures(none) [[PARAMS:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = call align 1 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]])
|
||||||
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 4
|
||||||
|
; CHECK-NEXT: ret i32 [[LOAD]]
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%load = load i32, ptr %params, align 4
|
||||||
|
ret i32 %load
|
||||||
|
}
|
@ -72,7 +72,7 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
|
|||||||
; PTX-NEXT: ret;
|
; PTX-NEXT: ret;
|
||||||
; OPT-LABEL: define ptx_kernel void @grid_const_int(
|
; OPT-LABEL: define ptx_kernel void @grid_const_int(
|
||||||
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[INPUT11:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
|
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
|
||||||
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
|
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
|
||||||
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
|
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
|
||||||
@ -101,7 +101,7 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
|
|||||||
; PTX-NEXT: ret;
|
; PTX-NEXT: ret;
|
||||||
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
|
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
|
||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[INPUT1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
|
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
|
||||||
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
|
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
|
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
|
||||||
@ -137,7 +137,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
|
|||||||
; PTX-NEXT: ret;
|
; PTX-NEXT: ret;
|
||||||
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
|
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
|
||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
|
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
|
||||||
; OPT-NEXT: ret void
|
; OPT-NEXT: ret void
|
||||||
@ -180,9 +180,9 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
|
|||||||
; PTX-NEXT: ret;
|
; PTX-NEXT: ret;
|
||||||
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
|
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
|
||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
|
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]])
|
||||||
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
|
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
|
||||||
; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||||
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
|
||||||
@ -208,7 +208,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
|
|||||||
; PTX-NEXT: ret;
|
; PTX-NEXT: ret;
|
||||||
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
|
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
|
||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
|
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
|
||||||
; OPT-NEXT: ret void
|
; OPT-NEXT: ret void
|
||||||
@ -235,7 +235,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
|
|||||||
; PTX-NOT .local
|
; PTX-NOT .local
|
||||||
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
|
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
|
||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
|
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
|
||||||
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
|
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
|
||||||
@ -357,7 +357,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
|
|||||||
; PTX-NEXT: ret;
|
; PTX-NEXT: ret;
|
||||||
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
|
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
|
||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
|
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
|
||||||
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
|
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
|
||||||
@ -416,7 +416,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
|
|||||||
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
|
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
|
||||||
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
|
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
|
||||||
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
|
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
|
||||||
@ -471,7 +471,7 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
|
|||||||
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; OPT-NEXT: [[TMP2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
|
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr
|
||||||
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
|
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
|
||||||
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
|
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
|
||||||
@ -520,7 +520,7 @@ declare void @device_func(ptr byval(i32) align 4)
|
|||||||
define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
|
define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
|
||||||
; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg(
|
; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg(
|
||||||
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
|
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
|
||||||
; OPT-NEXT: [[INPUT_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; OPT-NEXT: [[INPUT_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr
|
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr
|
||||||
; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]])
|
; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]])
|
||||||
; OPT-NEXT: ret void
|
; OPT-NEXT: ret void
|
||||||
|
@ -200,7 +200,7 @@ define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) {
|
|||||||
define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
|
define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) {
|
||||||
; IRC-LABEL: define ptx_kernel void @ptr_as_int_aggr(
|
; IRC-LABEL: define ptx_kernel void @ptr_as_int_aggr(
|
||||||
; IRC-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
|
; IRC-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
|
||||||
; IRC-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; IRC-NEXT: [[S3:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; IRC-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S3]], align 8
|
; IRC-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S3]], align 8
|
||||||
; IRC-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
|
; IRC-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
|
||||||
; IRC-NEXT: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
|
; IRC-NEXT: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
|
||||||
@ -210,7 +210,7 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st
|
|||||||
;
|
;
|
||||||
; IRO-LABEL: define ptx_kernel void @ptr_as_int_aggr(
|
; IRO-LABEL: define ptx_kernel void @ptr_as_int_aggr(
|
||||||
; IRO-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
|
; IRO-SAME: ptr noundef readonly byval([[STRUCT_S:%.*]]) align 8 captures(none) [[S:%.*]], i32 noundef [[V:%.*]]) {
|
||||||
; IRO-NEXT: [[S1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; IRO-NEXT: [[S1:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; IRO-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S1]], align 8
|
; IRO-NEXT: [[I:%.*]] = load i64, ptr addrspace(101) [[S1]], align 8
|
||||||
; IRO-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
|
; IRO-NEXT: [[P:%.*]] = inttoptr i64 [[I]] to ptr
|
||||||
; IRO-NEXT: store i32 [[V]], ptr [[P]], align 4
|
; IRO-NEXT: store i32 [[V]], ptr [[P]], align 4
|
||||||
|
@ -32,7 +32,7 @@ define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out
|
|||||||
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only(
|
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only(
|
||||||
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||||
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
|
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
|
||||||
; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4
|
; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4
|
||||||
; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4
|
; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4
|
||||||
; LOWER-ARGS-NEXT: ret void
|
; LOWER-ARGS-NEXT: ret void
|
||||||
@ -66,7 +66,7 @@ define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly
|
|||||||
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only_gep(
|
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only_gep(
|
||||||
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
|
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
|
||||||
; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; LOWER-ARGS-NEXT: [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
|
; LOWER-ARGS-NEXT: [[B4:%.*]] = getelementptr inbounds i8, ptr addrspace(101) [[S3]], i64 4
|
||||||
; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
|
; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4
|
||||||
; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4
|
; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4
|
||||||
@ -128,7 +128,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
|
|||||||
; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||||
; COMMON-NEXT: [[ENTRY:.*:]]
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR6:[0-9]+]]
|
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR6:[0-9]+]]
|
||||||
; COMMON-NEXT: ret void
|
; COMMON-NEXT: ret void
|
||||||
@ -167,7 +167,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
|
|||||||
; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; COMMON-NEXT: [[ENTRY:.*:]]
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
|
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
|
||||||
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR6]]
|
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR6]]
|
||||||
@ -209,7 +209,7 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
|
|||||||
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; COMMON-NEXT: [[ENTRY:.*:]]
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: store ptr [[S1]], ptr [[OUT]], align 8
|
; COMMON-NEXT: store ptr [[S1]], ptr [[OUT]], align 8
|
||||||
; COMMON-NEXT: ret void
|
; COMMON-NEXT: ret void
|
||||||
@ -246,7 +246,7 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
|
|||||||
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; COMMON-NEXT: [[ENTRY:.*:]]
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
|
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
|
||||||
; COMMON-NEXT: store ptr [[B]], ptr [[OUT]], align 8
|
; COMMON-NEXT: store ptr [[B]], ptr [[OUT]], align 8
|
||||||
@ -286,7 +286,7 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
|
|||||||
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; COMMON-NEXT: [[ENTRY:.*:]]
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S1]] to i64
|
; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S1]] to i64
|
||||||
; COMMON-NEXT: store i64 [[I]], ptr [[OUT]], align 8
|
; COMMON-NEXT: store i64 [[I]], ptr [[OUT]], align 8
|
||||||
@ -324,7 +324,7 @@ define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeo
|
|||||||
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
|
; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param(
|
||||||
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
|
; LOWER-ARGS-NEXT: [[ENTRY:.*:]]
|
||||||
; LOWER-ARGS-NEXT: [[S3:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true)
|
; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true)
|
||||||
; LOWER-ARGS-NEXT: ret void
|
; LOWER-ARGS-NEXT: ret void
|
||||||
;
|
;
|
||||||
@ -445,7 +445,7 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
|
|||||||
; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; COMMON-NEXT: [[ENTRY:.*:]]
|
; COMMON-NEXT: [[ENTRY:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true)
|
; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true)
|
||||||
; COMMON-NEXT: ret void
|
; COMMON-NEXT: ret void
|
||||||
@ -525,7 +525,7 @@ define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %
|
|||||||
; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||||
; COMMON-NEXT: [[BB:.*:]]
|
; COMMON-NEXT: [[BB:.*:]]
|
||||||
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4
|
; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4
|
||||||
; COMMON-NEXT: store i32 [[I]], ptr [[S1]], align 4
|
; COMMON-NEXT: store i32 [[I]], ptr [[S1]], align 4
|
||||||
@ -551,7 +551,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
|
|||||||
; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
|
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
|
||||||
; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
|
; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
|
||||||
; SM_60-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
|
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
|
||||||
; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
|
; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
|
||||||
; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
|
; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
|
||||||
@ -563,7 +563,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
|
|||||||
; SM_70-NEXT: [[BB:.*:]]
|
; SM_70-NEXT: [[BB:.*:]]
|
||||||
; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
|
; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
|
||||||
; SM_70-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; SM_70-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
|
; SM_70-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
|
||||||
; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
|
; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
|
||||||
@ -577,7 +577,7 @@ define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i3
|
|||||||
; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
|
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
|
||||||
; COPY-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
|
; COPY-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
|
||||||
; COPY-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
|
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
|
||||||
; COPY-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
|
; COPY-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
|
||||||
; COPY-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
|
; COPY-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
|
||||||
@ -637,7 +637,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
|
|||||||
; COMMON-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; COMMON-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false)
|
||||||
; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
|
; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
|
||||||
; COMMON-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; COMMON-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
|
||||||
; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
|
; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]]
|
||||||
; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
|
; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
|
||||||
@ -682,7 +682,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
|
|||||||
; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; SM_60-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
|
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
|
||||||
; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
|
; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; SM_60-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
|
; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
|
||||||
; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
||||||
; SM_60: [[FIRST]]:
|
; SM_60: [[FIRST]]:
|
||||||
@ -702,7 +702,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
|
|||||||
; SM_70-NEXT: [[BB:.*:]]
|
; SM_70-NEXT: [[BB:.*:]]
|
||||||
; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; SM_70-NEXT: [[TMP0:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
|
; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr
|
||||||
; SM_70-NEXT: [[TMP1:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr
|
||||||
; SM_70-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
; SM_70-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
||||||
; SM_70: [[FIRST]]:
|
; SM_70: [[FIRST]]:
|
||||||
@ -724,7 +724,7 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
|
|||||||
; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; COPY-NEXT: [[INPUT24:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT23]], ptr addrspace(101) align 8 [[INPUT24]], i64 8, i1 false)
|
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT23]], ptr addrspace(101) align 8 [[INPUT24]], i64 8, i1 false)
|
||||||
; COPY-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
|
; COPY-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COPY-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
|
; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
|
||||||
; COPY-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
; COPY-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
||||||
; COPY: [[FIRST]]:
|
; COPY: [[FIRST]]:
|
||||||
@ -808,7 +808,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
|
|||||||
; COMMON-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
; COMMON-NEXT: [[INPUT25:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
|
; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
|
||||||
; COMMON-NEXT: [[INPUT12:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
; COMMON-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
|
||||||
; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
|
||||||
; COMMON: [[FIRST]]:
|
; COMMON: [[FIRST]]:
|
||||||
@ -871,7 +871,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
|
|||||||
; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg(
|
; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg(
|
||||||
; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
|
; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] {
|
||||||
; COMMON-NEXT: [[INPUT1:%.*]] = alloca i32, align 4
|
; COMMON-NEXT: [[INPUT1:%.*]] = alloca i32, align 4
|
||||||
; COMMON-NEXT: [[INPUT2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
; COMMON-NEXT: [[INPUT2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]])
|
||||||
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false)
|
; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false)
|
||||||
; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]])
|
; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]])
|
||||||
; COMMON-NEXT: ret void
|
; COMMON-NEXT: ret void
|
||||||
|
Loading…
x
Reference in New Issue
Block a user