[NVPTX] Fix v2i8 call lowering, use generic ld/st nodes for call params (#146930)
This commit is contained in:
parent
741df45bc3
commit
35693daa70
@ -35,8 +35,8 @@ __device__ __bf16 external_func( __bf16 in);
|
||||
// CHECK: .param .align 2 .b8 _Z9test_callDF16b_param_0[2]
|
||||
__device__ __bf16 test_call( __bf16 in) {
|
||||
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
|
||||
// CHECK: st.param.b16 [param0], %[[R]];
|
||||
// CHECK: .param .align 2 .b8 retval0[2];
|
||||
// CHECK: st.param.b16 [param0], %[[R]];
|
||||
// CHECK: call.uni (retval0), _Z13external_funcDF16b, (param0);
|
||||
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
|
||||
return external_func(in);
|
||||
|
@ -145,18 +145,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
|
||||
if (tryStoreVector(N))
|
||||
return;
|
||||
break;
|
||||
case NVPTXISD::LoadParam:
|
||||
case NVPTXISD::LoadParamV2:
|
||||
case NVPTXISD::LoadParamV4:
|
||||
if (tryLoadParam(N))
|
||||
return;
|
||||
break;
|
||||
case NVPTXISD::StoreParam:
|
||||
case NVPTXISD::StoreParamV2:
|
||||
case NVPTXISD::StoreParamV4:
|
||||
if (tryStoreParam(N))
|
||||
return;
|
||||
break;
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
if (tryIntrinsicChain(N))
|
||||
return;
|
||||
@ -1462,267 +1450,6 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Select an NVPTXISD::LoadParam, LoadParamV2 or LoadParamV4 node into the
/// corresponding NVPTX::LoadParamMem* machine node.
///
/// The machine node receives the offset (as an i32 target constant), the
/// chain and the glue, and produces one value per loaded element followed by
/// a chain and a glue result.
///
/// \returns true if \p Node was replaced; false if its opcode is not one of
/// the LoadParam variants or no machine opcode is available for its memory
/// type.
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
  SDLoc DL(Node);
  // Operands consumed here: 0 = chain, 2 = offset, 3 = glue.
  SDValue InChain = Node->getOperand(0);
  SDValue OffsetOp = Node->getOperand(2);
  SDValue InGlue = Node->getOperand(3);
  MemSDNode *MemNode = cast<MemSDNode>(Node);

  // The opcode determines how many elements are loaded.
  unsigned NumLoaded;
  switch (Node->getOpcode()) {
  case NVPTXISD::LoadParam:
    NumLoaded = 1;
    break;
  case NVPTXISD::LoadParamV2:
    NumLoaded = 2;
    break;
  case NVPTXISD::LoadParamV4:
    NumLoaded = 4;
    break;
  default:
    return false;
  }

  const EVT ResultVT = Node->getValueType(0);
  const MVT::SimpleValueType MemTy =
      MemNode->getMemoryVT().getSimpleVT().SimpleTy;

  // Choose the machine opcode from the element count and the memory type.
  std::optional<unsigned> MachineOpc;
  if (NumLoaded == 1)
    MachineOpc = pickOpcodeForVT(MemTy, NVPTX::LoadParamMemI8,
                                 NVPTX::LoadParamMemI16,
                                 NVPTX::LoadParamMemI32,
                                 NVPTX::LoadParamMemI64);
  else if (NumLoaded == 2)
    MachineOpc = pickOpcodeForVT(MemTy, NVPTX::LoadParamMemV2I8,
                                 NVPTX::LoadParamMemV2I16,
                                 NVPTX::LoadParamMemV2I32,
                                 NVPTX::LoadParamMemV2I64);
  else
    MachineOpc = pickOpcodeForVT(MemTy, NVPTX::LoadParamMemV4I8,
                                 NVPTX::LoadParamMemV4I16,
                                 NVPTX::LoadParamMemV4I32,
                                 {/* no v4i64 */});
  if (!MachineOpc)
    return false;

  // Results: NumLoaded values of the node's first result type, then the
  // chain, then the glue.
  SmallVector<EVT, 6> ResultTys(NumLoaded, ResultVT);
  ResultTys.push_back(MVT::Other);
  ResultTys.push_back(MVT::Glue);
  SDVTList VTs = CurDAG->getVTList(ResultTys);

  unsigned OffsetVal = OffsetOp->getAsZExtVal();
  SDValue Ops[] = {CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), InChain,
                   InGlue};

  ReplaceNode(Node, CurDAG->getMachineNode(*MachineOpc, DL, VTs, Ops));
  return true;
}
|
||||
|
||||
// Helpers for constructing opcode (ex: NVPTX::StoreParamV4F32_iiri)
//
// `ty` is the type suffix of the opcode (I8, I16, F32, ...). Each operand is
// encoded in the opcode name as `i` (immediate) or `r` (register), and
// `isimm` is an indexable sequence of booleans, one per operand, that is true
// where the operand is an immediate.
//
// Every expansion that contains a conditional expression is fully
// parenthesised, and so are the macro arguments it tests, so the macros can be
// used safely as sub-expressions of a larger expression.

// Leaf: paste together the name of a two-operand opcode.
#define getOpcV2H(ty, opKind0, opKind1)                                        \
  NVPTX::StoreParamV2##ty##_##opKind0##opKind1

// Operand 0 kind is fixed; select on operand 1.
#define getOpcV2H1(ty, opKind0, isImm1)                                        \
  ((isImm1) ? getOpcV2H(ty, opKind0, i) : getOpcV2H(ty, opKind0, r))

// Select the two-operand opcode from both immediate flags.
#define getOpcodeForVectorStParamV2(ty, isimm)                                 \
  (((isimm)[0]) ? getOpcV2H1(ty, i, (isimm)[1])                                \
                : getOpcV2H1(ty, r, (isimm)[1]))

// Leaf: paste together the name of a four-operand opcode.
#define getOpcV4H(ty, opKind0, opKind1, opKind2, opKind3)                      \
  NVPTX::StoreParamV4##ty##_##opKind0##opKind1##opKind2##opKind3

// Operand 0-2 kinds are fixed; select on operand 3.
#define getOpcV4H3(ty, opKind0, opKind1, opKind2, isImm3)                      \
  ((isImm3) ? getOpcV4H(ty, opKind0, opKind1, opKind2, i)                      \
            : getOpcV4H(ty, opKind0, opKind1, opKind2, r))

// Operand 0-1 kinds are fixed; select on operand 2, then 3.
#define getOpcV4H2(ty, opKind0, opKind1, isImm2, isImm3)                       \
  ((isImm2) ? getOpcV4H3(ty, opKind0, opKind1, i, isImm3)                      \
            : getOpcV4H3(ty, opKind0, opKind1, r, isImm3))

// Operand 0 kind is fixed; select on operand 1, then 2, then 3.
#define getOpcV4H1(ty, opKind0, isImm1, isImm2, isImm3)                        \
  ((isImm1) ? getOpcV4H2(ty, opKind0, i, isImm2, isImm3)                       \
            : getOpcV4H2(ty, opKind0, r, isImm2, isImm3))

// Select the four-operand opcode from all four immediate flags.
#define getOpcodeForVectorStParamV4(ty, isimm)                                 \
  (((isimm)[0]) ? getOpcV4H1(ty, i, (isimm)[1], (isimm)[2], (isimm)[3])        \
                : getOpcV4H1(ty, r, (isimm)[1], (isimm)[2], (isimm)[3]))

// Select the opcode for an n-element store; any n other than 2 selects the
// four-operand form.
#define getOpcodeForVectorStParam(n, ty, isimm)                                \
  (((n) == 2) ? getOpcodeForVectorStParamV2(ty, isimm)                         \
              : getOpcodeForVectorStParamV4(ty, isimm))
|
||||
|
||||
/// Pick the NVPTX::StoreParamV2* / NVPTX::StoreParamV4* machine opcode for a
/// vector st.param, folding constant operands into immediates where the
/// memory type has immediate opcode forms.
///
/// \param Ops     Operand list whose first \p NumElts entries are the stored
///                values. Constant entries are replaced in place by target
///                constants when an immediate form is selected for them.
/// \param NumElts Number of stored elements; must be 2 or 4.
/// \param MemTy   Memory type of the store.
/// \param CurDAG  DAG used to create the target-constant nodes.
/// \param DL      Debug location attached to the created nodes.
/// \returns the opcode whose register/immediate suffix matches \p Ops.
static unsigned pickOpcodeForVectorStParam(SmallVector<SDValue, 8> &Ops,
                                           unsigned NumElts,
                                           MVT::SimpleValueType MemTy,
                                           SelectionDAG *CurDAG, SDLoc DL) {
  assert((NumElts == 2 || NumElts == 4) &&
         "Vector st.param must store 2 or 4 elements");

  // Only these memory types have opcode variants that take immediates. For
  // every other type the all-register opcode is returned below, so constant
  // operands must stay ordinary nodes (they get selected into moves) rather
  // than being rewritten into target constants.
  const bool IsFPMemTy = MemTy == MVT::f32 || MemTy == MVT::f64;
  const bool HasImmForms = IsFPMemTy || MemTy == MVT::i8 ||
                           MemTy == MVT::i16 || MemTy == MVT::i32 ||
                           MemTy == MVT::i64;

  // Determine which inputs are immediates and replace those with target
  // constants carrying the same value.
  SmallVector<bool, 4> IsImm(NumElts, false);
  if (HasImmForms) {
    for (unsigned I = 0; I < NumElts; ++I) {
      SDValue Imm = Ops[I];
      if (!isa<ConstantSDNode>(Imm) && !isa<ConstantFPSDNode>(Imm))
        continue;

      IsImm[I] = true;
      if (IsFPMemTy) {
        const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
        const ConstantFP *CF = ConstImm->getConstantFPValue();
        Ops[I] = CurDAG->getTargetConstantFP(*CF, DL, Imm->getValueType(0));
      } else {
        const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
        const ConstantInt *CI = ConstImm->getConstantIntValue();
        Ops[I] = CurDAG->getTargetConstant(*CI, DL, Imm->getValueType(0));
      }
    }
  }

  // Get opcode for MemTy, size, and register/immediate operand ordering
  switch (MemTy) {
  case MVT::i8:
    return getOpcodeForVectorStParam(NumElts, I8, IsImm);
  case MVT::i16:
    return getOpcodeForVectorStParam(NumElts, I16, IsImm);
  case MVT::i32:
    return getOpcodeForVectorStParam(NumElts, I32, IsImm);
  case MVT::i64:
    assert(NumElts == 2 && "MVT too large for NumElts > 2");
    return getOpcodeForVectorStParamV2(I64, IsImm);
  case MVT::f32:
    return getOpcodeForVectorStParam(NumElts, F32, IsImm);
  case MVT::f64:
    assert(NumElts == 2 && "MVT too large for NumElts > 2");
    return getOpcodeForVectorStParamV2(F64, IsImm);

  // These cases don't support immediates, just use the all register version
  // and generate moves.
  case MVT::i1:
    return (NumElts == 2) ? NVPTX::StoreParamV2I8_rr
                          : NVPTX::StoreParamV4I8_rrrr;
  case MVT::f16:
  case MVT::bf16:
    return (NumElts == 2) ? NVPTX::StoreParamV2I16_rr
                          : NVPTX::StoreParamV4I16_rrrr;
  case MVT::v2f16:
  case MVT::v2bf16:
  case MVT::v2i16:
  case MVT::v4i8:
    return (NumElts == 2) ? NVPTX::StoreParamV2I32_rr
                          : NVPTX::StoreParamV4I32_rrrr;
  default:
    llvm_unreachable("Cannot select st.param for unknown MemTy");
  }
}
|
||||
|
||||
/// Select an NVPTXISD::StoreParam, StoreParamV2 or StoreParamV4 node into the
/// corresponding NVPTX::StoreParam* machine node.
///
/// Operands read from \p N: 0 = chain, 1 = param index, 2 = offset,
/// 3 .. 3 + NumElts - 1 = the stored values, last = glue. The machine node
/// takes the stored values first, then the param index and offset as i32
/// target constants, then chain and glue, and inherits the memory operand of
/// \p N.
///
/// \returns true if \p N was replaced; false if no machine opcode is
/// available for the node's memory type.
bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = Param->getAsZExtVal();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = Offset->getAsZExtVal();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Glue = N->getOperand(N->getNumOperands() - 1);

  // How many elements do we have?
  unsigned NumElts;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 3));
  Ops.append({CurDAG->getTargetConstant(ParamVal, DL, MVT::i32),
              CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain, Glue});

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
  const MVT::SimpleValueType MemTy =
      Mem->getMemoryVT().getSimpleVT().SimpleTy;
  std::optional<unsigned> Opcode;
  if (NumElts == 1) {
    SDValue Imm = Ops[0];
    const bool IsConstant =
        isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm);
    // f16/bf16 constants are not folded; they use the register form.
    if (MemTy != MVT::f16 && MemTy != MVT::bf16 && IsConstant) {
      // Convert immediate to target constant
      if (MemTy == MVT::f32 || MemTy == MVT::f64) {
        const ConstantFPSDNode *ConstImm = cast<ConstantFPSDNode>(Imm);
        const ConstantFP *CF = ConstImm->getConstantFPValue();
        Imm = CurDAG->getTargetConstantFP(*CF, DL, Imm->getValueType(0));
      } else {
        const ConstantSDNode *ConstImm = cast<ConstantSDNode>(Imm);
        const ConstantInt *CI = ConstImm->getConstantIntValue();
        Imm = CurDAG->getTargetConstant(*CI, DL, Imm->getValueType(0));
      }
      Ops[0] = Imm;
      // Use immediate version of store param
      Opcode =
          pickOpcodeForVT(MemTy, NVPTX::StoreParamI8_i, NVPTX::StoreParamI16_i,
                          NVPTX::StoreParamI32_i, NVPTX::StoreParamI64_i);
    } else {
      Opcode =
          pickOpcodeForVT(MemTy, NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
                          NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r);
    }

    if (Opcode == NVPTX::StoreParamI8_r) {
      // Fine tune the opcode depending on the size of the operand.
      // This helps to avoid creating redundant COPY instructions in
      // InstrEmitter::AddRegisterOperand().
      switch (Ops[0].getSimpleValueType().SimpleTy) {
      default:
        break;
      case MVT::i32:
        Opcode = NVPTX::StoreParamI8TruncI32_r;
        break;
      case MVT::i64:
        Opcode = NVPTX::StoreParamI8TruncI64_r;
        break;
      }
    }
  } else {
    // NumElts is 2 or 4 here; the first switch rejects everything else.
    Opcode = pickOpcodeForVectorStParam(Ops, NumElts, MemTy, CurDAG, DL);
  }

  // pickOpcodeForVT yields no opcode for memory types it does not handle.
  // Report failure, as tryLoadParam does, instead of dereferencing an empty
  // optional.
  if (!Opcode)
    return false;

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
  // Carry the memory operand over to the selected machine node.
  MachineMemOperand *MemRef = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});

  ReplaceNode(N, Ret);
  return true;
}
|
||||
|
||||
/// SelectBFE - Look for instruction sequences that can be made more efficient
|
||||
/// by using the 'bfe' (bit-field extract) PTX instruction
|
||||
bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
|
||||
|
@ -78,8 +78,6 @@ private:
|
||||
bool tryLDG(MemSDNode *N);
|
||||
bool tryStore(SDNode *N);
|
||||
bool tryStoreVector(SDNode *N);
|
||||
bool tryLoadParam(SDNode *N);
|
||||
bool tryStoreParam(SDNode *N);
|
||||
bool tryFence(SDNode *N);
|
||||
void SelectAddrSpaceCast(SDNode *N);
|
||||
bool tryBFE(SDNode *N);
|
||||
|
@ -1075,12 +1075,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
MAKE_CASE(NVPTXISD::DeclareArrayParam)
|
||||
MAKE_CASE(NVPTXISD::DeclareScalarParam)
|
||||
MAKE_CASE(NVPTXISD::CALL)
|
||||
MAKE_CASE(NVPTXISD::LoadParam)
|
||||
MAKE_CASE(NVPTXISD::LoadParamV2)
|
||||
MAKE_CASE(NVPTXISD::LoadParamV4)
|
||||
MAKE_CASE(NVPTXISD::StoreParam)
|
||||
MAKE_CASE(NVPTXISD::StoreParamV2)
|
||||
MAKE_CASE(NVPTXISD::StoreParamV4)
|
||||
MAKE_CASE(NVPTXISD::MoveParam)
|
||||
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
|
||||
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
|
||||
@ -1318,105 +1312,6 @@ Align NVPTXTargetLowering::getArgumentAlignment(const CallBase *CB, Type *Ty,
|
||||
return DL.getABITypeAlign(Ty);
|
||||
}
|
||||
|
||||
// Replace a floating-point (or packed 2 x 16-bit floating-point) element type
// with the integer type this function pairs it with: f16/bf16 -> i16,
// f32/v2f16/v2bf16 -> i32, f64 -> i64.
//
// Returns true if ElementType was rewritten, false if it was left unchanged.
static bool adjustElementType(EVT &ElementType) {
  const auto Ty = ElementType.getSimpleVT().SimpleTy;

  if (Ty == MVT::f16 || Ty == MVT::bf16) {
    ElementType = MVT::i16;
    return true;
  }
  if (Ty == MVT::f32 || Ty == MVT::v2f16 || Ty == MVT::v2bf16) {
    ElementType = MVT::i32;
    return true;
  }
  if (Ty == MVT::f64) {
    ElementType = MVT::i64;
    return true;
  }
  return false;
}
|
||||
|
||||
// Use byte-store when the param address of the argument value is unaligned.
|
||||
// This may happen when the return value is a field of a packed structure.
|
||||
//
|
||||
// This is called in LowerCall() when passing the param values.
|
||||
static SDValue LowerUnalignedStoreParam(SelectionDAG &DAG, SDValue Chain,
|
||||
uint64_t Offset, EVT ElementType,
|
||||
SDValue StVal, SDValue &InGlue,
|
||||
unsigned ArgID, const SDLoc &dl) {
|
||||
// Bit logic only works on integer types
|
||||
if (adjustElementType(ElementType))
|
||||
StVal = DAG.getNode(ISD::BITCAST, dl, ElementType, StVal);
|
||||
|
||||
// Store each byte
|
||||
SDVTList StoreVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
|
||||
// Shift the byte to the last byte position
|
||||
SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, StVal,
|
||||
DAG.getConstant(i * 8, dl, MVT::i32));
|
||||
SDValue StoreOperands[] = {Chain, DAG.getConstant(ArgID, dl, MVT::i32),
|
||||
DAG.getConstant(Offset + i, dl, MVT::i32),
|
||||
ShiftVal, InGlue};
|
||||
// Trunc store only the last byte by using
|
||||
// st.param.b8
|
||||
// The register type can be larger than b8.
|
||||
Chain = DAG.getMemIntrinsicNode(
|
||||
NVPTXISD::StoreParam, dl, StoreVTs, StoreOperands, MVT::i8,
|
||||
MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
|
||||
InGlue = Chain.getValue(1);
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
// Use byte-load when the param adress of the returned value is unaligned.
|
||||
// This may happen when the returned value is a field of a packed structure.
|
||||
static SDValue
|
||||
LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
|
||||
EVT ElementType, SDValue &InGlue,
|
||||
SmallVectorImpl<SDValue> &TempProxyRegOps,
|
||||
const SDLoc &dl) {
|
||||
// Bit logic only works on integer types
|
||||
EVT MergedType = ElementType;
|
||||
adjustElementType(MergedType);
|
||||
|
||||
// Load each byte and construct the whole value. Initial value to 0
|
||||
SDValue RetVal = DAG.getConstant(0, dl, MergedType);
|
||||
// LoadParamMemI8 loads into i16 register only
|
||||
SDVTList LoadVTs = DAG.getVTList(MVT::i16, MVT::Other, MVT::Glue);
|
||||
for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) {
|
||||
SDValue LoadOperands[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
|
||||
DAG.getConstant(Offset + i, dl, MVT::i32),
|
||||
InGlue};
|
||||
// This will be selected to LoadParamMemI8
|
||||
SDValue LdVal =
|
||||
DAG.getMemIntrinsicNode(NVPTXISD::LoadParam, dl, LoadVTs, LoadOperands,
|
||||
MVT::i8, MachinePointerInfo(), Align(1));
|
||||
SDValue TmpLdVal = LdVal.getValue(0);
|
||||
Chain = LdVal.getValue(1);
|
||||
InGlue = LdVal.getValue(2);
|
||||
|
||||
TmpLdVal = DAG.getNode(NVPTXISD::ProxyReg, dl,
|
||||
TmpLdVal.getSimpleValueType(), TmpLdVal);
|
||||
TempProxyRegOps.push_back(TmpLdVal);
|
||||
|
||||
SDValue CMask = DAG.getConstant(255, dl, MergedType);
|
||||
SDValue CShift = DAG.getConstant(i * 8, dl, MVT::i32);
|
||||
// Need to extend the i16 register to the whole width.
|
||||
TmpLdVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MergedType, TmpLdVal);
|
||||
// Mask off the high bits. Leave only the lower 8bits.
|
||||
// Do this because we are using loadparam.b8.
|
||||
TmpLdVal = DAG.getNode(ISD::AND, dl, MergedType, TmpLdVal, CMask);
|
||||
// Shift and merge
|
||||
TmpLdVal = DAG.getNode(ISD::SHL, dl, MergedType, TmpLdVal, CShift);
|
||||
RetVal = DAG.getNode(ISD::OR, dl, MergedType, RetVal, TmpLdVal);
|
||||
}
|
||||
if (ElementType != MergedType)
|
||||
RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal);
|
||||
|
||||
return RetVal;
|
||||
}
|
||||
|
||||
static bool shouldConvertToIndirectCall(const CallBase *CB,
|
||||
const GlobalAddressSDNode *Func) {
|
||||
if (!Func)
|
||||
@ -1483,10 +1378,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
|
||||
SelectionDAG &DAG = CLI.DAG;
|
||||
SDLoc dl = CLI.DL;
|
||||
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
|
||||
SDValue Chain = CLI.Chain;
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
|
||||
SDValue Callee = CLI.Callee;
|
||||
bool &isTailCall = CLI.IsTailCall;
|
||||
ArgListTy &Args = CLI.getArgs();
|
||||
Type *RetTy = CLI.RetTy;
|
||||
const CallBase *CB = CLI.CB;
|
||||
@ -1496,6 +1389,36 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
return DAG.getConstant(I, dl, MVT::i32);
|
||||
};
|
||||
|
||||
const unsigned UniqueCallSite = GlobalUniqueCallSite++;
|
||||
const SDValue CallChain = CLI.Chain;
|
||||
const SDValue StartChain =
|
||||
DAG.getCALLSEQ_START(CallChain, UniqueCallSite, 0, dl);
|
||||
SDValue DeclareGlue = StartChain.getValue(1);
|
||||
|
||||
SmallVector<SDValue, 16> CallPrereqs{StartChain};
|
||||
|
||||
const auto MakeDeclareScalarParam = [&](SDValue Symbol, unsigned Size) {
|
||||
// PTX ABI requires integral types to be at least 32 bits in size. FP16 is
|
||||
// loaded/stored using i16, so it's handled here as well.
|
||||
const unsigned SizeBits = promoteScalarArgumentSize(Size * 8);
|
||||
SDValue Declare =
|
||||
DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
|
||||
{StartChain, Symbol, GetI32(SizeBits), DeclareGlue});
|
||||
CallPrereqs.push_back(Declare);
|
||||
DeclareGlue = Declare.getValue(1);
|
||||
return Declare;
|
||||
};
|
||||
|
||||
const auto MakeDeclareArrayParam = [&](SDValue Symbol, Align Align,
|
||||
unsigned Size) {
|
||||
SDValue Declare = DAG.getNode(
|
||||
NVPTXISD::DeclareArrayParam, dl, {MVT::Other, MVT::Glue},
|
||||
{StartChain, Symbol, GetI32(Align.value()), GetI32(Size), DeclareGlue});
|
||||
CallPrereqs.push_back(Declare);
|
||||
DeclareGlue = Declare.getValue(1);
|
||||
return Declare;
|
||||
};
|
||||
|
||||
// Variadic arguments.
|
||||
//
|
||||
// Normally, for each argument, we declare a param scalar or a param
|
||||
@ -1511,15 +1434,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
//
|
||||
// After all vararg is processed, 'VAOffset' holds the size of the
|
||||
// vararg byte array.
|
||||
assert((CLI.IsVarArg || CLI.Args.size() == CLI.NumFixedArgs) &&
|
||||
"Non-VarArg function with extra arguments");
|
||||
|
||||
SDValue VADeclareParam; // vararg byte array
|
||||
const unsigned FirstVAArg = CLI.NumFixedArgs; // position of first variadic
|
||||
unsigned VAOffset = 0; // current offset in the param array
|
||||
unsigned VAOffset = 0; // current offset in the param array
|
||||
|
||||
const unsigned UniqueCallSite = GlobalUniqueCallSite++;
|
||||
SDValue TempChain = Chain;
|
||||
Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
|
||||
SDValue InGlue = Chain.getValue(1);
|
||||
const SDValue VADeclareParam =
|
||||
CLI.Args.size() > FirstVAArg
|
||||
? MakeDeclareArrayParam(getCallParamSymbol(DAG, FirstVAArg, MVT::i32),
|
||||
Align(STI.getMaxRequiredAlignment()), 0)
|
||||
: SDValue();
|
||||
|
||||
// Args.size() and Outs.size() need not match.
|
||||
// Outs.size() will be larger
|
||||
@ -1580,43 +1505,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
assert((!IsByVal || TypeSize == ArgOuts[0].Flags.getByValSize()) &&
|
||||
"type size mismatch");
|
||||
|
||||
const std::optional<SDValue> ArgDeclare = [&]() -> std::optional<SDValue> {
|
||||
if (IsVAArg) {
|
||||
if (ArgI == FirstVAArg) {
|
||||
VADeclareParam = DAG.getNode(
|
||||
NVPTXISD::DeclareArrayParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, ParamSymbol, GetI32(STI.getMaxRequiredAlignment()),
|
||||
GetI32(0), InGlue});
|
||||
return VADeclareParam;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
if (IsByVal || shouldPassAsArray(Arg.Ty)) {
|
||||
// declare .param .align <align> .b8 .param<n>[<size>];
|
||||
return DAG.getNode(NVPTXISD::DeclareArrayParam, dl,
|
||||
{MVT::Other, MVT::Glue},
|
||||
{Chain, ParamSymbol, GetI32(ArgAlign.value()),
|
||||
GetI32(TypeSize), InGlue});
|
||||
}
|
||||
const SDValue ArgDeclare = [&]() {
|
||||
if (IsVAArg)
|
||||
return VADeclareParam;
|
||||
|
||||
if (IsByVal || shouldPassAsArray(Arg.Ty))
|
||||
return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TypeSize);
|
||||
|
||||
assert(ArgOuts.size() == 1 && "We must pass only one value as non-array");
|
||||
// declare .param .b<size> .param<n>;
|
||||
assert((ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint()) &&
|
||||
"Only int and float types are supported as non-array arguments");
|
||||
|
||||
// PTX ABI requires integral types to be at least 32 bits in
|
||||
// size. FP16 is loaded/stored using i16, so it's handled
|
||||
// here as well.
|
||||
const unsigned PromotedSize =
|
||||
(ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint())
|
||||
? promoteScalarArgumentSize(TypeSize * 8)
|
||||
: TypeSize * 8;
|
||||
|
||||
return DAG.getNode(NVPTXISD::DeclareScalarParam, dl,
|
||||
{MVT::Other, MVT::Glue},
|
||||
{Chain, ParamSymbol, GetI32(PromotedSize), InGlue});
|
||||
return MakeDeclareScalarParam(ParamSymbol, TypeSize);
|
||||
}();
|
||||
if (ArgDeclare) {
|
||||
Chain = ArgDeclare->getValue(0);
|
||||
InGlue = ArgDeclare->getValue(1);
|
||||
}
|
||||
|
||||
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter
|
||||
// than 32-bits are sign extended or zero extended, depending on
|
||||
@ -1626,36 +1527,25 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32;
|
||||
|
||||
const auto GetStoredValue = [&](const unsigned I, EVT EltVT,
|
||||
const Align PartAlign) {
|
||||
SDValue StVal;
|
||||
const MaybeAlign PartAlign) {
|
||||
if (IsByVal) {
|
||||
SDValue Ptr = ArgOutVals[0];
|
||||
auto MPI = refinePtrAS(Ptr, DAG, DL, *this);
|
||||
SDValue SrcAddr =
|
||||
DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(Offsets[I]));
|
||||
|
||||
StVal = DAG.getLoad(EltVT, dl, TempChain, SrcAddr, MPI, PartAlign);
|
||||
} else {
|
||||
StVal = ArgOutVals[I];
|
||||
|
||||
auto PromotedVT = promoteScalarIntegerPTX(StVal.getValueType());
|
||||
if (PromotedVT != StVal.getValueType()) {
|
||||
StVal = DAG.getNode(getExtOpcode(ArgOuts[I].Flags), dl, PromotedVT,
|
||||
StVal);
|
||||
}
|
||||
return DAG.getLoad(EltVT, dl, CallChain, SrcAddr, MPI, PartAlign);
|
||||
}
|
||||
SDValue StVal = ArgOutVals[I];
|
||||
assert(promoteScalarIntegerPTX(StVal.getValueType()) ==
|
||||
StVal.getValueType() &&
|
||||
"OutVal type should always be legal");
|
||||
|
||||
if (ExtendIntegerParam) {
|
||||
assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
|
||||
// zext/sext to i32
|
||||
StVal =
|
||||
DAG.getNode(getExtOpcode(ArgOuts[I].Flags), dl, MVT::i32, StVal);
|
||||
} else if (EltVT.getSizeInBits() < 16) {
|
||||
// Use 16-bit registers for small stores as it's the
|
||||
// smallest general purpose register size supported by NVPTX.
|
||||
StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
|
||||
}
|
||||
return StVal;
|
||||
const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
|
||||
const EVT StoreVT =
|
||||
ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
|
||||
|
||||
return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl);
|
||||
};
|
||||
|
||||
const auto VectorInfo =
|
||||
@ -1664,23 +1554,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
unsigned J = 0;
|
||||
for (const unsigned NumElts : VectorInfo) {
|
||||
const int CurOffset = Offsets[J];
|
||||
EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
|
||||
const Align PartAlign = commonAlignment(ArgAlign, CurOffset);
|
||||
|
||||
// If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
|
||||
// scalar store. In such cases, fall back to byte stores.
|
||||
if (NumElts == 1 && !IsVAArg && PartAlign < DAG.getEVTAlign(EltVT)) {
|
||||
|
||||
SDValue StVal = GetStoredValue(J, EltVT, PartAlign);
|
||||
Chain = LowerUnalignedStoreParam(DAG, Chain,
|
||||
CurOffset + (IsByVal ? VAOffset : 0),
|
||||
EltVT, StVal, InGlue, ArgI, dl);
|
||||
|
||||
// LowerUnalignedStoreParam took care of inserting the necessary nodes
|
||||
// into the SDAG, so just move on to the next element.
|
||||
J++;
|
||||
continue;
|
||||
}
|
||||
const EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
|
||||
|
||||
if (IsVAArg && !IsByVal)
|
||||
// Align each part of the variadic argument to their type.
|
||||
@ -1688,44 +1562,45 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
|
||||
assert((IsVAArg || VAOffset == 0) &&
|
||||
"VAOffset must be 0 for non-VA args");
|
||||
SmallVector<SDValue, 6> StoreOperands{
|
||||
Chain, GetI32(IsVAArg ? FirstVAArg : ArgI),
|
||||
GetI32(VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset))};
|
||||
|
||||
// Record the values to store.
|
||||
for (const unsigned K : llvm::seq(NumElts))
|
||||
StoreOperands.push_back(GetStoredValue(J + K, EltVT, PartAlign));
|
||||
StoreOperands.push_back(InGlue);
|
||||
const unsigned Offset =
|
||||
(VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset));
|
||||
SDValue Ptr =
|
||||
DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset));
|
||||
|
||||
NVPTXISD::NodeType Op;
|
||||
switch (NumElts) {
|
||||
case 1:
|
||||
Op = NVPTXISD::StoreParam;
|
||||
break;
|
||||
case 2:
|
||||
Op = NVPTXISD::StoreParamV2;
|
||||
break;
|
||||
case 4:
|
||||
Op = NVPTXISD::StoreParamV4;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Invalid vector info.");
|
||||
const MaybeAlign CurrentAlign = ExtendIntegerParam
|
||||
? MaybeAlign(std::nullopt)
|
||||
: commonAlignment(ArgAlign, Offset);
|
||||
|
||||
SDValue Val;
|
||||
if (NumElts == 1) {
|
||||
Val = GetStoredValue(J, EltVT, CurrentAlign);
|
||||
} else {
|
||||
SmallVector<SDValue, 8> StoreVals;
|
||||
for (const unsigned K : llvm::seq(NumElts)) {
|
||||
SDValue ValJ = GetStoredValue(J + K, EltVT, CurrentAlign);
|
||||
if (ValJ.getValueType().isVector())
|
||||
DAG.ExtractVectorElements(ValJ, StoreVals);
|
||||
else
|
||||
StoreVals.push_back(ValJ);
|
||||
}
|
||||
|
||||
EVT VT = EVT::getVectorVT(
|
||||
*DAG.getContext(), StoreVals[0].getValueType(), StoreVals.size());
|
||||
Val = DAG.getBuildVector(VT, dl, StoreVals);
|
||||
}
|
||||
// Adjust type of the store op if we've extended the scalar
|
||||
// return value.
|
||||
EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
|
||||
|
||||
Chain = DAG.getMemIntrinsicNode(
|
||||
Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
|
||||
TheStoreType, MachinePointerInfo(), PartAlign,
|
||||
MachineMemOperand::MOStore);
|
||||
InGlue = Chain.getValue(1);
|
||||
SDValue StoreParam =
|
||||
DAG.getStore(ArgDeclare, dl, Val, Ptr,
|
||||
MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
|
||||
CallPrereqs.push_back(StoreParam);
|
||||
|
||||
// TODO: We may need to support vector types that can be passed
|
||||
// as scalars in variadic arguments.
|
||||
if (IsVAArg && !IsByVal) {
|
||||
assert(NumElts == 1 &&
|
||||
"Vectorization is expected to be disabled for variadics.");
|
||||
const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
|
||||
VAOffset +=
|
||||
DL.getTypeAllocSize(TheStoreType.getTypeForEVT(*DAG.getContext()));
|
||||
}
|
||||
@ -1736,33 +1611,21 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
VAOffset += TypeSize;
|
||||
}
|
||||
|
||||
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
|
||||
|
||||
// Handle Result
|
||||
if (!Ins.empty()) {
|
||||
const SDValue RetDeclare = [&]() {
|
||||
const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
|
||||
const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
|
||||
if (shouldPassAsArray(RetTy)) {
|
||||
const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
|
||||
return DAG.getNode(NVPTXISD::DeclareArrayParam, dl,
|
||||
{MVT::Other, MVT::Glue},
|
||||
{Chain, RetSymbol, GetI32(RetAlign.value()),
|
||||
GetI32(ResultSize / 8), InGlue});
|
||||
}
|
||||
const auto PromotedResultSize = promoteScalarArgumentSize(ResultSize);
|
||||
return DAG.getNode(
|
||||
NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, RetSymbol, GetI32(PromotedResultSize), InGlue});
|
||||
}();
|
||||
Chain = RetDeclare.getValue(0);
|
||||
InGlue = RetDeclare.getValue(1);
|
||||
const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
|
||||
const unsigned ResultSize = DL.getTypeAllocSize(RetTy);
|
||||
if (shouldPassAsArray(RetTy)) {
|
||||
const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
|
||||
MakeDeclareArrayParam(RetSymbol, RetAlign, ResultSize);
|
||||
} else {
|
||||
MakeDeclareScalarParam(RetSymbol, ResultSize);
|
||||
}
|
||||
}
|
||||
|
||||
const bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
|
||||
// Set the size of the vararg param byte array if the callee is a variadic
|
||||
// function and the variadic part is not empty.
|
||||
if (HasVAArgs) {
|
||||
if (VADeclareParam) {
|
||||
SDValue DeclareParamOps[] = {VADeclareParam.getOperand(0),
|
||||
VADeclareParam.getOperand(1),
|
||||
VADeclareParam.getOperand(2), GetI32(VAOffset),
|
||||
@ -1771,6 +1634,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
VADeclareParam->getVTList(), DeclareParamOps);
|
||||
}
|
||||
|
||||
const auto *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
|
||||
// If the type of the callsite does not match that of the function, convert
|
||||
// the callsite to an indirect call.
|
||||
const bool ConvertToIndirectCall = shouldConvertToIndirectCall(CB, Func);
|
||||
@ -1800,15 +1664,16 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
// instruction.
|
||||
// The prototype is embedded in a string and put as the operand for a
|
||||
// CallPrototype SDNode which will print out to the value of the string.
|
||||
const bool HasVAArgs = CLI.IsVarArg && (CLI.Args.size() > CLI.NumFixedArgs);
|
||||
std::string Proto =
|
||||
getPrototype(DL, RetTy, Args, CLI.Outs,
|
||||
HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB,
|
||||
UniqueCallSite);
|
||||
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
|
||||
Chain = DAG.getNode(
|
||||
NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InGlue});
|
||||
InGlue = Chain.getValue(1);
|
||||
const SDValue PrototypeDeclare = DAG.getNode(
|
||||
NVPTXISD::CallPrototype, dl, MVT::Other,
|
||||
{StartChain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32)});
|
||||
CallPrereqs.push_back(PrototypeDeclare);
|
||||
}
|
||||
|
||||
if (ConvertToIndirectCall) {
|
||||
@ -1826,24 +1691,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
const unsigned NumArgs =
|
||||
std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
|
||||
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
|
||||
/// NumParams, Callee, Proto, InGlue)
|
||||
Chain = DAG.getNode(NVPTXISD::CALL, dl, {MVT::Other, MVT::Glue},
|
||||
{Chain, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
|
||||
GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee,
|
||||
GetI32(Proto), InGlue});
|
||||
InGlue = Chain.getValue(1);
|
||||
/// NumParams, Callee, Proto)
|
||||
const SDValue CallToken = DAG.getTokenFactor(dl, CallPrereqs);
|
||||
const SDValue Call = DAG.getNode(
|
||||
NVPTXISD::CALL, dl, MVT::Other,
|
||||
{CallToken, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
|
||||
GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee, GetI32(Proto)});
|
||||
|
||||
SmallVector<SDValue, 16> LoadChains{Call};
|
||||
SmallVector<SDValue, 16> ProxyRegOps;
|
||||
// An item of the vector is filled if the element does not need a ProxyReg
|
||||
// operation on it and should be added to InVals as is. ProxyRegOps and
|
||||
// ProxyRegTruncates contain empty/none items at the same index.
|
||||
SmallVector<SDValue, 16> RetElts;
|
||||
// A temporary ProxyReg operations inserted in `LowerUnalignedLoadRetParam()`
|
||||
// to use the values of `LoadParam`s and to be replaced later then
|
||||
// `CALLSEQ_END` is added.
|
||||
SmallVector<SDValue, 16> TempProxyRegOps;
|
||||
|
||||
// Generate loads from param memory/moves from registers for result
|
||||
if (!Ins.empty()) {
|
||||
SmallVector<EVT, 16> VTs;
|
||||
SmallVector<uint64_t, 16> Offsets;
|
||||
@ -1860,104 +1716,65 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
|
||||
const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
|
||||
unsigned I = 0;
|
||||
for (const unsigned VectorizedSize : VectorInfo) {
|
||||
EVT TheLoadType = promoteScalarIntegerPTX(VTs[I]);
|
||||
EVT EltType = Ins[I].VT;
|
||||
const Align EltAlign = commonAlignment(RetAlign, Offsets[I]);
|
||||
for (const unsigned NumElts : VectorInfo) {
|
||||
const MaybeAlign CurrentAlign =
|
||||
ExtendIntegerRetVal ? MaybeAlign(std::nullopt)
|
||||
: commonAlignment(RetAlign, Offsets[I]);
|
||||
|
||||
if (TheLoadType != VTs[I])
|
||||
EltType = TheLoadType;
|
||||
const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
|
||||
const EVT LoadVT =
|
||||
ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
|
||||
|
||||
if (ExtendIntegerRetVal) {
|
||||
TheLoadType = MVT::i32;
|
||||
EltType = MVT::i32;
|
||||
} else if (TheLoadType.getSizeInBits() < 16) {
|
||||
EltType = MVT::i16;
|
||||
}
|
||||
const unsigned PackingAmt =
|
||||
LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
|
||||
|
||||
// If we have a PVF_SCALAR entry, it may not be sufficiently aligned for a
|
||||
// scalar load. In such cases, fall back to byte loads.
|
||||
if (VectorizedSize == 1 && RetTy->isAggregateType() &&
|
||||
EltAlign < DAG.getEVTAlign(TheLoadType)) {
|
||||
SDValue Ret = LowerUnalignedLoadRetParam(
|
||||
DAG, Chain, Offsets[I], TheLoadType, InGlue, TempProxyRegOps, dl);
|
||||
ProxyRegOps.push_back(SDValue());
|
||||
RetElts.resize(I);
|
||||
RetElts.push_back(Ret);
|
||||
const EVT VecVT = NumElts == 1 ? LoadVT
|
||||
: EVT::getVectorVT(*DAG.getContext(),
|
||||
LoadVT.getScalarType(),
|
||||
NumElts * PackingAmt);
|
||||
|
||||
I++;
|
||||
continue;
|
||||
}
|
||||
const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
|
||||
SDValue Ptr =
|
||||
DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
|
||||
|
||||
SmallVector<EVT, 6> LoadVTs(VectorizedSize, EltType);
|
||||
LoadVTs.append({MVT::Other, MVT::Glue});
|
||||
SDValue R =
|
||||
DAG.getLoad(VecVT, dl, Call, Ptr,
|
||||
MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
|
||||
|
||||
NVPTXISD::NodeType Op;
|
||||
switch (VectorizedSize) {
|
||||
case 1:
|
||||
Op = NVPTXISD::LoadParam;
|
||||
break;
|
||||
case 2:
|
||||
Op = NVPTXISD::LoadParamV2;
|
||||
break;
|
||||
case 4:
|
||||
Op = NVPTXISD::LoadParamV4;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Invalid vector info.");
|
||||
}
|
||||
LoadChains.push_back(R.getValue(1));
|
||||
|
||||
SDValue LoadOperands[] = {Chain, GetI32(1), GetI32(Offsets[I]), InGlue};
|
||||
SDValue RetVal = DAG.getMemIntrinsicNode(
|
||||
Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
|
||||
MachinePointerInfo(), EltAlign, MachineMemOperand::MOLoad);
|
||||
|
||||
for (const unsigned J : llvm::seq(VectorizedSize)) {
|
||||
ProxyRegOps.push_back(RetVal.getValue(J));
|
||||
}
|
||||
|
||||
Chain = RetVal.getValue(VectorizedSize);
|
||||
InGlue = RetVal.getValue(VectorizedSize + 1);
|
||||
|
||||
I += VectorizedSize;
|
||||
if (NumElts == 1)
|
||||
ProxyRegOps.push_back(R);
|
||||
else
|
||||
for (const unsigned J : llvm::seq(NumElts)) {
|
||||
SDValue Elt = DAG.getNode(
|
||||
LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
|
||||
: ISD::EXTRACT_VECTOR_ELT,
|
||||
dl, LoadVT, R, DAG.getVectorIdxConstant(J * PackingAmt, dl));
|
||||
ProxyRegOps.push_back(Elt);
|
||||
}
|
||||
I += NumElts;
|
||||
}
|
||||
}
|
||||
|
||||
Chain =
|
||||
DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);
|
||||
InGlue = Chain.getValue(1);
|
||||
const SDValue EndToken = DAG.getTokenFactor(dl, LoadChains);
|
||||
const SDValue CallEnd = DAG.getCALLSEQ_END(EndToken, UniqueCallSite,
|
||||
UniqueCallSite + 1, SDValue(), dl);
|
||||
|
||||
// Append ProxyReg instructions to the chain to make sure that `callseq_end`
|
||||
// will not get lost. Otherwise, during libcalls expansion, the nodes can become
|
||||
// dangling.
|
||||
for (const unsigned I : llvm::seq(ProxyRegOps.size())) {
|
||||
if (I < RetElts.size() && RetElts[I]) {
|
||||
InVals.push_back(RetElts[I]);
|
||||
continue;
|
||||
}
|
||||
|
||||
SDValue Ret =
|
||||
DAG.getNode(NVPTXISD::ProxyReg, dl, ProxyRegOps[I].getSimpleValueType(),
|
||||
{Chain, ProxyRegOps[I]});
|
||||
|
||||
const EVT ExpectedVT = Ins[I].VT;
|
||||
if (!Ret.getValueType().bitsEq(ExpectedVT)) {
|
||||
Ret = DAG.getNode(ISD::TRUNCATE, dl, ExpectedVT, Ret);
|
||||
}
|
||||
for (const auto [I, Reg] : llvm::enumerate(ProxyRegOps)) {
|
||||
SDValue Proxy =
|
||||
DAG.getNode(NVPTXISD::ProxyReg, dl, Reg.getValueType(), {CallEnd, Reg});
|
||||
SDValue Ret = correctParamType(Proxy, Ins[I].VT, Ins[I].Flags, DAG, dl);
|
||||
InVals.push_back(Ret);
|
||||
}
|
||||
|
||||
for (SDValue &T : TempProxyRegOps) {
|
||||
SDValue Repl = DAG.getNode(NVPTXISD::ProxyReg, dl, T.getSimpleValueType(),
|
||||
{Chain, T.getOperand(0)});
|
||||
DAG.ReplaceAllUsesWith(T, Repl);
|
||||
DAG.RemoveDeadNode(T.getNode());
|
||||
}
|
||||
|
||||
// set isTailCall to false for now, until we figure out how to express
|
||||
// set IsTailCall to false for now, until we figure out how to express
|
||||
// tail call optimization in PTX
|
||||
isTailCall = false;
|
||||
return Chain;
|
||||
CLI.IsTailCall = false;
|
||||
return CallEnd;
|
||||
}
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
||||
@ -5117,10 +4934,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
|
||||
Operands.push_back(DCI.DAG.getIntPtrConstant(
|
||||
cast<LoadSDNode>(LD)->getExtensionType(), DL));
|
||||
break;
|
||||
case NVPTXISD::LoadParamV2:
|
||||
OldNumOutputs = 2;
|
||||
Opcode = NVPTXISD::LoadParamV4;
|
||||
break;
|
||||
case NVPTXISD::LoadV2:
|
||||
OldNumOutputs = 2;
|
||||
Opcode = NVPTXISD::LoadV4;
|
||||
@ -5201,12 +5014,6 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
|
||||
MemVT = ST->getMemoryVT();
|
||||
Opcode = NVPTXISD::StoreV2;
|
||||
break;
|
||||
case NVPTXISD::StoreParam:
|
||||
Opcode = NVPTXISD::StoreParamV2;
|
||||
break;
|
||||
case NVPTXISD::StoreParamV2:
|
||||
Opcode = NVPTXISD::StoreParamV4;
|
||||
break;
|
||||
case NVPTXISD::StoreV2:
|
||||
MemVT = ST->getMemoryVT();
|
||||
Opcode = NVPTXISD::StoreV4;
|
||||
@ -5218,7 +5025,6 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
|
||||
return SDValue();
|
||||
Opcode = NVPTXISD::StoreV8;
|
||||
break;
|
||||
case NVPTXISD::StoreParamV4:
|
||||
case NVPTXISD::StoreV8:
|
||||
// PTX doesn't support the next doubling of operands
|
||||
return SDValue();
|
||||
@ -5263,30 +5069,11 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
|
||||
MemVT, ST->getMemOperand());
|
||||
}
|
||||
|
||||
static SDValue PerformStoreCombineHelper(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
unsigned Front, unsigned Back) {
|
||||
if (all_of(N->ops().drop_front(Front).drop_back(Back),
|
||||
[](const SDUse &U) { return U.get()->isUndef(); }))
|
||||
// Operand 0 is the previous value in the chain. Cannot return EntryToken
|
||||
// as the previous value will become unused and eliminated later.
|
||||
return N->getOperand(0);
|
||||
|
||||
return combinePackingMovIntoStore(N, DCI, Front, Back);
|
||||
}
|
||||
|
||||
static SDValue PerformStoreCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
return combinePackingMovIntoStore(N, DCI, 1, 2);
|
||||
}
|
||||
|
||||
static SDValue PerformStoreParamCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
// Operands from the 3rd to the 2nd last one are the values to be stored.
|
||||
// {Chain, ArgID, Offset, Val, Glue}
|
||||
return PerformStoreCombineHelper(N, DCI, 3, 1);
|
||||
}
|
||||
|
||||
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
|
||||
///
|
||||
static SDValue PerformADDCombine(SDNode *N,
|
||||
@ -5942,6 +5729,86 @@ static SDValue combinePRMT(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
|
||||
N->getConstantOperandAPInt(2),
|
||||
N->getConstantOperandVal(3)),
|
||||
SDLoc(N), N->getValueType(0));
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// During call lowering we wrap the return values in a ProxyReg node which
|
||||
// depend on the chain value produced by the completed call. This ensures that
|
||||
// the full call is emitted in cases where libcalls are used to legalize
|
||||
// operations. To improve the functioning of other DAG combines we pull all
|
||||
// operations we can through one of these nodes, ensuring that the ProxyReg
|
||||
// directly wraps a load. That is:
|
||||
//
|
||||
// (ProxyReg (zext (load retval0))) => (zext (ProxyReg (load retval0)))
|
||||
//
|
||||
// Recursively rebuilds the expression rooted at R so that the ProxyReg ends up
// directly on the loads at its leaves. Chain is passed as the chain operand of
// every ProxyReg created here. Returns the rebuilt value, or an empty SDValue
// if any part of the expression is an opcode not handled below.
static SDValue sinkProxyReg(SDValue R, SDValue Chain,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
switch (R.getOpcode()) {
|
||||
// Single-operand wrappers: sink into operand 0 and re-create the same
// opcode, with the same result type, on top of the sunk value.
case ISD::TRUNCATE:
|
||||
case ISD::ANY_EXTEND:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::BITCAST: {
|
||||
if (SDValue V = sinkProxyReg(R.getOperand(0), Chain, DCI))
|
||||
return DCI.DAG.getNode(R.getOpcode(), SDLoc(R), R.getValueType(), V);
|
||||
return SDValue();
|
||||
}
|
||||
// Two-operand ops: both operands must be sinkable, otherwise give up. A
// constant operand is accepted as is via the ISD::Constant case below.
case ISD::SHL:
|
||||
case ISD::SRL:
|
||||
case ISD::SRA:
|
||||
case ISD::OR: {
|
||||
if (SDValue A = sinkProxyReg(R.getOperand(0), Chain, DCI))
|
||||
if (SDValue B = sinkProxyReg(R.getOperand(1), Chain, DCI))
|
||||
return DCI.DAG.getNode(R.getOpcode(), SDLoc(R), R.getValueType(), A, B);
|
||||
return SDValue();
|
||||
}
|
||||
// Constants are returned unchanged; no ProxyReg is created for them.
case ISD::Constant:
|
||||
return R;
|
||||
// Leaf case: wrap the load value itself in a ProxyReg that uses Chain.
case ISD::LOAD:
|
||||
case NVPTXISD::LoadV2:
|
||||
case NVPTXISD::LoadV4: {
|
||||
return DCI.DAG.getNode(NVPTXISD::ProxyReg, SDLoc(R), R.getValueType(),
|
||||
{Chain, R});
|
||||
}
|
||||
case ISD::BUILD_VECTOR: {
|
||||
// Not attempted while the combiner is still running before legalization.
if (DCI.isBeforeLegalize())
|
||||
return SDValue();
|
||||
|
||||
// Every element must be sinkable; a single failure abandons the rewrite.
SmallVector<SDValue, 16> Ops;
|
||||
for (auto &Op : R->ops()) {
|
||||
SDValue V = sinkProxyReg(Op, Chain, DCI);
|
||||
if (!V)
|
||||
return SDValue();
|
||||
Ops.push_back(V);
|
||||
}
|
||||
return DCI.DAG.getNode(ISD::BUILD_VECTOR, SDLoc(R), R.getValueType(), Ops);
|
||||
}
|
||||
case ISD::EXTRACT_VECTOR_ELT: {
|
||||
// Not attempted while the combiner is still running before legalization.
if (DCI.isBeforeLegalize())
|
||||
return SDValue();
|
||||
|
||||
// Only the vector operand is sunk; the index operand is reused unchanged.
if (SDValue V = sinkProxyReg(R.getOperand(0), Chain, DCI))
|
||||
return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(R),
|
||||
R.getValueType(), V, R.getOperand(1));
|
||||
return SDValue();
|
||||
}
|
||||
// Any other opcode blocks the transformation.
default:
|
||||
return SDValue();
|
||||
}
|
||||
}
|
||||
|
||||
// DAG combine for an NVPTXISD::ProxyReg node N. Returns the replacement value
// produced by sinkProxyReg, or an empty SDValue when N is left untouched.
static SDValue combineProxyReg(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
|
||||
// Operand 0 of the ProxyReg is its chain, operand 1 is the wrapped value.
SDValue Chain = N->getOperand(0);
|
||||
SDValue Reg = N->getOperand(1);
|
||||
|
||||
// If the ProxyReg is not wrapping a load, try to pull the operations through
|
||||
// the ProxyReg.
|
||||
if (Reg.getOpcode() != ISD::LOAD) {
|
||||
if (SDValue V = sinkProxyReg(Reg, Chain, DCI))
|
||||
return V;
|
||||
}
|
||||
|
||||
// Already wrapping a plain load, or the wrapped expression could not be sunk.
return SDValue();
|
||||
}
|
||||
@ -5965,7 +5832,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::FADD:
|
||||
return PerformFADDCombine(N, DCI, OptLevel);
|
||||
case ISD::LOAD:
|
||||
case NVPTXISD::LoadParamV2:
|
||||
case NVPTXISD::LoadV2:
|
||||
case NVPTXISD::LoadV4:
|
||||
return combineUnpackingMovIntoLoad(N, DCI);
|
||||
@ -5973,6 +5839,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
return PerformMULCombine(N, DCI, OptLevel);
|
||||
case NVPTXISD::PRMT:
|
||||
return combinePRMT(N, DCI, OptLevel);
|
||||
case NVPTXISD::ProxyReg:
|
||||
return combineProxyReg(N, DCI);
|
||||
case ISD::SETCC:
|
||||
return PerformSETCCCombine(N, DCI, STI.getSmVersion());
|
||||
case ISD::SHL:
|
||||
@ -5980,10 +5848,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::SREM:
|
||||
case ISD::UREM:
|
||||
return PerformREMCombine(N, DCI, OptLevel);
|
||||
case NVPTXISD::StoreParam:
|
||||
case NVPTXISD::StoreParamV2:
|
||||
case NVPTXISD::StoreParamV4:
|
||||
return PerformStoreParamCombine(N, DCI);
|
||||
case ISD::STORE:
|
||||
case NVPTXISD::StoreV2:
|
||||
case NVPTXISD::StoreV4:
|
||||
@ -6332,6 +6196,22 @@ static void ReplaceCopyFromReg_128(SDNode *N, SelectionDAG &DAG,
|
||||
Results.push_back(NewValue.getValue(3));
|
||||
}
|
||||
|
||||
// Builds a replacement for the ProxyReg node N and appends it to Results.
// The wrapped value is converted to the register type TLI reports for its
// value type, proxied at that type, and converted back to N's result type.
static void replaceProxyReg(SDNode *N, SelectionDAG &DAG,
|
||||
const TargetLowering &TLI,
|
||||
SmallVectorImpl<SDValue> &Results) {
|
||||
// Operand 0 of the ProxyReg is its chain, operand 1 is the wrapped value.
SDValue Chain = N->getOperand(0);
|
||||
SDValue Reg = N->getOperand(1);
|
||||
|
||||
// Register type the target uses for the wrapped value's type.
MVT VT = TLI.getRegisterType(*DAG.getContext(), Reg.getValueType());
|
||||
|
||||
// Any-extend or truncate into VT, proxy at VT, then convert back so that the
// replacement has the same value type as result 0 of N.
SDValue NewReg = DAG.getAnyExtOrTrunc(Reg, SDLoc(N), VT);
|
||||
SDValue NewProxy =
|
||||
DAG.getNode(NVPTXISD::ProxyReg, SDLoc(N), VT, {Chain, NewReg});
|
||||
SDValue Res = DAG.getAnyExtOrTrunc(NewProxy, SDLoc(N), N->getValueType(0));
|
||||
|
||||
Results.push_back(Res);
|
||||
}
|
||||
|
||||
void NVPTXTargetLowering::ReplaceNodeResults(
|
||||
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
|
||||
switch (N->getOpcode()) {
|
||||
@ -6349,6 +6229,9 @@ void NVPTXTargetLowering::ReplaceNodeResults(
|
||||
case ISD::CopyFromReg:
|
||||
ReplaceCopyFromReg_128(N, DAG, Results);
|
||||
return;
|
||||
case NVPTXISD::ProxyReg:
|
||||
replaceProxyReg(N, DAG, *this, Results);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,7 +38,7 @@ enum NodeType : unsigned {
|
||||
/// This node represents a PTX call instruction. Its operands are as follows:
|
||||
///
|
||||
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
|
||||
/// NumParams, Callee, Proto, InGlue)
|
||||
/// NumParams, Callee, Proto)
|
||||
CALL,
|
||||
|
||||
MoveParam,
|
||||
@ -84,13 +84,7 @@ enum NodeType : unsigned {
|
||||
StoreV2,
|
||||
StoreV4,
|
||||
StoreV8,
|
||||
LoadParam,
|
||||
LoadParamV2,
|
||||
LoadParamV4,
|
||||
StoreParam,
|
||||
StoreParamV2,
|
||||
StoreParamV4,
|
||||
LAST_MEMORY_OPCODE = StoreParamV4,
|
||||
LAST_MEMORY_OPCODE = StoreV8,
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1757,12 +1757,6 @@ def SDTDeclareArrayParam :
|
||||
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
|
||||
def SDTDeclareScalarParam :
|
||||
SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
|
||||
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
|
||||
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
|
||||
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
|
||||
def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
|
||||
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
|
||||
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
|
||||
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
|
||||
|
||||
def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
|
||||
@ -1774,104 +1768,20 @@ def declare_array_param :
|
||||
def declare_scalar_param :
|
||||
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
|
||||
def LoadParam :
|
||||
SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
|
||||
def LoadParamV2 :
|
||||
SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
|
||||
def LoadParamV4 :
|
||||
SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
|
||||
def StoreParam :
|
||||
SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def StoreParamV2 :
|
||||
SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def StoreParamV4 :
|
||||
SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
def MoveParam :
|
||||
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
|
||||
def proxy_reg :
|
||||
SDNode<"NVPTXISD::ProxyReg", SDTProxyReg, [SDNPHasChain]>;
|
||||
|
||||
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
|
||||
/// NumParams, Callee, Proto, InGlue)
|
||||
/// NumParams, Callee, Proto)
|
||||
def SDTCallProfile : SDTypeProfile<0, 6,
|
||||
[SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>,
|
||||
SDTCisVT<3, i32>, SDTCisVT<5, i32>]>;
|
||||
def call :
|
||||
SDNode<"NVPTXISD::CALL", SDTCallProfile,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
|
||||
|
||||
let mayLoad = true in {
|
||||
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
|
||||
NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
|
||||
!strconcat("ld.param", opstr, " \t$dst, [retval0$b];"),
|
||||
[]>;
|
||||
|
||||
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
|
||||
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins Offseti32imm:$b),
|
||||
!strconcat("ld.param.v2", opstr,
|
||||
" \t{{$dst, $dst2}}, [retval0$b];"), []>;
|
||||
|
||||
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
|
||||
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
|
||||
regclass:$dst4),
|
||||
(ins Offseti32imm:$b),
|
||||
!strconcat("ld.param.v4", opstr,
|
||||
" \t{{$dst, $dst2, $dst3, $dst4}}, [retval0$b];"),
|
||||
[]>;
|
||||
}
|
||||
|
||||
let mayStore = true in {
|
||||
|
||||
multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
|
||||
foreach op = [IMMType, regclass] in
|
||||
if !or(support_imm, !isa<NVPTXRegClass>(op)) then
|
||||
def _ # !if(!isa<NVPTXRegClass>(op), "r", "i")
|
||||
: NVPTXInst<(outs),
|
||||
(ins op:$val, i32imm:$a, Offseti32imm:$b),
|
||||
"st.param" # opstr # " \t[param$a$b], $val;",
|
||||
[]>;
|
||||
}
|
||||
|
||||
multiclass StoreParamV2Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
|
||||
foreach op1 = [IMMType, regclass] in
|
||||
foreach op2 = [IMMType, regclass] in
|
||||
def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i")
|
||||
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
|
||||
: NVPTXInst<(outs),
|
||||
(ins op1:$val1, op2:$val2,
|
||||
i32imm:$a, Offseti32imm:$b),
|
||||
"st.param.v2" # opstr # " \t[param$a$b], {{$val1, $val2}};",
|
||||
[]>;
|
||||
}
|
||||
|
||||
multiclass StoreParamV4Inst<NVPTXRegClass regclass, Operand IMMType, string opstr> {
|
||||
foreach op1 = [IMMType, regclass] in
|
||||
foreach op2 = [IMMType, regclass] in
|
||||
foreach op3 = [IMMType, regclass] in
|
||||
foreach op4 = [IMMType, regclass] in
|
||||
def _ # !if(!isa<NVPTXRegClass>(op1), "r", "i")
|
||||
# !if(!isa<NVPTXRegClass>(op2), "r", "i")
|
||||
# !if(!isa<NVPTXRegClass>(op3), "r", "i")
|
||||
# !if(!isa<NVPTXRegClass>(op4), "r", "i")
|
||||
|
||||
: NVPTXInst<(outs),
|
||||
(ins op1:$val1, op2:$val2, op3:$val3, op4:$val4,
|
||||
i32imm:$a, Offseti32imm:$b),
|
||||
"st.param.v4" # opstr #
|
||||
" \t[param$a$b], {{$val1, $val2, $val3, $val4}};",
|
||||
[]>;
|
||||
}
|
||||
}
|
||||
def call : SDNode<"NVPTXISD::CALL", SDTCallProfile, [SDNPHasChain, SDNPSideEffect]>;
|
||||
|
||||
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
|
||||
/// NumParams, Callee, Proto, InGlue)
|
||||
/// NumParams, Callee, Proto)
|
||||
|
||||
def CallOperand : Operand<i32> { let PrintMethod = "printCallOperand"; }
|
||||
|
||||
@ -1908,43 +1818,6 @@ foreach is_convergent = [0, 1] in {
|
||||
(call_uni_inst $addr, imm:$rets, imm:$params)>;
|
||||
}
|
||||
|
||||
def LoadParamMemI64 : LoadParamMemInst<B64, ".b64">;
|
||||
def LoadParamMemI32 : LoadParamMemInst<B32, ".b32">;
|
||||
def LoadParamMemI16 : LoadParamMemInst<B16, ".b16">;
|
||||
def LoadParamMemI8 : LoadParamMemInst<B16, ".b8">;
|
||||
def LoadParamMemV2I64 : LoadParamV2MemInst<B64, ".b64">;
|
||||
def LoadParamMemV2I32 : LoadParamV2MemInst<B32, ".b32">;
|
||||
def LoadParamMemV2I16 : LoadParamV2MemInst<B16, ".b16">;
|
||||
def LoadParamMemV2I8 : LoadParamV2MemInst<B16, ".b8">;
|
||||
def LoadParamMemV4I32 : LoadParamV4MemInst<B32, ".b32">;
|
||||
def LoadParamMemV4I16 : LoadParamV4MemInst<B16, ".b16">;
|
||||
def LoadParamMemV4I8 : LoadParamV4MemInst<B16, ".b8">;
|
||||
|
||||
defm StoreParamI64 : StoreParamInst<B64, i64imm, ".b64">;
|
||||
defm StoreParamI32 : StoreParamInst<B32, i32imm, ".b32">;
|
||||
defm StoreParamI16 : StoreParamInst<B16, i16imm, ".b16">;
|
||||
defm StoreParamI8 : StoreParamInst<B16, i8imm, ".b8">;
|
||||
|
||||
defm StoreParamI8TruncI32 : StoreParamInst<B32, i8imm, ".b8", /* support_imm */ false>;
|
||||
defm StoreParamI8TruncI64 : StoreParamInst<B64, i8imm, ".b8", /* support_imm */ false>;
|
||||
|
||||
defm StoreParamV2I64 : StoreParamV2Inst<B64, i64imm, ".b64">;
|
||||
defm StoreParamV2I32 : StoreParamV2Inst<B32, i32imm, ".b32">;
|
||||
defm StoreParamV2I16 : StoreParamV2Inst<B16, i16imm, ".b16">;
|
||||
defm StoreParamV2I8 : StoreParamV2Inst<B16, i8imm, ".b8">;
|
||||
|
||||
defm StoreParamV4I32 : StoreParamV4Inst<B32, i32imm, ".b32">;
|
||||
defm StoreParamV4I16 : StoreParamV4Inst<B16, i16imm, ".b16">;
|
||||
defm StoreParamV4I8 : StoreParamV4Inst<B16, i8imm, ".b8">;
|
||||
|
||||
defm StoreParamF32 : StoreParamInst<B32, f32imm, ".b32">;
|
||||
defm StoreParamF64 : StoreParamInst<B64, f64imm, ".b64">;
|
||||
|
||||
defm StoreParamV2F32 : StoreParamV2Inst<B32, f32imm, ".b32">;
|
||||
defm StoreParamV2F64 : StoreParamV2Inst<B64, f64imm, ".b64">;
|
||||
|
||||
defm StoreParamV4F32 : StoreParamV4Inst<B32, f32imm, ".b32">;
|
||||
|
||||
def DECLARE_PARAM_array :
|
||||
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
|
||||
".param .align $align .b8 \t$a[$size];", []>;
|
||||
|
@ -16,8 +16,8 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: call.uni (retval0), barv, (param0);
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
@ -32,24 +32,24 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
|
||||
define void @test_v3f32(<3 x float> %input, ptr %output) {
|
||||
; CHECK-LABEL: test_v3f32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<10>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v3f32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [test_v3f32_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_v3f32_param_0+8];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 16 .b8 param0[16];
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
|
||||
; CHECK-NEXT: st.param.b32 [param0+8], %r3;
|
||||
; CHECK-NEXT: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-NEXT: st.param.b32 [param0+8], %r1;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: call.uni (retval0), barv3, (param0);
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r4, %r5}, [retval0];
|
||||
; CHECK-NEXT: ld.param.b32 %r6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_1];
|
||||
; CHECK-NEXT: st.v2.b32 [%rd1], {%r4, %r5};
|
||||
; CHECK-NEXT: st.b32 [%rd1+8], %r6;
|
||||
; CHECK-NEXT: ld.param.b64 %rd4, [test_v3f32_param_1];
|
||||
; CHECK-NEXT: st.b32 [%rd4+8], %r2;
|
||||
; CHECK-NEXT: st.b64 [%rd4], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%call = tail call <3 x float> @barv3(<3 x float> %input)
|
||||
; Make sure we don't load more values than we need to.
|
||||
@ -68,16 +68,16 @@ define void @test_a2f32([2 x float] %input, ptr %output) {
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [test_a2f32_param_0+4];
|
||||
; CHECK-NEXT: { // callseq 2, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: st.param.b32 [param0+4], %r2;
|
||||
; CHECK-NEXT: .param .align 4 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b32 [param0+4], %r2;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), bara, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [retval0];
|
||||
; CHECK-NEXT: } // callseq 2
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_a2f32_param_1];
|
||||
; CHECK-NEXT: st.b32 [%rd1+4], %r4;
|
||||
; CHECK-NEXT: st.b32 [%rd1], %r3;
|
||||
; CHECK-NEXT: st.b32 [%rd1+4], %r3;
|
||||
; CHECK-NEXT: st.b32 [%rd1], %r4;
|
||||
; CHECK-NEXT: ret;
|
||||
%call = tail call [2 x float] @bara([2 x float] %input)
|
||||
store [2 x float] %call, ptr %output, align 4
|
||||
@ -95,16 +95,16 @@ define void @test_s2f32({float, float} %input, ptr %output) {
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [test_s2f32_param_0+4];
|
||||
; CHECK-NEXT: { // callseq 3, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: st.param.b32 [param0+4], %r2;
|
||||
; CHECK-NEXT: .param .align 4 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b32 [param0+4], %r2;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), bars, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [retval0];
|
||||
; CHECK-NEXT: } // callseq 3
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s2f32_param_1];
|
||||
; CHECK-NEXT: st.b32 [%rd1+4], %r4;
|
||||
; CHECK-NEXT: st.b32 [%rd1], %r3;
|
||||
; CHECK-NEXT: st.b32 [%rd1+4], %r3;
|
||||
; CHECK-NEXT: st.b32 [%rd1], %r4;
|
||||
; CHECK-NEXT: ret;
|
||||
%call = tail call {float, float} @bars({float, float} %input)
|
||||
store {float, float} %call, ptr %output, align 4
|
||||
|
@ -208,13 +208,13 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: .param .align 4 .b8 param1[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r2;
|
||||
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r2;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
|
@ -13,12 +13,12 @@ define void @foo() {
|
||||
; CHECK-NEXT: .reg .b64 %rd<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.global.b64 %rd1, [G];
|
||||
; CHECK-NEXT: ld.global.b64 %rd2, [G+8];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[16];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
|
||||
; CHECK-NEXT: ld.global.b64 %rd1, [G+8];
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
|
||||
; CHECK-NEXT: ld.global.b64 %rd2, [G];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: call.uni bar, (param0);
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
; CHECK-NEXT: ret;
|
||||
|
@ -44,11 +44,11 @@ entry:
|
||||
%arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3
|
||||
store float %3, ptr %arrayidx7, align 4
|
||||
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]]
|
||||
; CHECK-NEXT: .param .b64 param1;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]]
|
||||
; CHECK-NEXT: call.uni callee,
|
||||
; CHECK-DAG: .param .b64 param0;
|
||||
; CHECK-DAG: .param .b64 param1;
|
||||
; CHECK-DAG: st.param.b64 [param0], %rd[[A_REG]]
|
||||
; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG]]
|
||||
; CHECK: call.uni callee,
|
||||
|
||||
call void @callee(ptr %a, ptr %buf) #2
|
||||
ret void
|
||||
|
@ -14,11 +14,11 @@ target triple = "nvptx64-nvidia-cuda"
|
||||
%complex_half = type { half, half }
|
||||
|
||||
; CHECK: .param .align 2 .b8 param2[4];
|
||||
; CHECK: st.param.b16 [param2], %rs1;
|
||||
; CHECK: st.param.b16 [param2+2], %rs2;
|
||||
; CHECK: .param .align 2 .b8 retval0[4];
|
||||
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
|
||||
; CHECK-NEXT: call (retval0),
|
||||
; CHECK-DAG: st.param.b16 [param2], %rs{{[0-9]+}};
|
||||
; CHECK-DAG: st.param.b16 [param2+2], %rs{{[0-9]+}};
|
||||
; CHECK: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
|
||||
; CHECK: call (retval0),
|
||||
define weak_odr void @foo() {
|
||||
entry:
|
||||
%call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null)
|
||||
@ -36,10 +36,10 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) {
|
||||
}
|
||||
define void @boom() {
|
||||
%fp = call ptr @usefp(ptr @callee)
|
||||
; CHECK: .param .align 2 .b8 param0[4];
|
||||
; CHECK: st.param.b16 [param0], %rs1;
|
||||
; CHECK: st.param.b16 [param0+2], %rs2;
|
||||
; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]);
|
||||
; CHECK-DAG: .param .align 2 .b8 param0[4];
|
||||
; CHECK-DAG: st.param.b16 [param0], %rs{{[0-9]+}};
|
||||
; CHECK-DAG: st.param.b16 [param0+2], %rs{{[0-9]+}};
|
||||
; CHECK-DAG: .callprototype ()_ (.param .align 2 .b8 _[4]);
|
||||
call void %fp(ptr byval(%"class.complex") null)
|
||||
ret void
|
||||
}
|
||||
|
@ -199,10 +199,10 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-NEXT: add.s32 %r5, %r3, %r4;
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .b32 param0;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r3;
|
||||
; CHECK-NEXT: .param .b32 param1;
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r5;
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r3;
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r5;
|
||||
; CHECK-NEXT: call.uni (retval0), use, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
|
||||
@ -11,90 +12,180 @@
|
||||
;;; i64
|
||||
|
||||
define i64 @icmp_eq_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_eq_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_eq_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_eq_i64_param_1];
|
||||
; CHECK-NEXT: setp.eq.b64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp eq i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ne_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ne_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ne_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ne_i64_param_1];
|
||||
; CHECK-NEXT: setp.ne.b64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ne i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ugt_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ugt_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ugt_i64_param_1];
|
||||
; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ugt i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_uge_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_uge_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_uge_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_uge_i64_param_1];
|
||||
; CHECK-NEXT: setp.ge.u64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp uge i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ult_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ult_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ult_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ult_i64_param_1];
|
||||
; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ult i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_ule_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ule_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ule_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ule_i64_param_1];
|
||||
; CHECK-NEXT: setp.le.u64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ule i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sgt_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sgt_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sgt_i64_param_1];
|
||||
; CHECK-NEXT: setp.gt.s64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sgt i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_sge_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sge_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sge_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sge_i64_param_1];
|
||||
; CHECK-NEXT: setp.ge.s64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sge i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_slt_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_slt_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_slt_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_slt_i64_param_1];
|
||||
; CHECK-NEXT: setp.lt.s64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp slt i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @icmp_sle_i64(i64 %a, i64 %b) {
|
||||
; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sle_i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sle_i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sle_i64_param_1];
|
||||
; CHECK-NEXT: setp.le.s64 %p1, %rd1, %rd2;
|
||||
; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sle i64 %a, %b
|
||||
%ret = zext i1 %cmp to i64
|
||||
ret i64 %ret
|
||||
@ -103,90 +194,180 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) {
|
||||
;;; i32
|
||||
|
||||
define i32 @icmp_eq_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_eq_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_eq_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_eq_i32_param_1];
|
||||
; CHECK-NEXT: setp.eq.b32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ne_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ne_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_ne_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_ne_i32_param_1];
|
||||
; CHECK-NEXT: setp.ne.b32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ne i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ugt_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_ugt_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_ugt_i32_param_1];
|
||||
; CHECK-NEXT: setp.gt.u32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ugt i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_uge_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_uge_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_uge_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_uge_i32_param_1];
|
||||
; CHECK-NEXT: setp.ge.u32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp uge i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ult_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ult_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_ult_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_ult_i32_param_1];
|
||||
; CHECK-NEXT: setp.lt.u32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ult i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_ule_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ule_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_ule_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_ule_i32_param_1];
|
||||
; CHECK-NEXT: setp.le.u32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ule i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sgt_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_sgt_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_sgt_i32_param_1];
|
||||
; CHECK-NEXT: setp.gt.s32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sgt i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_sge_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sge_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_sge_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_sge_i32_param_1];
|
||||
; CHECK-NEXT: setp.ge.s32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sge i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_slt_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_slt_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_slt_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_slt_i32_param_1];
|
||||
; CHECK-NEXT: setp.lt.s32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
define i32 @icmp_sle_i32(i32 %a, i32 %b) {
|
||||
; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sle_i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [icmp_sle_i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [icmp_sle_i32_param_1];
|
||||
; CHECK-NEXT: setp.le.s32 %p1, %r1, %r2;
|
||||
; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sle i32 %a, %b
|
||||
%ret = zext i1 %cmp to i32
|
||||
ret i32 %ret
|
||||
@ -196,90 +377,190 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
|
||||
;;; i16
|
||||
|
||||
define i16 @icmp_eq_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_eq_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_eq_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_eq_i16_param_1];
|
||||
; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp eq i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ne_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ne_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ne_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ne_i16_param_1];
|
||||
; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ne i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ugt_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ugt_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ugt_i16_param_1];
|
||||
; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ugt i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_uge_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_uge_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_uge_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_uge_i16_param_1];
|
||||
; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp uge i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ult_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ult_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ult_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ult_i16_param_1];
|
||||
; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ult i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_ule_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ule_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ule_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ule_i16_param_1];
|
||||
; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ule i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sgt_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sgt_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sgt_i16_param_1];
|
||||
; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sgt i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_sge_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sge_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sge_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sge_i16_param_1];
|
||||
; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sge i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_slt_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_slt_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_slt_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_slt_i16_param_1];
|
||||
; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp slt i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
define i16 @icmp_sle_i16(i16 %a, i16 %b) {
|
||||
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sle_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sle_i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sle_i16_param_1];
|
||||
; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sle i16 %a, %b
|
||||
%ret = zext i1 %cmp to i16
|
||||
ret i16 %ret
|
||||
@ -290,9 +571,19 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
|
||||
|
||||
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_eq_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [icmp_eq_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [icmp_eq_i8_param_1];
|
||||
; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp eq i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -300,9 +591,19 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ne_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ne_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ne_i8_param_1];
|
||||
; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ne i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -310,9 +611,19 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ugt_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ugt_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ugt_i8_param_1];
|
||||
; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ugt i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -320,9 +631,19 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_uge_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_uge_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [icmp_uge_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [icmp_uge_i8_param_1];
|
||||
; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp uge i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -330,9 +651,19 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_ult_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ult_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ult_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ult_i8_param_1];
|
||||
; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ult i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -340,9 +671,19 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_ule_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_ule_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ule_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ule_i8_param_1];
|
||||
; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp ule i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -350,9 +691,19 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sgt_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sgt_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sgt_i8_param_1];
|
||||
; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sgt i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -360,9 +711,19 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_sge_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sge_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sge_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sge_i8_param_1];
|
||||
; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sge i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -370,9 +731,19 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_slt_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_slt_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.s8 %rs1, [icmp_slt_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.s8 %rs2, [icmp_slt_i8_param_1];
|
||||
; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp slt i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
@ -380,9 +751,19 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) {
|
||||
|
||||
define i8 @icmp_sle_i8(i8 %a, i8 %b) {
|
||||
; Comparison happens in 16-bit
|
||||
; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
|
||||
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
|
||||
; CHECK: ret
|
||||
; CHECK-LABEL: icmp_sle_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .pred %p<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sle_i8_param_0];
|
||||
; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sle_i8_param_1];
|
||||
; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2;
|
||||
; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%cmp = icmp sle i8 %a, %b
|
||||
%ret = zext i1 %cmp to i8
|
||||
ret i8 %ret
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
|
||||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
|
||||
|
||||
@ -7,52 +8,203 @@ declare i64 @callee_variadic(ptr %p, ...);
|
||||
|
||||
define %struct.64 @test_return_type_mismatch(ptr %p) {
|
||||
; CHECK-LABEL: test_return_type_mismatch(
|
||||
; CHECK: .param .align 1 .b8 retval0[8];
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<40>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: .param .align 1 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
|
||||
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0;
|
||||
; CHECK-NEXT: mov.b64 %rd1, callee;
|
||||
; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0;
|
||||
; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
|
||||
; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3];
|
||||
; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2];
|
||||
; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1];
|
||||
; CHECK-NEXT: ld.param.b8 %rd10, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
; CHECK-NEXT: shl.b64 %rd13, %rd9, 8;
|
||||
; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10;
|
||||
; CHECK-NEXT: shl.b64 %rd16, %rd8, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd18, %rd7, 24;
|
||||
; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16;
|
||||
; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14;
|
||||
; CHECK-NEXT: shl.b64 %rd23, %rd5, 8;
|
||||
; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6;
|
||||
; CHECK-NEXT: shl.b64 %rd26, %rd4, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd28, %rd3, 24;
|
||||
; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26;
|
||||
; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24;
|
||||
; CHECK-NEXT: shl.b64 %rd31, %rd30, 32;
|
||||
; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0], %rd10;
|
||||
; CHECK-NEXT: shr.u64 %rd33, %rd32, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33;
|
||||
; CHECK-NEXT: shr.u64 %rd34, %rd32, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34;
|
||||
; CHECK-NEXT: shr.u64 %rd35, %rd32, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35;
|
||||
; CHECK-NEXT: shr.u64 %rd36, %rd32, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36;
|
||||
; CHECK-NEXT: shr.u64 %rd37, %rd32, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37;
|
||||
; CHECK-NEXT: shr.u64 %rd38, %rd32, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38;
|
||||
; CHECK-NEXT: shr.u64 %rd39, %rd32, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39;
|
||||
; CHECK-NEXT: ret;
|
||||
%ret = call %struct.64 @callee(ptr %p)
|
||||
ret %struct.64 %ret
|
||||
}
|
||||
|
||||
define i64 @test_param_type_mismatch(ptr %p) {
|
||||
; CHECK-LABEL: test_param_type_mismatch(
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: .param .b64 retval0;
|
||||
; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
|
||||
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1;
|
||||
; CHECK-NEXT: st.param.b64 [param0], 7;
|
||||
; CHECK-NEXT: mov.b64 %rd1, callee;
|
||||
; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_1;
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%ret = call i64 @callee(i64 7)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @test_param_count_mismatch(ptr %p) {
|
||||
; CHECK-LABEL: test_param_count_mismatch(
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0];
|
||||
; CHECK-NEXT: { // callseq 2, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: .param .b64 param1;
|
||||
; CHECK-NEXT: .param .b64 retval0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
|
||||
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2;
|
||||
; CHECK-NEXT: st.param.b64 [param1], 7;
|
||||
; CHECK-NEXT: mov.b64 %rd1, callee;
|
||||
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2;
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 2
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
|
||||
; CHECK-NEXT: ret;
|
||||
%ret = call i64 @callee(ptr %p, i64 7)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
|
||||
; CHECK-LABEL: test_return_type_mismatch_variadic(
|
||||
; CHECK: .param .align 1 .b8 retval0[8];
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<40>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0];
|
||||
; CHECK-NEXT: { // callseq 3, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: .param .align 1 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
|
||||
; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3;
|
||||
; CHECK-NEXT: mov.b64 %rd1, callee_variadic;
|
||||
; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3;
|
||||
; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
|
||||
; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3];
|
||||
; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2];
|
||||
; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1];
|
||||
; CHECK-NEXT: ld.param.b8 %rd10, [retval0];
|
||||
; CHECK-NEXT: } // callseq 3
|
||||
; CHECK-NEXT: shl.b64 %rd13, %rd9, 8;
|
||||
; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10;
|
||||
; CHECK-NEXT: shl.b64 %rd16, %rd8, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd18, %rd7, 24;
|
||||
; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16;
|
||||
; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14;
|
||||
; CHECK-NEXT: shl.b64 %rd23, %rd5, 8;
|
||||
; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6;
|
||||
; CHECK-NEXT: shl.b64 %rd26, %rd4, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd28, %rd3, 24;
|
||||
; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26;
|
||||
; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24;
|
||||
; CHECK-NEXT: shl.b64 %rd31, %rd30, 32;
|
||||
; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0], %rd10;
|
||||
; CHECK-NEXT: shr.u64 %rd33, %rd32, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33;
|
||||
; CHECK-NEXT: shr.u64 %rd34, %rd32, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34;
|
||||
; CHECK-NEXT: shr.u64 %rd35, %rd32, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35;
|
||||
; CHECK-NEXT: shr.u64 %rd36, %rd32, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36;
|
||||
; CHECK-NEXT: shr.u64 %rd37, %rd32, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37;
|
||||
; CHECK-NEXT: shr.u64 %rd38, %rd32, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38;
|
||||
; CHECK-NEXT: shr.u64 %rd39, %rd32, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39;
|
||||
; CHECK-NEXT: ret;
|
||||
%ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
|
||||
ret %struct.64 %ret
|
||||
}
|
||||
|
||||
define i64 @test_param_type_mismatch_variadic(ptr %p) {
|
||||
; CHECK-LABEL: test_param_type_mismatch_variadic(
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_param_type_mismatch_variadic_param_0];
|
||||
; CHECK-NEXT: { // callseq 4, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param1[8];
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: .param .b64 retval0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: st.param.b64 [param1], 7;
|
||||
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
|
||||
; CHECK-NEXT: } // callseq 4
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
define i64 @test_param_count_mismatch_variadic(ptr %p) {
|
||||
; CHECK-LABEL: test_param_count_mismatch_variadic(
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_variadic_param_0];
|
||||
; CHECK-NEXT: { // callseq 5, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param1[8];
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: .param .b64 retval0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: st.param.b64 [param1], 7;
|
||||
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
|
||||
; CHECK-NEXT: } // callseq 5
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
|
||||
ret i64 %ret
|
||||
}
|
||||
|
@ -22,8 +22,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
|
||||
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
|
||||
; CHECK-32-NEXT: { // callseq 0, 0
|
||||
; CHECK-32-NEXT: .param .b32 param0;
|
||||
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
|
||||
; CHECK-32-NEXT: .param .b32 retval0;
|
||||
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
|
||||
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
|
||||
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
|
||||
; CHECK-32-NEXT: } // callseq 0
|
||||
@ -43,8 +43,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
|
||||
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
|
||||
; CHECK-64-NEXT: { // callseq 0, 0
|
||||
; CHECK-64-NEXT: .param .b64 param0;
|
||||
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
|
||||
; CHECK-64-NEXT: .param .b32 retval0;
|
||||
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
|
||||
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
|
||||
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
|
||||
; CHECK-64-NEXT: } // callseq 0
|
||||
|
@ -462,10 +462,10 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: .param .align 4 .b8 param1[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r2;
|
||||
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r2;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
@ -485,10 +485,10 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r2;
|
||||
; CHECK-NEXT: .param .align 4 .b8 param1[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r1;
|
||||
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r1;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r2;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
@ -508,10 +508,10 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
|
||||
; CHECK-NEXT: { // callseq 2, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r2;
|
||||
; CHECK-NEXT: .param .align 4 .b8 param1[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r1;
|
||||
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r1;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r2;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 2
|
||||
|
@ -859,10 +859,10 @@ define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 {
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_call_param_0];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 param1[8];
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd2;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd2;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
@ -882,10 +882,10 @@ define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 {
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: .param .align 8 .b8 param1[8];
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
@ -905,10 +905,10 @@ define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 {
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0];
|
||||
; CHECK-NEXT: { // callseq 2, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: .param .align 8 .b8 param1[8];
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd2;
|
||||
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 2
|
||||
|
@ -36,10 +36,10 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
|
||||
; CHECK-NEXT: fma.rn.f32 %r6, %r1, %r2, %r5;
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .b32 param0;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r4;
|
||||
; CHECK-NEXT: .param .b32 param1;
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r6;
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: st.param.b32 [param1], %r6;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r4;
|
||||
; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b32 %r7, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
@ -83,10 +83,10 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
|
||||
; CHECK-NEXT: fma.rn.f64 %rd6, %rd1, %rd2, %rd5;
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd4;
|
||||
; CHECK-NEXT: .param .b64 param1;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd6;
|
||||
; CHECK-NEXT: .param .b64 retval0;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd6;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd4;
|
||||
; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1);
|
||||
; CHECK-NEXT: ld.param.b64 %rd7, [retval0];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
|
@ -64,9 +64,9 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni byval_user, (param0);
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
|
@ -29,11 +29,11 @@ start:
|
||||
; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
|
||||
|
||||
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
|
||||
; CHECK: .param .align 16 .b8 param1[16];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
|
||||
; CHECK: } // callseq [[CALLSEQ_ID]]
|
||||
; CHECK-DAG: .param .align 16 .b8 param0[16];
|
||||
; CHECK-DAG: .param .align 16 .b8 param1[16];
|
||||
; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
|
||||
; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
|
||||
; CHECK: } // callseq [[CALLSEQ_ID]]
|
||||
call void @callee(i128 %0, i128 %1, ptr %2)
|
||||
|
||||
ret void
|
||||
@ -48,11 +48,11 @@ start:
|
||||
; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
|
||||
|
||||
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
|
||||
; CHECK: .param .align 16 .b8 param1[16];
|
||||
; CHECK: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
|
||||
; CHECK: } // callseq [[CALLSEQ_ID]]
|
||||
; CHECK-DAG: .param .align 16 .b8 param0[16];
|
||||
; CHECK-DAG: .param .align 16 .b8 param1[16];
|
||||
; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
|
||||
; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
|
||||
; CHECK: } // callseq [[CALLSEQ_ID]]
|
||||
call void @callee(i128 %0, i128 %1, ptr %2)
|
||||
|
||||
ret void
|
||||
|
@ -642,10 +642,10 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_call_param_0];
|
||||
; COMMON-NEXT: { // callseq 0, 0
|
||||
; COMMON-NEXT: .param .align 4 .b8 param0[4];
|
||||
; COMMON-NEXT: st.param.b32 [param0], %r1;
|
||||
; COMMON-NEXT: .param .align 4 .b8 param1[4];
|
||||
; COMMON-NEXT: st.param.b32 [param1], %r2;
|
||||
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; COMMON-NEXT: st.param.b32 [param1], %r2;
|
||||
; COMMON-NEXT: st.param.b32 [param0], %r1;
|
||||
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; COMMON-NEXT: } // callseq 0
|
||||
@ -665,10 +665,10 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
|
||||
; COMMON-NEXT: { // callseq 1, 0
|
||||
; COMMON-NEXT: .param .align 4 .b8 param0[4];
|
||||
; COMMON-NEXT: st.param.b32 [param0], %r2;
|
||||
; COMMON-NEXT: .param .align 4 .b8 param1[4];
|
||||
; COMMON-NEXT: st.param.b32 [param1], %r1;
|
||||
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; COMMON-NEXT: st.param.b32 [param1], %r1;
|
||||
; COMMON-NEXT: st.param.b32 [param0], %r2;
|
||||
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; COMMON-NEXT: } // callseq 1
|
||||
@ -688,10 +688,10 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
|
||||
; COMMON-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
|
||||
; COMMON-NEXT: { // callseq 2, 0
|
||||
; COMMON-NEXT: .param .align 4 .b8 param0[4];
|
||||
; COMMON-NEXT: st.param.b32 [param0], %r2;
|
||||
; COMMON-NEXT: .param .align 4 .b8 param1[4];
|
||||
; COMMON-NEXT: st.param.b32 [param1], %r1;
|
||||
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; COMMON-NEXT: st.param.b32 [param1], %r1;
|
||||
; COMMON-NEXT: st.param.b32 [param0], %r2;
|
||||
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; COMMON-NEXT: } // callseq 2
|
||||
|
@ -1,42 +1,107 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | FileCheck %s
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_90 \
|
||||
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
|
||||
; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON
|
||||
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
|
||||
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
|
||||
; RUN: -verify-machineinstrs -O0 \
|
||||
; RUN: | %ptxas-verify -arch=sm_90 \
|
||||
; RUN: %}
|
||||
; RUN: %if ptxas %{ \
|
||||
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
|
||||
; RUN: -verify-machineinstrs \
|
||||
; RUN: | %ptxas-verify -arch=sm_90 \
|
||||
; RUN: %}
|
||||
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
|
||||
; CHECK-LABEL: test_bitcast_2xi8_i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<5>;
|
||||
; CHECK-NEXT: .reg .b32 %r<3>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
|
||||
; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; CHECK-NEXT: ret;
|
||||
; O0-LABEL: test_bitcast_2xi8_i16(
|
||||
; O0: {
|
||||
; O0-NEXT: .reg .b16 %rs<5>;
|
||||
; O0-NEXT: .reg .b32 %r<3>;
|
||||
; O0-EMPTY:
|
||||
; O0-NEXT: // %bb.0:
|
||||
; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
|
||||
; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; O0-NEXT: shl.b16 %rs3, %rs2, 8;
|
||||
; O0-NEXT: or.b16 %rs4, %rs1, %rs3;
|
||||
; O0-NEXT: cvt.u32.u16 %r2, %rs4;
|
||||
; O0-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; O0-NEXT: ret;
|
||||
;
|
||||
; O3-LABEL: test_bitcast_2xi8_i16(
|
||||
; O3: {
|
||||
; O3-NEXT: .reg .b32 %r<2>;
|
||||
; O3-EMPTY:
|
||||
; O3-NEXT: // %bb.0:
|
||||
; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0];
|
||||
; O3-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; O3-NEXT: ret;
|
||||
%res = bitcast <2 x i8> %a to i16
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
|
||||
; CHECK-LABEL: test_bitcast_i16_2xi8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
|
||||
; CHECK-NEXT: ret;
|
||||
; O0-LABEL: test_bitcast_i16_2xi8(
|
||||
; O0: {
|
||||
; O0-NEXT: .reg .b16 %rs<2>;
|
||||
; O0-EMPTY:
|
||||
; O0-NEXT: // %bb.0:
|
||||
; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
|
||||
; O0-NEXT: st.param.b16 [func_retval0], %rs1;
|
||||
; O0-NEXT: ret;
|
||||
;
|
||||
; O3-LABEL: test_bitcast_i16_2xi8(
|
||||
; O3: {
|
||||
; O3-NEXT: .reg .b16 %rs<2>;
|
||||
; O3-EMPTY:
|
||||
; O3-NEXT: // %bb.0:
|
||||
; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
|
||||
; O3-NEXT: st.param.b16 [func_retval0], %rs1;
|
||||
; O3-NEXT: ret;
|
||||
%res = bitcast i16 %a to <2 x i8>
|
||||
ret <2 x i8> %res
|
||||
}
|
||||
|
||||
define <2 x i8> @test_call_2xi8(<2 x i8> %a) {
|
||||
; O0-LABEL: test_call_2xi8(
|
||||
; O0: {
|
||||
; O0-NEXT: .reg .b16 %rs<7>;
|
||||
; O0-NEXT: .reg .b32 %r<2>;
|
||||
; O0-EMPTY:
|
||||
; O0-NEXT: // %bb.0:
|
||||
; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
|
||||
; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; O0-NEXT: { // callseq 0, 0
|
||||
; O0-NEXT: .param .align 2 .b8 param0[2];
|
||||
; O0-NEXT: .param .align 2 .b8 retval0[2];
|
||||
; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
|
||||
; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0);
|
||||
; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
|
||||
; O0-NEXT: } // callseq 0
|
||||
; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
|
||||
; O0-NEXT: ret;
|
||||
;
|
||||
; O3-LABEL: test_call_2xi8(
|
||||
; O3: {
|
||||
; O3-NEXT: .reg .b16 %rs<7>;
|
||||
; O3-EMPTY:
|
||||
; O3-NEXT: // %bb.0:
|
||||
; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
|
||||
; O3-NEXT: { // callseq 0, 0
|
||||
; O3-NEXT: .param .align 2 .b8 param0[2];
|
||||
; O3-NEXT: .param .align 2 .b8 retval0[2];
|
||||
; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
|
||||
; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0);
|
||||
; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
|
||||
; O3-NEXT: } // callseq 0
|
||||
; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
|
||||
; O3-NEXT: ret;
|
||||
%res = call <2 x i8> @test_call_2xi8(<2 x i8> %a)
|
||||
ret <2 x i8> %res
|
||||
}
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; COMMON: {{.*}}
|
||||
|
@ -1273,10 +1273,10 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
|
||||
; O0-NEXT: ld.param.b32 %r1, [test_call_param_0];
|
||||
; O0-NEXT: { // callseq 0, 0
|
||||
; O0-NEXT: .param .align 4 .b8 param0[4];
|
||||
; O0-NEXT: st.param.b32 [param0], %r1;
|
||||
; O0-NEXT: .param .align 4 .b8 param1[4];
|
||||
; O0-NEXT: st.param.b32 [param1], %r2;
|
||||
; O0-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; O0-NEXT: st.param.b32 [param1], %r2;
|
||||
; O0-NEXT: st.param.b32 [param0], %r1;
|
||||
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; O0-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; O0-NEXT: } // callseq 0
|
||||
@ -1289,13 +1289,13 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
|
||||
; O3-EMPTY:
|
||||
; O3-NEXT: // %bb.0:
|
||||
; O3-NEXT: ld.param.b32 %r1, [test_call_param_0];
|
||||
; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
|
||||
; O3-NEXT: { // callseq 0, 0
|
||||
; O3-NEXT: .param .align 4 .b8 param0[4];
|
||||
; O3-NEXT: st.param.b32 [param0], %r1;
|
||||
; O3-NEXT: .param .align 4 .b8 param1[4];
|
||||
; O3-NEXT: st.param.b32 [param1], %r2;
|
||||
; O3-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
|
||||
; O3-NEXT: st.param.b32 [param1], %r2;
|
||||
; O3-NEXT: st.param.b32 [param0], %r1;
|
||||
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; O3-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; O3-NEXT: } // callseq 0
|
||||
@ -1315,10 +1315,10 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
|
||||
; O0-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
|
||||
; O0-NEXT: { // callseq 1, 0
|
||||
; O0-NEXT: .param .align 4 .b8 param0[4];
|
||||
; O0-NEXT: st.param.b32 [param0], %r2;
|
||||
; O0-NEXT: .param .align 4 .b8 param1[4];
|
||||
; O0-NEXT: st.param.b32 [param1], %r1;
|
||||
; O0-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; O0-NEXT: st.param.b32 [param1], %r1;
|
||||
; O0-NEXT: st.param.b32 [param0], %r2;
|
||||
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; O0-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; O0-NEXT: } // callseq 1
|
||||
@ -1331,13 +1331,13 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
|
||||
; O3-EMPTY:
|
||||
; O3-NEXT: // %bb.0:
|
||||
; O3-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
|
||||
; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
|
||||
; O3-NEXT: { // callseq 1, 0
|
||||
; O3-NEXT: .param .align 4 .b8 param0[4];
|
||||
; O3-NEXT: st.param.b32 [param0], %r2;
|
||||
; O3-NEXT: .param .align 4 .b8 param1[4];
|
||||
; O3-NEXT: st.param.b32 [param1], %r1;
|
||||
; O3-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; O3-NEXT: st.param.b32 [param1], %r1;
|
||||
; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
|
||||
; O3-NEXT: st.param.b32 [param0], %r2;
|
||||
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; O3-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; O3-NEXT: } // callseq 1
|
||||
@ -1357,10 +1357,10 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
|
||||
; O0-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
|
||||
; O0-NEXT: { // callseq 2, 0
|
||||
; O0-NEXT: .param .align 4 .b8 param0[4];
|
||||
; O0-NEXT: st.param.b32 [param0], %r2;
|
||||
; O0-NEXT: .param .align 4 .b8 param1[4];
|
||||
; O0-NEXT: st.param.b32 [param1], %r1;
|
||||
; O0-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; O0-NEXT: st.param.b32 [param1], %r1;
|
||||
; O0-NEXT: st.param.b32 [param0], %r2;
|
||||
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; O0-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; O0-NEXT: } // callseq 2
|
||||
@ -1373,13 +1373,13 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
|
||||
; O3-EMPTY:
|
||||
; O3-NEXT: // %bb.0:
|
||||
; O3-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
|
||||
; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
|
||||
; O3-NEXT: { // callseq 2, 0
|
||||
; O3-NEXT: .param .align 4 .b8 param0[4];
|
||||
; O3-NEXT: st.param.b32 [param0], %r2;
|
||||
; O3-NEXT: .param .align 4 .b8 param1[4];
|
||||
; O3-NEXT: st.param.b32 [param1], %r1;
|
||||
; O3-NEXT: .param .align 4 .b8 retval0[4];
|
||||
; O3-NEXT: st.param.b32 [param1], %r1;
|
||||
; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
|
||||
; O3-NEXT: st.param.b32 [param0], %r2;
|
||||
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
|
||||
; O3-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; O3-NEXT: } // callseq 2
|
||||
|
@ -173,8 +173,8 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
; CHECK-NEXT: shr.s32 %r2, %r1, 16;
|
||||
; CHECK-NEXT: shr.u32 %r3, %r2, 16;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
|
||||
; CHECK-NEXT: ret;
|
||||
call void @escape_int(i32 %i); // Force %i to be loaded completely.
|
||||
%i1 = ashr i32 %i, 16
|
||||
|
@ -23,15 +23,15 @@ define internal i32 @foo() {
|
||||
; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
|
||||
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
|
||||
; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
|
||||
; CHECK-NEXT: add.u64 %rd3, %SPL, 1;
|
||||
; CHECK-NEXT: ld.local.b8 %rs1, [%rd3];
|
||||
; CHECK-NEXT: add.u64 %rd4, %SP, 0;
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 1 .b8 param0[1];
|
||||
; CHECK-NEXT: st.param.b8 [param0], %rs1;
|
||||
; CHECK-NEXT: .param .b64 param1;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd4;
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: add.u64 %rd2, %SP, 0;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd2;
|
||||
; CHECK-NEXT: add.u64 %rd4, %SPL, 1;
|
||||
; CHECK-NEXT: ld.local.b8 %rs1, [%rd4];
|
||||
; CHECK-NEXT: st.param.b8 [param0], %rs1;
|
||||
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
|
||||
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
|
||||
@ -60,15 +60,15 @@ define internal i32 @bar() {
|
||||
; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
|
||||
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
|
||||
; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
|
||||
; CHECK-NEXT: add.u64 %rd3, %SPL, 8;
|
||||
; CHECK-NEXT: ld.local.b64 %rd4, [%rd3];
|
||||
; CHECK-NEXT: add.u64 %rd5, %SP, 0;
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd4;
|
||||
; CHECK-NEXT: .param .b64 param1;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd5;
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: add.u64 %rd2, %SP, 0;
|
||||
; CHECK-NEXT: st.param.b64 [param1], %rd2;
|
||||
; CHECK-NEXT: add.u64 %rd4, %SPL, 8;
|
||||
; CHECK-NEXT: ld.local.b64 %rd5, [%rd4];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd5;
|
||||
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
|
||||
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
|
||||
|
@ -121,20 +121,18 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
|
||||
define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
|
||||
; PTX-LABEL: grid_const_escape(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<2>;
|
||||
; PTX-NEXT: .reg .b64 %rd<4>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
|
||||
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
|
||||
; PTX-NEXT: mov.b64 %rd1, escape;
|
||||
; PTX-NEXT: { // callseq 0, 0
|
||||
; PTX-NEXT: .param .b64 param0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd3;
|
||||
; PTX-NEXT: .param .b32 retval0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd3;
|
||||
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
|
||||
; PTX-NEXT: mov.b64 %rd1, escape;
|
||||
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
|
||||
; PTX-NEXT: ld.param.b32 %r1, [retval0];
|
||||
; PTX-NEXT: } // callseq 0
|
||||
; PTX-NEXT: ret;
|
||||
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
|
||||
@ -153,7 +151,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
|
||||
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
|
||||
; PTX-NEXT: .reg .b64 %SP;
|
||||
; PTX-NEXT: .reg .b64 %SPL;
|
||||
; PTX-NEXT: .reg .b32 %r<3>;
|
||||
; PTX-NEXT: .reg .b32 %r<2>;
|
||||
; PTX-NEXT: .reg .b64 %rd<8>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
@ -167,18 +165,17 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
|
||||
; PTX-NEXT: add.u64 %rd6, %SP, 0;
|
||||
; PTX-NEXT: add.u64 %rd7, %SPL, 0;
|
||||
; PTX-NEXT: st.local.b32 [%rd7], %r1;
|
||||
; PTX-NEXT: mov.b64 %rd1, escape3;
|
||||
; PTX-NEXT: { // callseq 1, 0
|
||||
; PTX-NEXT: .param .b64 param0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: .param .b64 param1;
|
||||
; PTX-NEXT: st.param.b64 [param1], %rd6;
|
||||
; PTX-NEXT: .param .b64 param2;
|
||||
; PTX-NEXT: st.param.b64 [param2], %rd4;
|
||||
; PTX-NEXT: .param .b32 retval0;
|
||||
; PTX-NEXT: st.param.b64 [param2], %rd4;
|
||||
; PTX-NEXT: st.param.b64 [param1], %rd6;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
|
||||
; PTX-NEXT: mov.b64 %rd1, escape3;
|
||||
; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
|
||||
; PTX-NEXT: ld.param.b32 %r2, [retval0];
|
||||
; PTX-NEXT: } // callseq 1
|
||||
; PTX-NEXT: ret;
|
||||
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
|
||||
@ -255,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
|
||||
define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
|
||||
; PTX-LABEL: grid_const_partial_escape(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<4>;
|
||||
; PTX-NEXT: .reg .b32 %r<3>;
|
||||
; PTX-NEXT: .reg .b64 %rd<6>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
@ -266,14 +263,13 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
|
||||
; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0];
|
||||
; PTX-NEXT: add.s32 %r2, %r1, %r1;
|
||||
; PTX-NEXT: st.global.b32 [%rd4], %r2;
|
||||
; PTX-NEXT: mov.b64 %rd1, escape;
|
||||
; PTX-NEXT: { // callseq 2, 0
|
||||
; PTX-NEXT: .param .b64 param0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: .param .b32 retval0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
|
||||
; PTX-NEXT: mov.b64 %rd1, escape;
|
||||
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
|
||||
; PTX-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; PTX-NEXT: } // callseq 2
|
||||
; PTX-NEXT: ret;
|
||||
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
|
||||
@ -295,7 +291,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
|
||||
define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
|
||||
; PTX-LABEL: grid_const_partial_escapemem(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<5>;
|
||||
; PTX-NEXT: .reg .b32 %r<4>;
|
||||
; PTX-NEXT: .reg .b64 %rd<6>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
@ -307,14 +303,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
|
||||
; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4];
|
||||
; PTX-NEXT: st.global.b64 [%rd4], %rd5;
|
||||
; PTX-NEXT: add.s32 %r3, %r1, %r2;
|
||||
; PTX-NEXT: mov.b64 %rd1, escape;
|
||||
; PTX-NEXT: { // callseq 3, 0
|
||||
; PTX-NEXT: .param .b64 param0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: .param .b32 retval0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
|
||||
; PTX-NEXT: mov.b64 %rd1, escape;
|
||||
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
|
||||
; PTX-NEXT: ld.param.b32 %r4, [retval0];
|
||||
; PTX-NEXT: } // callseq 3
|
||||
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; PTX-NEXT: ret;
|
||||
@ -535,9 +530,9 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
|
||||
; PTX-NEXT: .reg .b32 %r<2>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
|
||||
; PTX-NEXT: { // callseq 4, 0
|
||||
; PTX-NEXT: .param .align 4 .b8 param0[4];
|
||||
; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
|
||||
; PTX-NEXT: st.param.b32 [param0], %r1;
|
||||
; PTX-NEXT: call.uni device_func, (param0);
|
||||
; PTX-NEXT: } // callseq 4
|
||||
|
@ -31,7 +31,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
|
||||
; PTX-LABEL: load_alignment(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b32 %r<4>;
|
||||
; PTX-NEXT: .reg .b64 %rd<7>;
|
||||
; PTX-NEXT: .reg .b64 %rd<6>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0: // %entry
|
||||
; PTX-NEXT: mov.b64 %rd1, load_alignment_param_0;
|
||||
@ -45,10 +45,9 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
|
||||
; PTX-NEXT: st.b32 [%rd3], %r3;
|
||||
; PTX-NEXT: { // callseq 0, 0
|
||||
; PTX-NEXT: .param .b64 param0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: .param .b64 retval0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; PTX-NEXT: call.uni (retval0), escape, (param0);
|
||||
; PTX-NEXT: ld.param.b64 %rd6, [retval0];
|
||||
; PTX-NEXT: } // callseq 0
|
||||
; PTX-NEXT: ret;
|
||||
entry:
|
||||
@ -76,17 +75,16 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
|
||||
;
|
||||
; PTX-LABEL: load_padding(
|
||||
; PTX: {
|
||||
; PTX-NEXT: .reg .b64 %rd<4>;
|
||||
; PTX-NEXT: .reg .b64 %rd<3>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
; PTX-NEXT: mov.b64 %rd1, load_padding_param_0;
|
||||
; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
|
||||
; PTX-NEXT: { // callseq 1, 0
|
||||
; PTX-NEXT: .param .b64 param0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd2;
|
||||
; PTX-NEXT: .param .b64 retval0;
|
||||
; PTX-NEXT: st.param.b64 [param0], %rd2;
|
||||
; PTX-NEXT: call.uni (retval0), escape, (param0);
|
||||
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
|
||||
; PTX-NEXT: } // callseq 1
|
||||
; PTX-NEXT: ret;
|
||||
%tmp = call ptr @escape(ptr nonnull align 16 %arg)
|
||||
|
@ -911,9 +911,9 @@ define void @device_func(ptr byval(i32) align 4 %input) {
|
||||
; PTX-NEXT: .reg .b64 %rd<2>;
|
||||
; PTX-EMPTY:
|
||||
; PTX-NEXT: // %bb.0:
|
||||
; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
|
||||
; PTX-NEXT: { // callseq 3, 0
|
||||
; PTX-NEXT: .param .align 4 .b8 param0[4];
|
||||
; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
|
||||
; PTX-NEXT: st.param.b32 [param0], %r1;
|
||||
; PTX-NEXT: call.uni device_func, (param0);
|
||||
; PTX-NEXT: } // callseq 3
|
||||
|
@ -8,7 +8,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
|
||||
; CHECK-LABEL: wombat(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<11>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<6>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0: // %bb
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [wombat_param_2];
|
||||
@ -19,19 +19,18 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000;
|
||||
; CHECK-NEXT: .param .b64 retval0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], 0;
|
||||
; CHECK-NEXT: call.uni (retval0), quux, (param0);
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
|
||||
; CHECK-NEXT: or.b32 %r8, %r4, %r7;
|
||||
; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8;
|
||||
; CHECK-NEXT: cvt.rn.f64.s32 %rd2, %r9;
|
||||
; CHECK-NEXT: cvt.rn.f64.u32 %rd3, %r10;
|
||||
; CHECK-NEXT: add.rn.f64 %rd4, %rd3, %rd2;
|
||||
; CHECK-NEXT: mov.b64 %rd5, 0;
|
||||
; CHECK-NEXT: st.global.b64 [%rd5], %rd4;
|
||||
; CHECK-NEXT: cvt.rn.f64.s32 %rd1, %r9;
|
||||
; CHECK-NEXT: cvt.rn.f64.u32 %rd2, %r10;
|
||||
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, %rd1;
|
||||
; CHECK-NEXT: mov.b64 %rd4, 0;
|
||||
; CHECK-NEXT: st.global.b64 [%rd4], %rd3;
|
||||
; CHECK-NEXT: mov.b32 %r10, 1;
|
||||
; CHECK-NEXT: bra.uni $L__BB0_1;
|
||||
bb:
|
||||
|
@ -18,16 +18,16 @@ define i32 @test(%struct.1float alignstack(32) %data) {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_param_0];
|
||||
; CHECK-NEXT: shr.u32 %r2, %r1, 8;
|
||||
; CHECK-NEXT: shr.u32 %r3, %r1, 16;
|
||||
; CHECK-NEXT: shr.u32 %r4, %r1, 24;
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 1 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.b8 [param0], %r1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+1], %r2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+2], %r3;
|
||||
; CHECK-NEXT: st.param.b8 [param0+3], %r4;
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: st.param.b8 [param0], %r1;
|
||||
; CHECK-NEXT: shr.u32 %r2, %r1, 8;
|
||||
; CHECK-NEXT: st.param.b8 [param0+1], %r2;
|
||||
; CHECK-NEXT: shr.u32 %r3, %r1, 16;
|
||||
; CHECK-NEXT: st.param.b8 [param0+2], %r3;
|
||||
; CHECK-NEXT: shr.u32 %r4, %r3, 8;
|
||||
; CHECK-NEXT: st.param.b8 [param0+3], %r4;
|
||||
; CHECK-NEXT: call.uni (retval0), callee, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
|
@ -27,10 +27,10 @@
|
||||
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1_param_0];
|
||||
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
|
||||
; CHECK: setp.ne.b16 %p1, [[A]], 0
|
||||
; CHECK-DAG: .param .b32 param0;
|
||||
; CHECK-DAG: .param .b32 retval0;
|
||||
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[B]]
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK-DAG: st.param.b32 [param0], [[B]]
|
||||
; CHECK: call.uni (retval0), test_i1,
|
||||
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R8]];
|
||||
@ -47,11 +47,11 @@ define i1 @test_i1(i1 %a) {
|
||||
; CHECK-NEXT: .param .b32 test_i1s_param_0
|
||||
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
|
||||
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
|
||||
; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[A]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: call.uni
|
||||
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
|
||||
; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
|
||||
@ -70,9 +70,9 @@ define signext i1 @test_i1s(i1 signext %a) {
|
||||
; CHECK-DAG: ld.param.b8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
|
||||
; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
|
||||
; CHECK: .param .align 1 .b8 param0[1];
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK-DAG: st.param.b8 [param0], [[E0]];
|
||||
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK: call.uni (retval0), test_v3i1,
|
||||
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
|
||||
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
|
||||
@ -89,8 +89,8 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
|
||||
; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
|
||||
; CHECK: ld.param.b8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
|
||||
; CHECK: .param .align 1 .b8 param0[1];
|
||||
; CHECK: st.param.b8 [param0], [[E0]];
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK: st.param.b8 [param0], [[E0]];
|
||||
; CHECK: call.uni (retval0), test_v4i1,
|
||||
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
|
||||
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
|
||||
@ -112,9 +112,9 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
|
||||
; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
|
||||
; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
|
||||
; CHECK: .param .align 1 .b8 param0[1];
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK-DAG: st.param.b8 [param0], [[E0]];
|
||||
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK: call.uni (retval0), test_v5i1,
|
||||
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
|
||||
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
|
||||
@ -131,8 +131,8 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
|
||||
; CHECK-NEXT: .param .b32 test_i2_param_0
|
||||
; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i2_param_0];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i2,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -147,8 +147,8 @@ define i2 @test_i2(i2 %a) {
|
||||
; CHECK-NEXT: .param .b32 test_i3_param_0
|
||||
; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i3_param_0];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i3,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -163,10 +163,10 @@ define i3 @test_i3(i3 %a) {
|
||||
; CHECK-LABEL: test_i8(
|
||||
; CHECK-NEXT: .param .b32 test_i8_param_0
|
||||
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0];
|
||||
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[A32]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
|
||||
; CHECK: st.param.b32 [param0], [[A32]];
|
||||
; CHECK: call.uni (retval0), test_i8,
|
||||
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R32]];
|
||||
@ -181,10 +181,10 @@ define i8 @test_i8(i8 %a) {
|
||||
; CHECK-LABEL: test_i8s(
|
||||
; CHECK-NEXT: .param .b32 test_i8s_param_0
|
||||
; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
|
||||
; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[A]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
|
||||
; CHECK: st.param.b32 [param0], [[A]];
|
||||
; CHECK: call.uni (retval0), test_i8s,
|
||||
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
|
||||
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
|
||||
@ -202,8 +202,8 @@ define signext i8 @test_i8s(i8 signext %a) {
|
||||
; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v3i8_param_0];
|
||||
; CHECK: .param .align 4 .b8 param0[4];
|
||||
; CHECK: st.param.b32 [param0], [[R]]
|
||||
; CHECK: .param .align 4 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], [[R]]
|
||||
; CHECK: call.uni (retval0), test_v3i8,
|
||||
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
|
||||
; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
|
||||
@ -220,8 +220,8 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
|
||||
; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v4i8_param_0]
|
||||
; CHECK: .param .align 4 .b8 param0[4];
|
||||
; CHECK: st.param.b32 [param0], [[R]];
|
||||
; CHECK: .param .align 4 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], [[R]];
|
||||
; CHECK: call.uni (retval0), test_v4i8,
|
||||
; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[RET]];
|
||||
@ -237,20 +237,13 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
|
||||
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v5i8_param_0]
|
||||
; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
|
||||
; CHECK: .param .align 8 .b8 param0[8];
|
||||
; CHECK-DAG: st.param.v4.b8 [param0],
|
||||
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-DAG: st.param.b32 [param0], [[E0]];
|
||||
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
|
||||
; CHECK: call.uni (retval0), test_v5i8,
|
||||
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
|
||||
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
|
||||
; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]];
|
||||
; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]];
|
||||
; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U;
|
||||
; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]];
|
||||
; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]];
|
||||
; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U;
|
||||
; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U;
|
||||
; CHECK-DAG: st.param.b32 [func_retval0], [[P2]];
|
||||
; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]];
|
||||
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
|
||||
; CHECK-NEXT: ret;
|
||||
define <5 x i8> @test_v5i8(<5 x i8> %a) {
|
||||
@ -262,8 +255,8 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
|
||||
; CHECK-LABEL: test_i11(
|
||||
; CHECK-NEXT: .param .b32 test_i11_param_0
|
||||
; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0];
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i11,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -277,10 +270,10 @@ define i11 @test_i11(i11 %a) {
|
||||
; CHECK-LABEL: test_i16(
|
||||
; CHECK-NEXT: .param .b32 test_i16_param_0
|
||||
; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16_param_0];
|
||||
; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[E32]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
|
||||
; CHECK: st.param.b32 [param0], [[E32]];
|
||||
; CHECK: call.uni (retval0), test_i16,
|
||||
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[RE32]];
|
||||
@ -294,10 +287,10 @@ define i16 @test_i16(i16 %a) {
|
||||
; CHECK-LABEL: test_i16s(
|
||||
; CHECK-NEXT: .param .b32 test_i16s_param_0
|
||||
; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
|
||||
; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[E32]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
|
||||
; CHECK: st.param.b32 [param0], [[E32]];
|
||||
; CHECK: call.uni (retval0), test_i16s,
|
||||
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
|
||||
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
|
||||
@ -312,14 +305,15 @@ define signext i16 @test_i16s(i16 signext %a) {
|
||||
; CHECK-LABEL: test_v3i16(
|
||||
; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
|
||||
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
|
||||
; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
|
||||
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3i16_param_0];
|
||||
; CHECK: .param .align 8 .b8 param0[8];
|
||||
; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.b16 [param0+4], [[E2]];
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-DAG: st.param.b32 [param0], [[E0]];
|
||||
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
|
||||
; CHECK: call.uni (retval0), test_v3i16,
|
||||
; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
|
||||
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
|
||||
; CHECK-DAG: mov.b32 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [[RE]];
|
||||
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
|
||||
; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
|
||||
; CHECK-NEXT: ret;
|
||||
@ -333,8 +327,8 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
|
||||
; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
|
||||
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
|
||||
; CHECK: .param .align 8 .b8 param0[8];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: call.uni (retval0), test_v4i16,
|
||||
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
|
||||
@ -348,15 +342,15 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
|
||||
; CHECK-LABEL: test_v5i16(
|
||||
; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
|
||||
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5i16_param_0]
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
|
||||
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
|
||||
; CHECK: call.uni (retval0), test_v5i16,
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
|
||||
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
|
||||
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
|
||||
; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
|
||||
; CHECK-NEXT: ret;
|
||||
define <5 x i16> @test_v5i16(<5 x i16> %a) {
|
||||
@ -369,8 +363,8 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
|
||||
; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
|
||||
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
|
||||
; CHECK: .param .align 2 .b8 param0[2];
|
||||
; CHECK: st.param.b16 [param0], [[E]];
|
||||
; CHECK: .param .align 2 .b8 retval0[2];
|
||||
; CHECK: st.param.b16 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_f16,
|
||||
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b16 [func_retval0], [[R]]
|
||||
@ -385,8 +379,8 @@ define half @test_f16(half %a) {
|
||||
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
|
||||
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
|
||||
; CHECK: .param .align 4 .b8 param0[4];
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: .param .align 4 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_v2f16,
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R]]
|
||||
@ -401,8 +395,8 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
|
||||
; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
|
||||
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
|
||||
; CHECK: .param .align 2 .b8 param0[2];
|
||||
; CHECK: st.param.b16 [param0], [[E]];
|
||||
; CHECK: .param .align 2 .b8 retval0[2];
|
||||
; CHECK: st.param.b16 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_bf16,
|
||||
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b16 [func_retval0], [[R]]
|
||||
@ -417,8 +411,8 @@ define bfloat @test_bf16(bfloat %a) {
|
||||
; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
|
||||
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
|
||||
; CHECK: .param .align 4 .b8 param0[4];
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: .param .align 4 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_v2bf16,
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R]]
|
||||
@ -432,15 +426,16 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
|
||||
; CHECK:.func (.param .align 8 .b8 func_retval0[8])
|
||||
; CHECK-LABEL: test_v3f16(
|
||||
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
|
||||
; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3f16_param_0];
|
||||
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3f16_param_0];
|
||||
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
|
||||
; CHECK: .param .align 8 .b8 param0[8];
|
||||
; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK-DAG: st.param.b32 [param0], [[E0]];
|
||||
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
|
||||
; CHECK: call.uni (retval0), test_v3f16,
|
||||
; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
|
||||
; CHECK-DAG: mov.b32 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [[R]];
|
||||
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
|
||||
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
|
||||
; CHECK: ret;
|
||||
@ -454,8 +449,8 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
|
||||
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
|
||||
; CHECK: ld.param.v2.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
|
||||
; CHECK: .param .align 8 .b8 param0[8];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
|
||||
; CHECK: call.uni (retval0), test_v4f16,
|
||||
; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
|
||||
@ -468,16 +463,16 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
|
||||
; CHECK:.func (.param .align 16 .b8 func_retval0[16])
|
||||
; CHECK-LABEL: test_v5f16(
|
||||
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5f16_param_0];
|
||||
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK-DAG: st.param.v4.b16 [param0],
|
||||
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
|
||||
; CHECK: call.uni (retval0), test_v5f16,
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
|
||||
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
|
||||
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
|
||||
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
|
||||
; CHECK: ret;
|
||||
define <5 x half> @test_v5f16(<5 x half> %a) {
|
||||
@ -490,8 +485,8 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
|
||||
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
|
||||
; CHECK: ld.param.v4.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
|
||||
; CHECK: call.uni (retval0), test_v8f16,
|
||||
; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
|
||||
@ -504,20 +499,20 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
|
||||
; CHECK:.func (.param .align 32 .b8 func_retval0[32])
|
||||
; CHECK-LABEL: test_v9f16(
|
||||
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v9f16_param_0];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v9f16_param_0+8];
|
||||
; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
|
||||
; CHECK: .param .align 32 .b8 param0[32];
|
||||
; CHECK-DAG: st.param.v4.b16 [param0],
|
||||
; CHECK-DAG: st.param.v4.b16 [param0+8],
|
||||
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
|
||||
; CHECK: .param .align 32 .b8 retval0[32];
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
|
||||
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
|
||||
; CHECK: call.uni (retval0), test_v9f16,
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[R2:%r[0-9]+]], [[R3:%r[0-9]+]]}, [retval0+8];
|
||||
; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
|
||||
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
|
||||
; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
|
||||
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
|
||||
; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[R2]], [[R3]]};
|
||||
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
|
||||
; CHECK: ret;
|
||||
define <9 x half> @test_v9f16(<9 x half> %a) {
|
||||
@ -531,8 +526,8 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
|
||||
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i19_param_0];
|
||||
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i19_param_0+2];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i19,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -548,8 +543,8 @@ define i19 @test_i19(i19 %a) {
|
||||
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i23_param_0];
|
||||
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i23_param_0+2];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i23,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -565,8 +560,8 @@ define i23 @test_i23(i23 %a) {
|
||||
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i24_param_0+2];
|
||||
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i24_param_0];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i24,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -581,8 +576,8 @@ define i24 @test_i24(i24 %a) {
|
||||
; CHECK-NEXT: .param .b32 test_i29_param_0
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [test_i29_param_0];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i29,
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
|
||||
@ -597,8 +592,8 @@ define i29 @test_i29(i29 %a) {
|
||||
; CHECK-NEXT: .param .b32 test_i32_param_0
|
||||
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_i32_param_0];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_i32,
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R]];
|
||||
@ -613,10 +608,10 @@ define i32 @test_i32(i32 %a) {
|
||||
; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
|
||||
; CHECK-DAG: ld.param.b32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.b32 [param0+8], [[E2]];
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-DAG: .param .align 16 .b8 param0[16];
|
||||
; CHECK-DAG: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
|
||||
; CHECK: call.uni (retval0), test_v3i32,
|
||||
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
|
||||
@ -632,9 +627,9 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
|
||||
; CHECK-LABEL: test_v4i32(
|
||||
; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
|
||||
; CHECK: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-DAG: .param .align 16 .b8 param0[16];
|
||||
; CHECK-DAG: .param .align 16 .b8 retval0[16];
|
||||
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
|
||||
; CHECK: call.uni (retval0), test_v4i32,
|
||||
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
|
||||
@ -650,9 +645,9 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
|
||||
; CHECK-DAG: ld.param.b32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
|
||||
; CHECK-DAG: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
|
||||
; CHECK: .param .align 32 .b8 param0[32];
|
||||
; CHECK: .param .align 32 .b8 retval0[32];
|
||||
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
|
||||
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
|
||||
; CHECK: .param .align 32 .b8 retval0[32];
|
||||
; CHECK: call.uni (retval0), test_v5i32,
|
||||
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
|
||||
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
|
||||
@ -669,8 +664,8 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
|
||||
; CHECK-NEXT: .param .b32 test_f32_param_0
|
||||
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_f32_param_0];
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: .param .b32 retval0;
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_f32,
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R]];
|
||||
@ -686,8 +681,8 @@ define float @test_f32(float %a) {
|
||||
; CHECK-DAG: ld.param.b8 {{%rd[0-9]+}}, [test_i40_param_0+4];
|
||||
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i40_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i40,
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
|
||||
@ -703,8 +698,8 @@ define i40 @test_i40(i40 %a) {
|
||||
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i47_param_0+4];
|
||||
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i47_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i47,
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
|
||||
@ -720,8 +715,8 @@ define i47 @test_i47(i47 %a) {
|
||||
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i48_param_0+4];
|
||||
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i48_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i48,
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
|
||||
@ -738,8 +733,8 @@ define i48 @test_i48(i48 %a) {
|
||||
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i51_param_0+4];
|
||||
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i51_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i51,
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
|
||||
@ -756,8 +751,8 @@ define i51 @test_i51(i51 %a) {
|
||||
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i56_param_0+4];
|
||||
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i56_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i56,
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
|
||||
@ -772,8 +767,8 @@ define i56 @test_i56(i56 %a) {
|
||||
; CHECK-NEXT: .param .b64 test_i57_param_0
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [test_i57_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), test_i57,
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
|
||||
@ -788,8 +783,8 @@ define i57 @test_i57(i57 %a) {
|
||||
; CHECK-NEXT: .param .b64 test_i64_param_0
|
||||
; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_i64_param_0];
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], [[E]];
|
||||
; CHECK: .param .b64 retval0;
|
||||
; CHECK: st.param.b64 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_i64,
|
||||
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], [[R]];
|
||||
@ -805,9 +800,9 @@ define i64 @test_i64(i64 %a) {
|
||||
; CHECK-DAG: ld.param.b64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
|
||||
; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
|
||||
; CHECK: .param .align 32 .b8 param0[32];
|
||||
; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.b64 [param0+16], [[E2]];
|
||||
; CHECK: .param .align 32 .b8 retval0[32];
|
||||
; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b64 [param0+16], [[E2]];
|
||||
; CHECK: call.uni (retval0), test_v3i64,
|
||||
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
|
||||
@ -828,9 +823,9 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
|
||||
; CHECK-DAG: ld.param.v2.b64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
|
||||
; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
|
||||
; CHECK: .param .align 32 .b8 param0[32];
|
||||
; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
|
||||
; CHECK: .param .align 32 .b8 retval0[32];
|
||||
; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
|
||||
; CHECK: call.uni (retval0), test_v4i64,
|
||||
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
|
||||
@ -849,8 +844,8 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
|
||||
; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1]
|
||||
; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
|
||||
; CHECK: .param .align 1 .b8 param0[1];
|
||||
; CHECK: st.param.b8 [param0], [[A]]
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK: st.param.b8 [param0], [[A]]
|
||||
; CHECK: call.uni (retval0), test_s_i1,
|
||||
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b8 [func_retval0], [[R]];
|
||||
@ -865,8 +860,8 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
|
||||
; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
|
||||
; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
|
||||
; CHECK: .param .align 1 .b8 param0[1];
|
||||
; CHECK: st.param.b8 [param0], [[A]]
|
||||
; CHECK: .param .align 1 .b8 retval0[1];
|
||||
; CHECK: st.param.b8 [param0], [[A]]
|
||||
; CHECK: call.uni (retval0), test_s_i8,
|
||||
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b8 [func_retval0], [[R]];
|
||||
@ -881,8 +876,8 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
|
||||
; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
|
||||
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
|
||||
; CHECK: .param .align 2 .b8 param0[2];
|
||||
; CHECK: st.param.b16 [param0], [[A]]
|
||||
; CHECK: .param .align 2 .b8 retval0[2];
|
||||
; CHECK: st.param.b16 [param0], [[A]]
|
||||
; CHECK: call.uni (retval0), test_s_i16,
|
||||
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b16 [func_retval0], [[R]];
|
||||
@ -897,8 +892,8 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
|
||||
; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
|
||||
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
|
||||
; CHECK: .param .align 2 .b8 param0[2];
|
||||
; CHECK: st.param.b16 [param0], [[A]]
|
||||
; CHECK: .param .align 2 .b8 retval0[2];
|
||||
; CHECK: st.param.b16 [param0], [[A]]
|
||||
; CHECK: call.uni (retval0), test_s_f16,
|
||||
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b16 [func_retval0], [[R]];
|
||||
@ -913,8 +908,8 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
|
||||
; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
|
||||
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_i32_param_0];
|
||||
; CHECK: .param .align 4 .b8 param0[4]
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: .param .align 4 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_s_i32,
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R]];
|
||||
@ -929,8 +924,8 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
|
||||
; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
|
||||
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_f32_param_0];
|
||||
; CHECK: .param .align 4 .b8 param0[4]
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: .param .align 4 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_s_f32,
|
||||
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b32 [func_retval0], [[R]];
|
||||
@ -945,8 +940,8 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
|
||||
; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
|
||||
; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
|
||||
; CHECK: .param .align 8 .b8 param0[8];
|
||||
; CHECK: st.param.b64 [param0], [[E]];
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK: st.param.b64 [param0], [[E]];
|
||||
; CHECK: call.uni (retval0), test_s_i64,
|
||||
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
|
||||
; CHECK: st.param.b64 [func_retval0], [[R]];
|
||||
@ -966,12 +961,12 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
|
||||
; CHECK-DAG: ld.param.b32 [[E1:%r[0-9]+]], [test_s_i32f32_param_0+4];
|
||||
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
|
||||
; CHECK: .param .align 8 .b8 param0[24];
|
||||
; CHECK: .param .align 8 .b8 retval0[24];
|
||||
; CHECK-DAG: st.param.b32 [param0], [[E0]];
|
||||
; CHECK-DAG: st.param.b32 [param0+4], [[E1]];
|
||||
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
|
||||
; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
|
||||
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
|
||||
; CHECK: .param .align 8 .b8 retval0[24];
|
||||
; CHECK: call.uni (retval0), test_s_i32f32,
|
||||
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
|
||||
; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4];
|
||||
@ -997,10 +992,10 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
|
||||
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
|
||||
; CHECK: .param .align 8 .b8 param0[24];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
|
||||
; CHECK: st.param.b64 [param0+16], [[E4]];
|
||||
; CHECK: .param .align 8 .b8 retval0[24];
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
|
||||
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
|
||||
; CHECK: call.uni (retval0), test_s_i32x4,
|
||||
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
|
||||
@ -1024,16 +1019,13 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
|
||||
; CHECK: ld.param.b8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
|
||||
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
|
||||
; CHECK: .param .align 8 .b8 param0[32];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.b8 [param0+8], [[E2]];
|
||||
; CHECK: st.param.b32 [param0+12], [[E3]];
|
||||
; CHECK: st.param.b32 [param0+16], [[E4]];
|
||||
; CHECK: st.param.b64 [param0+24], [[E5]];
|
||||
; CHECK: .param .align 8 .b8 retval0[32];
|
||||
; CHECK: call.uni (retval0), test_s_i1i32x4,
|
||||
; CHECK: (
|
||||
; CHECK: param0
|
||||
; CHECK: );
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b8 [param0+8], [[E2]];
|
||||
; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
|
||||
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
|
||||
; CHECK-DAG: st.param.b64 [param0+24], [[E5]];
|
||||
; CHECK: call.uni (retval0), test_s_i1i32x4, (param0);
|
||||
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
|
||||
; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
|
||||
@ -1082,6 +1074,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
|
||||
; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
|
||||
; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0];
|
||||
; CHECK: .param .align 1 .b8 param0[25];
|
||||
; CHECK: .param .align 1 .b8 retval0[25];
|
||||
; CHECK-DAG: st.param.b8 [param0],
|
||||
; CHECK-DAG: st.param.b8 [param0+1],
|
||||
; CHECK-DAG: st.param.b8 [param0+2],
|
||||
@ -1107,33 +1100,32 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
|
||||
; CHECK-DAG: st.param.b8 [param0+22],
|
||||
; CHECK-DAG: st.param.b8 [param0+23],
|
||||
; CHECK-DAG: st.param.b8 [param0+24],
|
||||
; CHECK: .param .align 1 .b8 retval0[25];
|
||||
; CHECK: call.uni (retval0), test_s_i1i32x4p,
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+4];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+5];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+6];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+7];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+9];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+10];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+11];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+12];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+13];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+14];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+15];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+16];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+17];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+18];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+19];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+20];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+21];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+22];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23];
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24];
|
||||
; CHECK: call.uni (retval0), test_s_i1i32x4p, (param0);
|
||||
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+3];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+2];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+1];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+7];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+6];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+5];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+4];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+12];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+11];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+10];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+9];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+16];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+15];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+14];
|
||||
; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+13];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+24];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+23];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+22];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+21];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+20];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+19];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+18];
|
||||
; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+17];
|
||||
; CHECK: } // callseq
|
||||
; CHECK-DAG: st.param.b8 [func_retval0],
|
||||
; CHECK-DAG: st.param.b8 [func_retval0+1],
|
||||
@ -1177,13 +1169,13 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
|
||||
; CHECK: ld.param.b32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
|
||||
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
|
||||
; CHECK: .param .align 16 .b8 param0[80];
|
||||
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK: st.param.b32 [param0+8], [[E2]];
|
||||
; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
|
||||
; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
|
||||
; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
|
||||
; CHECK: st.param.b32 [param0+64], [[E15]];
|
||||
; CHECK: .param .align 16 .b8 retval0[80];
|
||||
; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
|
||||
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
|
||||
; CHECK-DAG: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
|
||||
; CHECK-DAG: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
|
||||
; CHECK-DAG: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
|
||||
; CHECK-DAG: st.param.b32 [param0+64], [[E15]];
|
||||
; CHECK: call.uni (retval0), test_s_crossfield,
|
||||
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
|
||||
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
|
||||
|
@ -28,8 +28,8 @@ define float @caller_md(float %a, float %b) {
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
|
||||
; CHECK-NEXT: call.uni (retval0), callee_md, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
@ -69,8 +69,8 @@ define float @caller(float %a, float %b) {
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
|
||||
; CHECK-NEXT: call.uni (retval0), callee, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
|
@ -84,8 +84,8 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x1_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .b32 param0;
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: .param .align 16 .b8 retval0[4];
|
||||
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
|
||||
; CHECK: call.uni (retval0), callee_St4x1, (param0);
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
|
||||
%1 = load i32, ptr %in, align 4
|
||||
@ -112,8 +112,8 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x2_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[8];
|
||||
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: .param .align 16 .b8 retval0[8];
|
||||
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: call.uni (retval0), callee_St4x2, (param0);
|
||||
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
%agg.tmp = alloca %struct.St4x2, align 8
|
||||
@ -149,9 +149,9 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x3_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[12];
|
||||
; CHECK: .param .align 16 .b8 retval0[12];
|
||||
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
|
||||
; CHECK: .param .align 16 .b8 retval0[12];
|
||||
; CHECK: call.uni (retval0), callee_St4x3, (param0);
|
||||
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
|
||||
@ -193,8 +193,8 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x4_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: call.uni (retval0), callee_St4x4, (param0);
|
||||
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
%call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
|
||||
@ -239,9 +239,9 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x5_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[20];
|
||||
; CHECK: .param .align 16 .b8 retval0[20];
|
||||
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
|
||||
; CHECK: .param .align 16 .b8 retval0[20];
|
||||
; CHECK: call.uni (retval0), callee_St4x5, (param0);
|
||||
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
|
||||
@ -295,9 +295,9 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x6_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[24];
|
||||
; CHECK: .param .align 16 .b8 retval0[24];
|
||||
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: .param .align 16 .b8 retval0[24];
|
||||
; CHECK: call.uni (retval0), callee_St4x6, (param0);
|
||||
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
|
||||
@ -357,10 +357,10 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x7_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[28];
|
||||
; CHECK: .param .align 16 .b8 retval0[28];
|
||||
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
|
||||
; CHECK: .param .align 16 .b8 retval0[28];
|
||||
; CHECK: call.uni (retval0), callee_St4x7, (param0);
|
||||
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
|
||||
@ -429,9 +429,9 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St4x8_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[32];
|
||||
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: .param .align 16 .b8 retval0[32];
|
||||
; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK-DAG: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
|
||||
; CHECK: call.uni (retval0), callee_St4x8, (param0);
|
||||
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
|
||||
@ -503,8 +503,8 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St8x1_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: .param .align 16 .b8 retval0[8];
|
||||
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
|
||||
; CHECK: call.uni (retval0), callee_St8x1, (param0);
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
|
||||
%1 = load i64, ptr %in, align 8
|
||||
@ -531,8 +531,8 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St8x2_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[16];
|
||||
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK: .param .align 16 .b8 retval0[16];
|
||||
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK: call.uni (retval0), callee_St8x2, (param0);
|
||||
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
|
||||
%call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
|
||||
@ -565,9 +565,9 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St8x3_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[24];
|
||||
; CHECK: .param .align 16 .b8 retval0[24];
|
||||
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
|
||||
; CHECK: .param .align 16 .b8 retval0[24];
|
||||
; CHECK: call.uni (retval0), callee_St8x3, (param0);
|
||||
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
|
||||
@ -609,9 +609,9 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK: .param .b64 caller_St8x4_param_1
|
||||
; CHECK: )
|
||||
; CHECK: .param .align 16 .b8 param0[32];
|
||||
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK: .param .align 16 .b8 retval0[32];
|
||||
; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK-DAG: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
|
||||
; CHECK: call.uni (retval0), callee_St8x4, (param0);
|
||||
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
|
||||
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
|
||||
|
@ -77,7 +77,7 @@ constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
%0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0
|
||||
%0:b32, %1:b32, %2:b32, %3:b32 = LDV_i32_v4 0, 0, 101, 3, 32, &retval0, 0 :: (load (s128), addrspace 101)
|
||||
; CHECK-NOT: ProxyReg
|
||||
%4:b32 = ProxyRegB32 killed %0
|
||||
%5:b32 = ProxyRegB32 killed %1
|
||||
@ -86,7 +86,7 @@ body: |
|
||||
; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3
|
||||
STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101)
|
||||
|
||||
%8:b32 = LoadParamMemI32 0
|
||||
%8:b32 = LD_i32 0, 0, 101, 3, 32, &retval0, 0 :: (load (s32), addrspace 101)
|
||||
; CHECK-NOT: ProxyReg
|
||||
%9:b32 = ProxyRegB32 killed %8
|
||||
%10:b32 = ProxyRegB32 killed %9
|
||||
|
@ -26,8 +26,8 @@ define void @st_param_i8_i16() {
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 2 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.b8 [param0], 1;
|
||||
; CHECK-NEXT: st.param.b16 [param0+2], 2;
|
||||
; CHECK-NEXT: st.param.b8 [param0], 1;
|
||||
; CHECK-NEXT: call.uni call_i8_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
; CHECK-NEXT: ret;
|
||||
@ -75,7 +75,7 @@ define void @st_param_f32() {
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 3, 0
|
||||
; CHECK-NEXT: .param .b32 param0;
|
||||
; CHECK-NEXT: st.param.b32 [param0], 0f40A00000;
|
||||
; CHECK-NEXT: st.param.b32 [param0], 1084227584;
|
||||
; CHECK-NEXT: call.uni call_f32, (param0);
|
||||
; CHECK-NEXT: } // callseq 3
|
||||
; CHECK-NEXT: ret;
|
||||
@ -91,7 +91,7 @@ define void @st_param_f64() {
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 4, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000;
|
||||
; CHECK-NEXT: st.param.b64 [param0], 4618441417868443648;
|
||||
; CHECK-NEXT: call.uni call_f64, (param0);
|
||||
; CHECK-NEXT: } // callseq 4
|
||||
; CHECK-NEXT: ret;
|
||||
@ -165,7 +165,7 @@ define void @st_param_v2_i16_ii() {
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 8, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2};
|
||||
; CHECK-NEXT: st.param.b32 [param0], 131073;
|
||||
; CHECK-NEXT: call.uni call_v2_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 8
|
||||
; CHECK-NEXT: ret;
|
||||
@ -432,7 +432,7 @@ define void @st_param_v4_i8_iiii() {
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 23, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4};
|
||||
; CHECK-NEXT: st.param.b32 [param0], 67305985;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 23
|
||||
; CHECK-NEXT: ret;
|
||||
@ -442,15 +442,18 @@ define void @st_param_v4_i8_iiii() {
|
||||
define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_irrr(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irrr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irrr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_irrr_param_1];
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_irrr_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r5, 1, %r4, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 24, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs3, %rs2, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r6;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 24
|
||||
; CHECK-NEXT: ret;
|
||||
@ -464,15 +467,18 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
|
||||
define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_rirr(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rirr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rirr_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rirr_param_1];
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rirr_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r5, %r4, 2, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 25, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, 2, %rs2, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r6;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 25
|
||||
; CHECK-NEXT: ret;
|
||||
@ -486,15 +492,18 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
|
||||
define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_rrir(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrir_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrir_param_2];
|
||||
; CHECK-NEXT: prmt.b32 %r5, 3, %r4, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 26, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, 3, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r6;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 26
|
||||
; CHECK-NEXT: ret;
|
||||
@ -508,15 +517,18 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
|
||||
define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
|
||||
; CHECK-LABEL: st_param_v4_i8_rrri(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrri_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrri_param_2];
|
||||
; CHECK-NEXT: prmt.b32 %r5, %r4, 4, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 27, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, %rs1, 4};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r6;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 27
|
||||
; CHECK-NEXT: ret;
|
||||
@ -530,14 +542,16 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
|
||||
define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_iirr(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iirr_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_iirr_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r4, 513, %r3, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 28, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs2, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r4;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 28
|
||||
; CHECK-NEXT: ret;
|
||||
@ -551,14 +565,17 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
|
||||
define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_irir(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<6>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irir_param_1];
|
||||
; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irir_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 29, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, 3, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r5;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 29
|
||||
; CHECK-NEXT: ret;
|
||||
@ -572,14 +589,17 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
|
||||
define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
|
||||
; CHECK-LABEL: st_param_v4_i8_irri(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<6>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irri_param_1];
|
||||
; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irri_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 30, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, %rs1, 4};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r5;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 30
|
||||
; CHECK-NEXT: ret;
|
||||
@ -593,14 +613,17 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
|
||||
define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_riir(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<6>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riir_param_1];
|
||||
; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riir_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 31, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, 3, %rs1};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r5;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 31
|
||||
; CHECK-NEXT: ret;
|
||||
@ -614,14 +637,17 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
|
||||
define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
|
||||
; CHECK-LABEL: st_param_v4_i8_riri(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<6>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riri_param_1];
|
||||
; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riri_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 32, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, %rs1, 4};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r5;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 32
|
||||
; CHECK-NEXT: ret;
|
||||
@ -635,14 +661,16 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
|
||||
define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
|
||||
; CHECK-LABEL: st_param_v4_i8_rrii(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<5>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrii_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrii_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r4, %r3, 1027, 0x5410U;
|
||||
; CHECK-NEXT: { // callseq 33, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, %rs1, 3, 4};
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r4;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 33
|
||||
; CHECK-NEXT: ret;
|
||||
@ -656,13 +684,15 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
|
||||
define void @st_param_v4_i8_iiir(i8 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i8_iiir(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiir_param_0];
|
||||
; CHECK-NEXT: { // callseq 34, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1};
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiir_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r3;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 34
|
||||
; CHECK-NEXT: ret;
|
||||
@ -676,13 +706,15 @@ define void @st_param_v4_i8_iiir(i8 %d) {
|
||||
define void @st_param_v4_i8_iiri(i8 %c) {
|
||||
; CHECK-LABEL: st_param_v4_i8_iiri(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiri_param_0];
|
||||
; CHECK-NEXT: { // callseq 35, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4};
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiri_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r3;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 35
|
||||
; CHECK-NEXT: ret;
|
||||
@ -696,13 +728,15 @@ define void @st_param_v4_i8_iiri(i8 %c) {
|
||||
define void @st_param_v4_i8_irii(i8 %b) {
|
||||
; CHECK-LABEL: st_param_v4_i8_irii(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irii_param_0];
|
||||
; CHECK-NEXT: { // callseq 36, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4};
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irii_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r2, 1, %r1, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r3;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 36
|
||||
; CHECK-NEXT: ret;
|
||||
@ -716,13 +750,15 @@ define void @st_param_v4_i8_irii(i8 %b) {
|
||||
define void @st_param_v4_i8_riii(i8 %a) {
|
||||
; CHECK-LABEL: st_param_v4_i8_riii(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riii_param_0];
|
||||
; CHECK-NEXT: { // callseq 37, 0
|
||||
; CHECK-NEXT: .param .align 4 .b8 param0[4];
|
||||
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4};
|
||||
; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riii_param_0];
|
||||
; CHECK-NEXT: prmt.b32 %r2, %r1, 2, 0x3340U;
|
||||
; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r3;
|
||||
; CHECK-NEXT: call.uni call_v4_i8, (param0);
|
||||
; CHECK-NEXT: } // callseq 37
|
||||
; CHECK-NEXT: ret;
|
||||
@ -742,7 +778,7 @@ define void @st_param_v4_i16_iiii() {
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: { // callseq 38, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {131073, 262147};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 38
|
||||
; CHECK-NEXT: ret;
|
||||
@ -841,13 +877,15 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i16_iirr(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iirr_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_iirr_param_1];
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; CHECK-NEXT: { // callseq 43, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 43
|
||||
; CHECK-NEXT: ret;
|
||||
@ -946,13 +984,15 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
|
||||
; CHECK-LABEL: st_param_v4_i16_rrii(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_rrii_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_rrii_param_1];
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; CHECK-NEXT: { // callseq 48, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 48
|
||||
; CHECK-NEXT: ret;
|
||||
@ -966,13 +1006,16 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
|
||||
define void @st_param_v4_i16_iiir(i16 %d) {
|
||||
; CHECK-LABEL: st_param_v4_i16_iiir(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiir_param_0];
|
||||
; CHECK-NEXT: mov.b16 %rs2, 3;
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
|
||||
; CHECK-NEXT: { // callseq 49, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 49
|
||||
; CHECK-NEXT: ret;
|
||||
@ -986,13 +1029,16 @@ define void @st_param_v4_i16_iiir(i16 %d) {
|
||||
define void @st_param_v4_i16_iiri(i16 %c) {
|
||||
; CHECK-LABEL: st_param_v4_i16_iiri(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiri_param_0];
|
||||
; CHECK-NEXT: mov.b16 %rs2, 4;
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; CHECK-NEXT: { // callseq 50, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 50
|
||||
; CHECK-NEXT: ret;
|
||||
@ -1006,13 +1052,16 @@ define void @st_param_v4_i16_iiri(i16 %c) {
|
||||
define void @st_param_v4_i16_irii(i16 %b) {
|
||||
; CHECK-LABEL: st_param_v4_i16_irii(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_irii_param_0];
|
||||
; CHECK-NEXT: mov.b16 %rs2, 1;
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
|
||||
; CHECK-NEXT: { // callseq 51, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 51
|
||||
; CHECK-NEXT: ret;
|
||||
@ -1026,13 +1075,16 @@ define void @st_param_v4_i16_irii(i16 %b) {
|
||||
define void @st_param_v4_i16_riii(i16 %a) {
|
||||
; CHECK-LABEL: st_param_v4_i16_riii(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_riii_param_0];
|
||||
; CHECK-NEXT: mov.b16 %rs2, 2;
|
||||
; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
|
||||
; CHECK-NEXT: { // callseq 52, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[8];
|
||||
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
|
||||
; CHECK-NEXT: call.uni call_v4_i16, (param0);
|
||||
; CHECK-NEXT: } // callseq 52
|
||||
; CHECK-NEXT: ret;
|
||||
@ -1672,13 +1724,12 @@ declare void @call_v4_f32(%struct.float4 alignstack(16))
|
||||
define void @st_param_bfloat() {
|
||||
; CHECK-LABEL: st_param_bfloat(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: mov.b16 %rs1, 0x4100;
|
||||
; CHECK-NEXT: { // callseq 83, 0
|
||||
; CHECK-NEXT: .param .align 2 .b8 param0[2];
|
||||
; CHECK-NEXT: st.param.b16 [param0], %rs1;
|
||||
; CHECK-NEXT: st.param.b16 [param0], 0x4100;
|
||||
; CHECK-NEXT: call.uni call_bfloat, (param0);
|
||||
; CHECK-NEXT: } // callseq 83
|
||||
; CHECK-NEXT: ret;
|
||||
|
@ -34,9 +34,9 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_store_param_def_param_1];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 16 .b8 param0[32];
|
||||
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r2, %r1, %r3, %r4};
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r5, %r1};
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
|
||||
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
|
||||
; CHECK-NEXT: call.uni test_call, (param0);
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
; CHECK-NEXT: ret;
|
||||
|
@ -69,8 +69,8 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) {
|
||||
; CHECK-NEXT: tex.1d.v4.f32.s32 {%r2, %r3, %r4, %r5}, [tex0, {%r1}];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .b64 param0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd3;
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd3;
|
||||
; CHECK-NEXT: call.uni (retval0), texfunc, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
|
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; Verifies correctness of load/store of parameters and return values.
|
||||
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
|
||||
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
|
||||
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
|
||||
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | %ptxas-verify %}
|
||||
|
||||
%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
|
||||
%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
|
||||
@ -24,37 +24,35 @@
|
||||
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
|
||||
; CHECK-LABEL: test_s_i8i16p(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<15>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<13>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4];
|
||||
; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3];
|
||||
; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i16p_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2];
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i16p_param_0+4];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[16];
|
||||
; CHECK-NEXT: st.param.b16 [param0], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
|
||||
; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
|
||||
; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+2];
|
||||
; CHECK-NEXT: ld.param.b16 %rs3, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+3];
|
||||
; CHECK-NEXT: } // callseq 0
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
|
||||
; CHECK-NEXT: shl.b16 %rs8, %rs4, 8;
|
||||
; CHECK-NEXT: or.b16 %rs9, %rs8, %rs5;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs5;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs2;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
|
||||
; CHECK-NEXT: shr.u16 %rs12, %rs9, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs12;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
|
||||
ret %s_i8i16p %r
|
||||
@ -64,56 +62,51 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
|
||||
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
|
||||
; CHECK-LABEL: test_s_i8i32p(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<12>;
|
||||
; CHECK-NEXT: .reg .b32 %r<20>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<24>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6];
|
||||
; CHECK-NEXT: shl.b32 %r4, %r3, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5];
|
||||
; CHECK-NEXT: or.b32 %r6, %r4, %r5;
|
||||
; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7];
|
||||
; CHECK-NEXT: shl.b32 %r8, %r7, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8];
|
||||
; CHECK-NEXT: shl.b32 %r10, %r9, 24;
|
||||
; CHECK-NEXT: or.b32 %r11, %r10, %r8;
|
||||
; CHECK-NEXT: or.b32 %r2, %r11, %r6;
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0];
|
||||
; CHECK-NEXT: shr.u32 %r12, %r2, 8;
|
||||
; CHECK-NEXT: shr.u32 %r13, %r11, 16;
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i32p_param_0+4];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8i32p_param_0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8i32p_param_0+8];
|
||||
; CHECK-NEXT: { // callseq 1, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[24];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+5], %r2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+6], %r12;
|
||||
; CHECK-NEXT: st.param.b8 [param0+7], %r13;
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %r9;
|
||||
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %r4;
|
||||
; CHECK-NEXT: st.param.b8 [param0+7], %r3;
|
||||
; CHECK-NEXT: st.param.b8 [param0+6], %r2;
|
||||
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
|
||||
; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r14, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
|
||||
; CHECK-NEXT: } // callseq 1
|
||||
; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
|
||||
; CHECK-NEXT: shl.b32 %r12, %r8, 8;
|
||||
; CHECK-NEXT: or.b32 %r13, %r12, %r9;
|
||||
; CHECK-NEXT: shl.b32 %r15, %r7, 16;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r6, 24;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r15;
|
||||
; CHECK-NEXT: or.b32 %r19, %r18, %r13;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
|
||||
; CHECK-NEXT: shr.u32 %r21, %r19, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
|
||||
; CHECK-NEXT: shr.u32 %r22, %r19, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
|
||||
; CHECK-NEXT: shr.u32 %r23, %r19, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
|
||||
ret %s_i8i32p %r
|
||||
@ -123,112 +116,66 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
|
||||
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
|
||||
; CHECK-LABEL: test_s_i8i64p(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<20>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<68>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<46>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10];
|
||||
; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9];
|
||||
; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
|
||||
; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11];
|
||||
; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12];
|
||||
; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
|
||||
; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
|
||||
; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
|
||||
; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14];
|
||||
; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13];
|
||||
; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
|
||||
; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15];
|
||||
; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16];
|
||||
; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
|
||||
; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
|
||||
; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
|
||||
; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
|
||||
; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0];
|
||||
; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
|
||||
; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
|
||||
; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
|
||||
; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
|
||||
; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
|
||||
; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8i64p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
|
||||
; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+16];
|
||||
; CHECK-NEXT: { // callseq 2, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[32];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
|
||||
; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
|
||||
; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
|
||||
; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
|
||||
; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
|
||||
; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
|
||||
; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
|
||||
; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
|
||||
; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
|
||||
; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
|
||||
; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
|
||||
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
|
||||
; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
|
||||
; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
|
||||
; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
|
||||
; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
|
||||
; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
|
||||
; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
|
||||
; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
|
||||
; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
|
||||
; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
|
||||
; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
|
||||
; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
|
||||
; CHECK-NEXT: } // callseq 2
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
|
||||
; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
|
||||
; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
|
||||
; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
|
||||
; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
|
||||
; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
|
||||
; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
|
||||
; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
|
||||
; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
|
||||
; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
|
||||
; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
|
||||
; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
|
||||
; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
|
||||
; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
|
||||
; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
|
||||
; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
|
||||
; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
|
||||
; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
|
||||
; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
|
||||
; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
|
||||
; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
|
||||
; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
|
||||
; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
|
||||
; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
|
||||
; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
|
||||
; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
|
||||
; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
|
||||
; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
|
||||
; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
|
||||
; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
|
||||
; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
|
||||
; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
|
||||
; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
|
||||
; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
|
||||
; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
|
||||
; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
|
||||
; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
|
||||
; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
|
||||
; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
|
||||
ret %s_i8i64p %r
|
||||
@ -242,33 +189,32 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4];
|
||||
; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3];
|
||||
; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2];
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [test_s_i8f16p_param_0+2];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [test_s_i8f16p_param_0+4];
|
||||
; CHECK-NEXT: { // callseq 3, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[16];
|
||||
; CHECK-NEXT: st.param.b16 [param0], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs3;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
|
||||
; CHECK-NEXT: st.param.b16 [param0+2], %rs2;
|
||||
; CHECK-NEXT: st.param.b16 [param0], %rs1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
|
||||
; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
|
||||
; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
|
||||
; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+2];
|
||||
; CHECK-NEXT: ld.param.b16 %rs5, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rs7, [retval0+3];
|
||||
; CHECK-NEXT: } // callseq 3
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
|
||||
; CHECK-NEXT: shl.b16 %rs10, %rs6, 8;
|
||||
; CHECK-NEXT: or.b16 %rs11, %rs10, %rs7;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs7;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs4;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs5;
|
||||
; CHECK-NEXT: shr.u16 %rs14, %rs11, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs14;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
|
||||
ret %s_i8f16p %r
|
||||
@ -278,56 +224,51 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
|
||||
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
|
||||
; CHECK-LABEL: test_s_i8f16x2p(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<12>;
|
||||
; CHECK-NEXT: .reg .b32 %r<20>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<24>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6];
|
||||
; CHECK-NEXT: shl.b32 %r4, %r3, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5];
|
||||
; CHECK-NEXT: or.b32 %r6, %r4, %r5;
|
||||
; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7];
|
||||
; CHECK-NEXT: shl.b32 %r8, %r7, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8];
|
||||
; CHECK-NEXT: shl.b32 %r10, %r9, 24;
|
||||
; CHECK-NEXT: or.b32 %r11, %r10, %r8;
|
||||
; CHECK-NEXT: or.b32 %r2, %r11, %r6;
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
|
||||
; CHECK-NEXT: shr.u32 %r12, %r2, 8;
|
||||
; CHECK-NEXT: shr.u32 %r13, %r11, 16;
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16x2p_param_0+4];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f16x2p_param_0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f16x2p_param_0+8];
|
||||
; CHECK-NEXT: { // callseq 4, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[24];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+5], %r2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+6], %r12;
|
||||
; CHECK-NEXT: st.param.b8 [param0+7], %r13;
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %r9;
|
||||
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %r4;
|
||||
; CHECK-NEXT: st.param.b8 [param0+7], %r3;
|
||||
; CHECK-NEXT: st.param.b8 [param0+6], %r2;
|
||||
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
|
||||
; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r14, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
|
||||
; CHECK-NEXT: } // callseq 4
|
||||
; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
|
||||
; CHECK-NEXT: shl.b32 %r12, %r8, 8;
|
||||
; CHECK-NEXT: or.b32 %r13, %r12, %r9;
|
||||
; CHECK-NEXT: shl.b32 %r15, %r7, 16;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r6, 24;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r15;
|
||||
; CHECK-NEXT: or.b32 %r19, %r18, %r13;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
|
||||
; CHECK-NEXT: shr.u32 %r21, %r19, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
|
||||
; CHECK-NEXT: shr.u32 %r22, %r19, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
|
||||
; CHECK-NEXT: shr.u32 %r23, %r19, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
|
||||
ret %s_i8f16x2p %r
|
||||
@ -337,56 +278,51 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
|
||||
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
|
||||
; CHECK-LABEL: test_s_i8f32p(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<12>;
|
||||
; CHECK-NEXT: .reg .b32 %r<20>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<4>;
|
||||
; CHECK-NEXT: .reg .b32 %r<24>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<4>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6];
|
||||
; CHECK-NEXT: shl.b32 %r4, %r3, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5];
|
||||
; CHECK-NEXT: or.b32 %r6, %r4, %r5;
|
||||
; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7];
|
||||
; CHECK-NEXT: shl.b32 %r8, %r7, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8];
|
||||
; CHECK-NEXT: shl.b32 %r10, %r9, 24;
|
||||
; CHECK-NEXT: or.b32 %r11, %r10, %r8;
|
||||
; CHECK-NEXT: or.b32 %r2, %r11, %r6;
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0];
|
||||
; CHECK-NEXT: shr.u32 %r12, %r2, 8;
|
||||
; CHECK-NEXT: shr.u32 %r13, %r11, 16;
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f32p_param_0+4];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f32p_param_0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f32p_param_0+8];
|
||||
; CHECK-NEXT: { // callseq 5, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[24];
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+5], %r2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+6], %r12;
|
||||
; CHECK-NEXT: st.param.b8 [param0+7], %r13;
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %r9;
|
||||
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %r4;
|
||||
; CHECK-NEXT: st.param.b8 [param0+7], %r3;
|
||||
; CHECK-NEXT: st.param.b8 [param0+6], %r2;
|
||||
; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
|
||||
; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
|
||||
; CHECK-NEXT: st.param.b32 [param0], %r1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
|
||||
; CHECK-NEXT: ld.param.b32 %r14, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
|
||||
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
|
||||
; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
|
||||
; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
|
||||
; CHECK-NEXT: } // callseq 5
|
||||
; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
|
||||
; CHECK-NEXT: shl.b32 %r12, %r8, 8;
|
||||
; CHECK-NEXT: or.b32 %r13, %r12, %r9;
|
||||
; CHECK-NEXT: shl.b32 %r15, %r7, 16;
|
||||
; CHECK-NEXT: shl.b32 %r17, %r6, 24;
|
||||
; CHECK-NEXT: or.b32 %r18, %r17, %r15;
|
||||
; CHECK-NEXT: or.b32 %r19, %r18, %r13;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
|
||||
; CHECK-NEXT: shr.u32 %r21, %r19, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
|
||||
; CHECK-NEXT: shr.u32 %r22, %r19, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
|
||||
; CHECK-NEXT: shr.u32 %r23, %r19, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
|
||||
ret %s_i8f32p %r
|
||||
@ -396,112 +332,66 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
|
||||
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
|
||||
; CHECK-LABEL: test_s_i8f64p(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<20>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<68>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<3>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<46>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10];
|
||||
; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9];
|
||||
; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
|
||||
; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11];
|
||||
; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12];
|
||||
; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
|
||||
; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
|
||||
; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
|
||||
; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14];
|
||||
; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
|
||||
; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13];
|
||||
; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
|
||||
; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15];
|
||||
; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
|
||||
; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16];
|
||||
; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
|
||||
; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
|
||||
; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
|
||||
; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
|
||||
; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0];
|
||||
; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
|
||||
; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
|
||||
; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
|
||||
; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
|
||||
; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
|
||||
; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8f64p_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
|
||||
; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+16];
|
||||
; CHECK-NEXT: { // callseq 6, 0
|
||||
; CHECK-NEXT: .param .align 8 .b8 param0[32];
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
|
||||
; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
|
||||
; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
|
||||
; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
|
||||
; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
|
||||
; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
|
||||
; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
|
||||
; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
|
||||
; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
|
||||
; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
|
||||
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
|
||||
; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
|
||||
; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
|
||||
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
|
||||
; CHECK-NEXT: st.param.b64 [param0], %rd1;
|
||||
; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
|
||||
; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
|
||||
; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
|
||||
; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
|
||||
; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
|
||||
; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
|
||||
; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
|
||||
; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
|
||||
; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
|
||||
; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
|
||||
; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
|
||||
; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
|
||||
; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
|
||||
; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
|
||||
; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
|
||||
; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
|
||||
; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
|
||||
; CHECK-NEXT: } // callseq 6
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
|
||||
; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
|
||||
; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
|
||||
; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
|
||||
; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
|
||||
; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
|
||||
; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
|
||||
; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
|
||||
; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
|
||||
; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
|
||||
; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
|
||||
; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
|
||||
; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
|
||||
; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
|
||||
; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
|
||||
; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
|
||||
; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
|
||||
; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
|
||||
; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
|
||||
; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
|
||||
; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
|
||||
; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
|
||||
; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
|
||||
; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
|
||||
; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
|
||||
; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
|
||||
; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
|
||||
; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
|
||||
; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
|
||||
; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
|
||||
; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
|
||||
; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
|
||||
; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
|
||||
; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
|
||||
; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
|
||||
; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
|
||||
; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
|
||||
; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
|
||||
; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
|
||||
; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
|
||||
; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
|
||||
; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
|
||||
ret %s_i8f64p %r
|
||||
|
@ -89,14 +89,14 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
|
||||
; CHECK-NEXT: ld.param.b32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0];
|
||||
|
||||
; Store arguments to an array
|
||||
; CHECK32: .param .align 8 .b8 param1[28];
|
||||
; CHECK64: .param .align 8 .b8 param1[32];
|
||||
; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
|
||||
; CHECK-NEXT: st.param.b64 [param1+8], [[ARG_I64]];
|
||||
; CHECK-NEXT: st.param.b64 [param1+16], [[ARG_DOUBLE]];
|
||||
; CHECK-NEXT: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
|
||||
; CHECK-NEXT: .param .b32 retval0;
|
||||
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
|
||||
; CHECK32: .param .align 8 .b8 param1[28];
|
||||
; CHECK64: .param .align 8 .b8 param1[32];
|
||||
; CHECK-DAG: .param .b32 retval0;
|
||||
; CHECK-DAG: st.param.b32 [param1], [[ARG_I32]];
|
||||
; CHECK-DAG: st.param.b64 [param1+8], [[ARG_I64]];
|
||||
; CHECK-DAG: st.param.b64 [param1+16], [[ARG_DOUBLE]];
|
||||
; CHECK-DAG: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
|
||||
; CHECK-DAG: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
|
||||
|
||||
entry:
|
||||
%ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
|
||||
|
@ -115,13 +115,13 @@ define dso_local i32 @foo() {
|
||||
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
|
||||
; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408;
|
||||
; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408;
|
||||
; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
|
||||
; CHECK-PTX-NEXT: { // callseq 0, 0
|
||||
; CHECK-PTX-NEXT: .param .b32 param0;
|
||||
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
|
||||
; CHECK-PTX-NEXT: .param .b64 param1;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-PTX-NEXT: .param .b32 retval0;
|
||||
; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
|
||||
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
|
||||
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
|
||||
; CHECK-PTX-NEXT: } // callseq 0
|
||||
@ -218,13 +218,13 @@ define dso_local i32 @bar() {
|
||||
; CHECK-PTX-NEXT: st.b32 [%SP+8], 1;
|
||||
; CHECK-PTX-NEXT: st.b8 [%SP+12], 1;
|
||||
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
|
||||
; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
|
||||
; CHECK-PTX-NEXT: { // callseq 1, 0
|
||||
; CHECK-PTX-NEXT: .param .b32 param0;
|
||||
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
|
||||
; CHECK-PTX-NEXT: .param .b64 param1;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
|
||||
; CHECK-PTX-NEXT: .param .b32 retval0;
|
||||
; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
|
||||
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
|
||||
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
|
||||
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
|
||||
; CHECK-PTX-NEXT: } // callseq 1
|
||||
@ -289,13 +289,13 @@ define dso_local i32 @baz() {
|
||||
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
|
||||
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
|
||||
; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1};
|
||||
; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
|
||||
; CHECK-PTX-NEXT: { // callseq 2, 0
|
||||
; CHECK-PTX-NEXT: .param .b32 param0;
|
||||
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
|
||||
; CHECK-PTX-NEXT: .param .b64 param1;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-PTX-NEXT: .param .b32 retval0;
|
||||
; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
|
||||
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
|
||||
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
|
||||
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
|
||||
; CHECK-PTX-NEXT: } // callseq 2
|
||||
@ -348,7 +348,6 @@ define dso_local void @qux() {
|
||||
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
|
||||
; CHECK-PTX-NEXT: .reg .b64 %SP;
|
||||
; CHECK-PTX-NEXT: .reg .b64 %SPL;
|
||||
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
|
||||
; CHECK-PTX-EMPTY:
|
||||
; CHECK-PTX-NEXT: // %bb.0: // %entry
|
||||
@ -360,18 +359,17 @@ define dso_local void @qux() {
|
||||
; CHECK-PTX-NEXT: ld.global.nc.b64 %rd4, [__const_$_qux_$_s];
|
||||
; CHECK-PTX-NEXT: st.local.b64 [%rd2], %rd4;
|
||||
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
|
||||
; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd2];
|
||||
; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
|
||||
; CHECK-PTX-NEXT: add.u64 %rd7, %SP, 16;
|
||||
; CHECK-PTX-NEXT: { // callseq 3, 0
|
||||
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd5;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
|
||||
; CHECK-PTX-NEXT: .param .b64 param1;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd7;
|
||||
; CHECK-PTX-NEXT: .param .b32 retval0;
|
||||
; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
|
||||
; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
|
||||
; CHECK-PTX-NEXT: ld.local.b64 %rd7, [%rd2];
|
||||
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd7;
|
||||
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
|
||||
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
|
||||
; CHECK-PTX-NEXT: } // callseq 3
|
||||
; CHECK-PTX-NEXT: ret;
|
||||
entry:
|
||||
|
@ -6,16 +6,12 @@
|
||||
; CHECK: .visible .func use_dbg_declare()
|
||||
; CHECK: .local .align 8 .b8 __local_depot0[8];
|
||||
; CHECK: mov.b64 %SPL, __local_depot0;
|
||||
; CHECK: add.u64 %rd1, %SP, 0;
|
||||
; CHECK: .loc 1 5 3 // t.c:5:3
|
||||
; CHECK: { // callseq 0, 0
|
||||
; CHECK: .param .b64 param0;
|
||||
; CHECK: add.u64 %rd1, %SP, 0;
|
||||
; CHECK: st.param.b64 [param0], %rd1;
|
||||
; CHECK: call.uni
|
||||
; CHECK: escape_foo,
|
||||
; CHECK: (
|
||||
; CHECK: param0
|
||||
; CHECK: );
|
||||
; CHECK: call.uni escape_foo, (param0);
|
||||
; CHECK: } // callseq 0
|
||||
; CHECK: .loc 1 6 1 // t.c:6:1
|
||||
; CHECK: ret;
|
||||
|
@ -10,15 +10,15 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
|
||||
; CHECK-NEXT: .reg .b64 %rd<13>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [caller_St8x4_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [caller_St8x4_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [caller_St8x4_param_0+24];
|
||||
; CHECK-NEXT: ld.param.b64 %rd4, [caller_St8x4_param_0+16];
|
||||
; CHECK-NEXT: { // callseq 0, 0
|
||||
; CHECK-NEXT: .param .align 16 .b8 param0[32];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd2, %rd1};
|
||||
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd4, %rd3};
|
||||
; CHECK-NEXT: .param .align 16 .b8 retval0[32];
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [caller_St8x4_param_0+24];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [caller_St8x4_param_0+16];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd2, %rd1};
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [caller_St8x4_param_0+8];
|
||||
; CHECK-NEXT: ld.param.b64 %rd4, [caller_St8x4_param_0];
|
||||
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd4, %rd3};
|
||||
; CHECK-NEXT: call.uni (retval0), callee_St8x4, (param0);
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
|
||||
|
Loading…
x
Reference in New Issue
Block a user