[NVPTX] Legalize aext-load to zext-load to expose more DAG combines (#154251)
This commit is contained in:
parent
304373fb6d
commit
a3ed96b899
@ -15132,7 +15132,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
|
||||
return foldedExt;
|
||||
} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
||||
ISD::isUNINDEXEDLoad(N0.getNode()) &&
|
||||
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
|
||||
TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
|
||||
bool DoXform = true;
|
||||
SmallVector<SDNode *, 4> SetCCs;
|
||||
if (!N0.hasOneUse())
|
||||
|
@ -702,57 +702,56 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
|
||||
// intrinsics.
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
||||
|
||||
// Turn FP extload into load/fpextend
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
|
||||
// Turn FP truncstore into trunc + store.
|
||||
// FIXME: vector types should also be expanded
|
||||
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
|
||||
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
|
||||
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
|
||||
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
|
||||
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
||||
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
|
||||
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
|
||||
// FP extload/truncstore is not legal in PTX. We need to expand all these.
|
||||
for (auto FloatVTs :
|
||||
{MVT::fp_valuetypes(), MVT::fp_fixedlen_vector_valuetypes()}) {
|
||||
for (MVT ValVT : FloatVTs) {
|
||||
for (MVT MemVT : FloatVTs) {
|
||||
setLoadExtAction(ISD::EXTLOAD, ValVT, MemVT, Expand);
|
||||
setTruncStoreAction(ValVT, MemVT, Expand);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// To improve CodeGen we'll legalize any-extend loads to zext loads. This is
|
||||
// how they'll be lowered in ISel anyway, and by doing this a little earlier
|
||||
// we allow for more DAG combine opportunities.
|
||||
for (auto IntVTs :
|
||||
{MVT::integer_valuetypes(), MVT::integer_fixedlen_vector_valuetypes()})
|
||||
for (MVT ValVT : IntVTs)
|
||||
for (MVT MemVT : IntVTs)
|
||||
if (isTypeLegal(ValVT))
|
||||
setLoadExtAction(ISD::EXTLOAD, ValVT, MemVT, Custom);
|
||||
|
||||
// PTX does not support load / store predicate registers
|
||||
setOperationAction(ISD::LOAD, MVT::i1, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i1, Custom);
|
||||
|
||||
setOperationAction({ISD::LOAD, ISD::STORE}, MVT::i1, Custom);
|
||||
for (MVT VT : MVT::integer_valuetypes()) {
|
||||
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
|
||||
setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MVT::i1,
|
||||
Promote);
|
||||
setTruncStoreAction(VT, MVT::i1, Expand);
|
||||
}
|
||||
|
||||
// Register custom handling for illegal type loads/stores. We'll try to custom
|
||||
// lower almost all illegal types and logic in the lowering will discard cases
|
||||
// we can't handle.
|
||||
setOperationAction({ISD::LOAD, ISD::STORE}, {MVT::i128, MVT::f128}, Custom);
|
||||
for (MVT VT : MVT::fixedlen_vector_valuetypes())
|
||||
if (!isTypeLegal(VT) && VT.getStoreSizeInBits() <= 256)
|
||||
setOperationAction({ISD::STORE, ISD::LOAD}, VT, Custom);
|
||||
|
||||
// Custom legalization for LDU intrinsics.
|
||||
// TODO: The logic to lower these is not very robust and we should rewrite it.
|
||||
// Perhaps LDU should not be represented as an intrinsic at all.
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
|
||||
for (MVT VT : MVT::fixedlen_vector_valuetypes())
|
||||
if (IsPTXVectorType(VT))
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
|
||||
|
||||
setCondCodeAction({ISD::SETNE, ISD::SETEQ, ISD::SETUGE, ISD::SETULE,
|
||||
ISD::SETUGT, ISD::SETULT, ISD::SETGT, ISD::SETLT,
|
||||
ISD::SETGE, ISD::SETLE},
|
||||
MVT::i1, Expand);
|
||||
|
||||
// expand extload of vector of integers.
|
||||
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
|
||||
MVT::v2i8, Expand);
|
||||
setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
|
||||
|
||||
// This is legal in NVPTX
|
||||
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||
@ -767,24 +766,12 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
|
||||
// DEBUGTRAP can be lowered to PTX brkpt
|
||||
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
|
||||
|
||||
// Register custom handling for vector loads/stores
|
||||
for (MVT VT : MVT::fixedlen_vector_valuetypes())
|
||||
if (IsPTXVectorType(VT))
|
||||
setOperationAction({ISD::LOAD, ISD::STORE, ISD::INTRINSIC_W_CHAIN}, VT,
|
||||
Custom);
|
||||
|
||||
setOperationAction({ISD::LOAD, ISD::STORE, ISD::INTRINSIC_W_CHAIN},
|
||||
{MVT::i128, MVT::f128}, Custom);
|
||||
|
||||
// Support varargs.
|
||||
setOperationAction(ISD::VASTART, MVT::Other, Custom);
|
||||
setOperationAction(ISD::VAARG, MVT::Other, Custom);
|
||||
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
|
||||
setOperationAction(ISD::VAEND, MVT::Other, Expand);
|
||||
|
||||
// Custom handling for i8 intrinsics
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
|
||||
|
||||
setOperationAction({ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX},
|
||||
{MVT::i16, MVT::i32, MVT::i64}, Legal);
|
||||
|
||||
@ -3092,39 +3079,14 @@ static void replaceLoadVector(SDNode *N, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &Results,
|
||||
const NVPTXSubtarget &STI);
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (Op.getValueType() == MVT::i1)
|
||||
return LowerLOADi1(Op, DAG);
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
if (NVPTX::isPackedVectorTy(VT)) {
|
||||
// v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to
|
||||
// handle unaligned loads and have to handle it here.
|
||||
LoadSDNode *Load = cast<LoadSDNode>(Op);
|
||||
EVT MemVT = Load->getMemoryVT();
|
||||
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
|
||||
MemVT, *Load->getMemOperand())) {
|
||||
SDValue Ops[2];
|
||||
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
|
||||
return DAG.getMergeValues(Ops, SDLoc(Op));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// v = ld i1* addr
|
||||
// =>
|
||||
// v1 = ld i8* addr (-> i16)
|
||||
// v = trunc i16 to i1
|
||||
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDNode *Node = Op.getNode();
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Node);
|
||||
SDLoc dl(Node);
|
||||
static SDValue lowerLOADi1(LoadSDNode *LD, SelectionDAG &DAG) {
|
||||
SDLoc dl(LD);
|
||||
assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
|
||||
assert(Node->getValueType(0) == MVT::i1 &&
|
||||
"Custom lowering for i1 load only");
|
||||
assert(LD->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only");
|
||||
SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
|
||||
LD->getBasePtr(), LD->getPointerInfo(),
|
||||
MVT::i8, LD->getAlign(),
|
||||
@ -3133,8 +3095,27 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
// The legalizer (the caller) is expecting two values from the legalized
|
||||
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
|
||||
// in LegalizeDAG.cpp which also uses MergeValues.
|
||||
SDValue Ops[] = { result, LD->getChain() };
|
||||
return DAG.getMergeValues(Ops, dl);
|
||||
return DAG.getMergeValues({result, LD->getChain()}, dl);
|
||||
}
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Op);
|
||||
|
||||
if (Op.getValueType() == MVT::i1)
|
||||
return lowerLOADi1(LD, DAG);
|
||||
|
||||
// To improve CodeGen we'll legalize any-extend loads to zext loads. This is
|
||||
// how they'll be lowered in ISel anyway, and by doing this a little earlier
|
||||
// we allow for more DAG combine opportunities.
|
||||
if (LD->getExtensionType() == ISD::EXTLOAD) {
|
||||
assert(LD->getValueType(0).isInteger() && LD->getMemoryVT().isInteger() &&
|
||||
"Unexpected fpext-load");
|
||||
return DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Op), Op.getValueType(),
|
||||
LD->getChain(), LD->getBasePtr(), LD->getMemoryVT(),
|
||||
LD->getMemOperand());
|
||||
}
|
||||
|
||||
llvm_unreachable("Unexpected custom lowering for load");
|
||||
}
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
@ -3144,17 +3125,6 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (VT == MVT::i1)
|
||||
return LowerSTOREi1(Op, DAG);
|
||||
|
||||
// v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to
|
||||
// handle unaligned stores and have to handle it here.
|
||||
if (NVPTX::isPackedVectorTy(VT) &&
|
||||
!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
|
||||
VT, *Store->getMemOperand()))
|
||||
return expandUnalignedStore(Store, DAG);
|
||||
|
||||
// v2f16/v2bf16/v2i16 don't need special handling.
|
||||
if (NVPTX::isPackedVectorTy(VT) && VT.is32BitVector())
|
||||
return SDValue();
|
||||
|
||||
// Lower store of any other vector type, including v2f32 as we want to break
|
||||
// it apart since this is not a widely-supported type.
|
||||
return LowerSTOREVector(Op, DAG);
|
||||
@ -4010,14 +3980,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
|
||||
case Intrinsic::nvvm_ldu_global_i:
|
||||
case Intrinsic::nvvm_ldu_global_f:
|
||||
case Intrinsic::nvvm_ldu_global_p: {
|
||||
auto &DL = I.getDataLayout();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
|
||||
Info.memVT = getValueType(DL, I.getType());
|
||||
else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
|
||||
Info.memVT = getPointerTy(DL);
|
||||
else
|
||||
Info.memVT = getValueType(DL, I.getType());
|
||||
Info.memVT = getValueType(I.getDataLayout(), I.getType());
|
||||
Info.ptrVal = I.getArgOperand(0);
|
||||
Info.offset = 0;
|
||||
Info.flags = MachineMemOperand::MOLoad;
|
||||
|
@ -309,8 +309,6 @@ private:
|
||||
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -19,9 +19,7 @@ define internal void @test() unnamed_addr nounwind {
|
||||
; CHECK-NEXT: ld $6, 24($sp)
|
||||
; CHECK-NEXT: ld $5, 16($sp)
|
||||
; CHECK-NEXT: ld $7, 32($sp)
|
||||
; CHECK-NEXT: lw $1, 0($sp)
|
||||
; CHECK-NEXT: # implicit-def: $a0_64
|
||||
; CHECK-NEXT: move $4, $1
|
||||
; CHECK-NEXT: lw $4, 0($sp)
|
||||
; CHECK-NEXT: jal use_sret
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: ld $ra, 56($sp) # 8-byte Folded Reload
|
||||
@ -64,15 +62,9 @@ define internal void @test2() unnamed_addr nounwind {
|
||||
; CHECK-NEXT: daddiu $4, $sp, 0
|
||||
; CHECK-NEXT: jal implicit_sret_decl2
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: lw $1, 20($sp)
|
||||
; CHECK-NEXT: lw $2, 12($sp)
|
||||
; CHECK-NEXT: lw $3, 4($sp)
|
||||
; CHECK-NEXT: # implicit-def: $a0_64
|
||||
; CHECK-NEXT: move $4, $3
|
||||
; CHECK-NEXT: # implicit-def: $a1_64
|
||||
; CHECK-NEXT: move $5, $2
|
||||
; CHECK-NEXT: # implicit-def: $a2_64
|
||||
; CHECK-NEXT: move $6, $1
|
||||
; CHECK-NEXT: lw $6, 20($sp)
|
||||
; CHECK-NEXT: lw $5, 12($sp)
|
||||
; CHECK-NEXT: lw $4, 4($sp)
|
||||
; CHECK-NEXT: jal use_sret2
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload
|
||||
|
@ -1904,7 +1904,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind {
|
||||
; N64-NEXT: daddu $1, $1, $25
|
||||
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx)))
|
||||
; N64-NEXT: ld $2, %got_disp(i32)($1)
|
||||
; N64-NEXT: lw $2, 0($2)
|
||||
; N64-NEXT: lwu $2, 0($2)
|
||||
; N64-NEXT: andi $2, $2, 15
|
||||
; N64-NEXT: ld $1, %got_disp(v16i8)($1)
|
||||
; N64-NEXT: daddu $1, $1, $2
|
||||
@ -1953,7 +1953,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind {
|
||||
; N64-NEXT: daddu $1, $1, $25
|
||||
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx)))
|
||||
; N64-NEXT: ld $2, %got_disp(i32)($1)
|
||||
; N64-NEXT: lw $2, 0($2)
|
||||
; N64-NEXT: lwu $2, 0($2)
|
||||
; N64-NEXT: andi $2, $2, 7
|
||||
; N64-NEXT: ld $1, %got_disp(v8i16)($1)
|
||||
; N64-NEXT: dlsa $1, $2, $1, 1
|
||||
@ -2002,7 +2002,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind {
|
||||
; N64-NEXT: daddu $1, $1, $25
|
||||
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx)))
|
||||
; N64-NEXT: ld $2, %got_disp(i32)($1)
|
||||
; N64-NEXT: lw $2, 0($2)
|
||||
; N64-NEXT: lwu $2, 0($2)
|
||||
; N64-NEXT: andi $2, $2, 3
|
||||
; N64-NEXT: ld $1, %got_disp(v4i32)($1)
|
||||
; N64-NEXT: dlsa $1, $2, $1, 2
|
||||
@ -2053,7 +2053,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind {
|
||||
; N64-NEXT: daddu $1, $1, $25
|
||||
; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx)))
|
||||
; N64-NEXT: ld $2, %got_disp(i32)($1)
|
||||
; N64-NEXT: lw $2, 0($2)
|
||||
; N64-NEXT: lwu $2, 0($2)
|
||||
; N64-NEXT: andi $2, $2, 1
|
||||
; N64-NEXT: ld $1, %got_disp(v2i64)($1)
|
||||
; N64-NEXT: dlsa $1, $2, $1, 3
|
||||
|
@ -711,11 +711,11 @@ define <2 x bfloat> @test_copysign(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
|
||||
; CHECK-NEXT: .reg .b32 %r<6>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_copysign_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [test_copysign_param_1];
|
||||
; CHECK-NEXT: and.b32 %r3, %r2, -2147450880;
|
||||
; CHECK-NEXT: and.b32 %r4, %r1, 2147450879;
|
||||
; CHECK-NEXT: or.b32 %r5, %r4, %r3;
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [test_copysign_param_1];
|
||||
; CHECK-NEXT: and.b32 %r2, %r1, -2147450880;
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [test_copysign_param_0];
|
||||
; CHECK-NEXT: and.b32 %r4, %r3, 2147450879;
|
||||
; CHECK-NEXT: or.b32 %r5, %r4, %r2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
|
||||
; CHECK-NEXT: ret;
|
||||
%r = call <2 x bfloat> @llvm.copysign.f16(<2 x bfloat> %a, <2 x bfloat> %b)
|
||||
|
@ -7,7 +7,7 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -22,23 +22,22 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB0_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -52,7 +51,7 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -67,23 +66,22 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB1_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -98,7 +96,7 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -114,23 +112,22 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB2_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -145,7 +142,7 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -160,23 +157,22 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB3_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -191,7 +187,7 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -206,23 +202,22 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB4_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -237,7 +232,7 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -253,23 +248,22 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB5_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB5_1;
|
||||
; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -284,7 +278,7 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -300,23 +294,22 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB6_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB6_1;
|
||||
; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -330,7 +323,7 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -346,23 +339,22 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB7_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB7_1;
|
||||
; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -377,7 +369,7 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -393,23 +385,22 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB8_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB8_1;
|
||||
; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -424,7 +415,7 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -440,23 +431,22 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB9_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB9_1;
|
||||
; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -471,7 +461,7 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -487,23 +477,22 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB10_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB10_1;
|
||||
; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -518,7 +507,7 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -534,23 +523,22 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB11_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB11_1;
|
||||
; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -565,7 +553,7 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -581,23 +569,22 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB12_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB12_1;
|
||||
; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -612,7 +599,7 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -628,23 +615,22 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB13_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB13_1;
|
||||
; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -659,7 +645,7 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -675,23 +661,22 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB14_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB14_1;
|
||||
; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -1899,7 +1884,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -1915,23 +1900,22 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB60_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB60_1;
|
||||
; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.sys;
|
||||
@ -1997,7 +1981,7 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -2013,23 +1997,22 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB64_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB64_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB64_1;
|
||||
; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
@ -2044,7 +2027,7 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
|
||||
; SM60: {
|
||||
; SM60-NEXT: .reg .pred %p<3>;
|
||||
; SM60-NEXT: .reg .b16 %rs<2>;
|
||||
; SM60-NEXT: .reg .b32 %r<18>;
|
||||
; SM60-NEXT: .reg .b32 %r<17>;
|
||||
; SM60-NEXT: .reg .b64 %rd<3>;
|
||||
; SM60-EMPTY:
|
||||
; SM60-NEXT: // %bb.0:
|
||||
@ -2060,23 +2043,22 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
|
||||
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM60-NEXT: not.b32 %r2, %r11;
|
||||
; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM60-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM60-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM60-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM60-NEXT: ld.shared.b32 %r14, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM60-NEXT: ld.shared.b32 %r13, [%rd1];
|
||||
; SM60-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM60-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
|
||||
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM60-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM60-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM60-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM60-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM60-NEXT: @%p1 bra $L__BB65_3;
|
||||
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1
|
||||
; SM60-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM60-NEXT: mov.b32 %r17, %r6;
|
||||
; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM60-NEXT: mov.b32 %r16, %r6;
|
||||
; SM60-NEXT: @%p2 bra $L__BB65_1;
|
||||
; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end
|
||||
; SM60-NEXT: membar.cta;
|
||||
|
@ -7,7 +7,7 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -22,23 +22,22 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB0_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -52,7 +51,7 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -67,23 +66,22 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB1_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -98,7 +96,7 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -114,23 +112,22 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB2_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -145,7 +142,7 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -160,23 +157,22 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB3_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -191,7 +187,7 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -206,23 +202,22 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB4_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -237,7 +232,7 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -253,23 +248,22 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB5_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB5_1;
|
||||
; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -284,7 +278,7 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -300,23 +294,22 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB6_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB6_1;
|
||||
; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -330,7 +323,7 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -346,23 +339,22 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB7_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB7_1;
|
||||
; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -377,7 +369,7 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -393,23 +385,22 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB8_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB8_1;
|
||||
; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -424,7 +415,7 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -440,23 +431,22 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB9_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB9_1;
|
||||
; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -471,7 +461,7 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -487,23 +477,22 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB10_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB10_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB10_1;
|
||||
; SM70-NEXT: $L__BB10_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -518,7 +507,7 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -534,23 +523,22 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB11_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB11_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB11_1;
|
||||
; SM70-NEXT: $L__BB11_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -565,7 +553,7 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -581,23 +569,22 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB12_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB12_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB12_1;
|
||||
; SM70-NEXT: $L__BB12_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -612,7 +599,7 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -628,23 +615,22 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB13_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB13_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB13_1;
|
||||
; SM70-NEXT: $L__BB13_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -659,7 +645,7 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -675,23 +661,22 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB14_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB14_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB14_1;
|
||||
; SM70-NEXT: $L__BB14_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -1899,7 +1884,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -1915,23 +1900,22 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB60_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB60_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB60_1;
|
||||
; SM70-NEXT: $L__BB60_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
@ -1997,7 +1981,7 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -2013,23 +1997,22 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB64_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB64_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB64_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB64_1;
|
||||
; SM70-NEXT: $L__BB64_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
@ -2044,7 +2027,7 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -2060,23 +2043,22 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.shared.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.shared.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB65_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB65_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB65_1;
|
||||
; SM70-NEXT: $L__BB65_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.cta;
|
||||
|
@ -7,7 +7,7 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -22,23 +22,22 @@ define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB0_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -52,7 +51,7 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -67,23 +66,22 @@ define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB1_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -98,7 +96,7 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -114,23 +112,22 @@ define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB2_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -145,7 +142,7 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -160,23 +157,22 @@ define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB3_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -191,7 +187,7 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -206,23 +202,22 @@ define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB4_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -237,7 +232,7 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -253,23 +248,22 @@ define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB5_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB5_1;
|
||||
; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -284,7 +278,7 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -300,23 +294,22 @@ define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB6_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB6_1;
|
||||
; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -330,7 +323,7 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -346,23 +339,22 @@ define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB7_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB7_1;
|
||||
; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -377,7 +369,7 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -393,23 +385,22 @@ define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB8_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB8_1;
|
||||
; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -424,7 +415,7 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -440,23 +431,22 @@ define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB9_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB9_1;
|
||||
; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -471,7 +461,7 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -487,23 +477,22 @@ define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB10_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB10_1;
|
||||
; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -518,7 +507,7 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -534,23 +523,22 @@ define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB11_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB11_1;
|
||||
; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -565,7 +553,7 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -581,23 +569,22 @@ define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB12_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB12_1;
|
||||
; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -612,7 +599,7 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -628,23 +615,22 @@ define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB13_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB13_1;
|
||||
; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -659,7 +645,7 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -675,23 +661,22 @@ define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB14_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB14_1;
|
||||
; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -1899,7 +1884,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -1915,23 +1900,22 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.global.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB60_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB60_1;
|
||||
; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.sys;
|
||||
@ -2014,7 +1998,7 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -2030,23 +2014,22 @@ define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB65_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB65_1;
|
||||
; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
@ -2061,7 +2044,7 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
|
||||
; SM90: {
|
||||
; SM90-NEXT: .reg .pred %p<3>;
|
||||
; SM90-NEXT: .reg .b16 %rs<2>;
|
||||
; SM90-NEXT: .reg .b32 %r<18>;
|
||||
; SM90-NEXT: .reg .b32 %r<17>;
|
||||
; SM90-NEXT: .reg .b64 %rd<3>;
|
||||
; SM90-EMPTY:
|
||||
; SM90-NEXT: // %bb.0:
|
||||
@ -2077,23 +2060,22 @@ define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
|
||||
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM90-NEXT: not.b32 %r2, %r11;
|
||||
; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM90-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM90-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM90-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM90-NEXT: ld.shared.b32 %r14, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM90-NEXT: ld.shared.b32 %r13, [%rd1];
|
||||
; SM90-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM90-NEXT: $L__BB66_1: // %partword.cmpxchg.loop
|
||||
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM90-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM90-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM90-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM90-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM90-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM90-NEXT: @%p1 bra $L__BB66_3;
|
||||
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1
|
||||
; SM90-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM90-NEXT: mov.b32 %r17, %r6;
|
||||
; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM90-NEXT: mov.b32 %r16, %r6;
|
||||
; SM90-NEXT: @%p2 bra $L__BB66_1;
|
||||
; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end
|
||||
; SM90-NEXT: fence.acquire.cta;
|
||||
|
@ -14,7 +14,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30: {
|
||||
; SM30-NEXT: .reg .pred %p<3>;
|
||||
; SM30-NEXT: .reg .b16 %rs<2>;
|
||||
; SM30-NEXT: .reg .b32 %r<18>;
|
||||
; SM30-NEXT: .reg .b32 %r<17>;
|
||||
; SM30-NEXT: .reg .b64 %rd<3>;
|
||||
; SM30-EMPTY:
|
||||
; SM30-NEXT: // %bb.0:
|
||||
@ -29,23 +29,22 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r11;
|
||||
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM30-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM30-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM30-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM30-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM30-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM30-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM30-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM30-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM30-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM30-NEXT: @%p1 bra $L__BB0_3;
|
||||
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1
|
||||
; SM30-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM30-NEXT: mov.b32 %r17, %r6;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM30-NEXT: mov.b32 %r16, %r6;
|
||||
; SM30-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -55,7 +54,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -70,23 +69,22 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB0_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB0_1;
|
||||
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -140,7 +138,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30: {
|
||||
; SM30-NEXT: .reg .pred %p<3>;
|
||||
; SM30-NEXT: .reg .b16 %rs<2>;
|
||||
; SM30-NEXT: .reg .b32 %r<18>;
|
||||
; SM30-NEXT: .reg .b32 %r<17>;
|
||||
; SM30-NEXT: .reg .b64 %rd<3>;
|
||||
; SM30-EMPTY:
|
||||
; SM30-NEXT: // %bb.0:
|
||||
@ -155,23 +153,22 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r11;
|
||||
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM30-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM30-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM30-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM30-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM30-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM30-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM30-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM30-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM30-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM30-NEXT: @%p1 bra $L__BB1_3;
|
||||
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1
|
||||
; SM30-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM30-NEXT: mov.b32 %r17, %r6;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM30-NEXT: mov.b32 %r16, %r6;
|
||||
; SM30-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: membar.sys;
|
||||
@ -182,7 +179,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -197,23 +194,22 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB1_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB1_1;
|
||||
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
@ -269,7 +265,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30: {
|
||||
; SM30-NEXT: .reg .pred %p<3>;
|
||||
; SM30-NEXT: .reg .b16 %rs<2>;
|
||||
; SM30-NEXT: .reg .b32 %r<18>;
|
||||
; SM30-NEXT: .reg .b32 %r<17>;
|
||||
; SM30-NEXT: .reg .b64 %rd<3>;
|
||||
; SM30-EMPTY:
|
||||
; SM30-NEXT: // %bb.0:
|
||||
@ -285,23 +281,22 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r11;
|
||||
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM30-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM30-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM30-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM30-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM30-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM30-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM30-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM30-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM30-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM30-NEXT: @%p1 bra $L__BB2_3;
|
||||
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1
|
||||
; SM30-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM30-NEXT: mov.b32 %r17, %r6;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM30-NEXT: mov.b32 %r16, %r6;
|
||||
; SM30-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -311,7 +306,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -327,23 +322,22 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB2_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB2_1;
|
||||
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
@ -398,7 +392,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30: {
|
||||
; SM30-NEXT: .reg .pred %p<3>;
|
||||
; SM30-NEXT: .reg .b16 %rs<2>;
|
||||
; SM30-NEXT: .reg .b32 %r<18>;
|
||||
; SM30-NEXT: .reg .b32 %r<17>;
|
||||
; SM30-NEXT: .reg .b64 %rd<3>;
|
||||
; SM30-EMPTY:
|
||||
; SM30-NEXT: // %bb.0:
|
||||
@ -414,23 +408,22 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r11;
|
||||
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM30-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM30-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM30-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM30-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM30-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM30-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM30-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM30-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM30-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM30-NEXT: @%p1 bra $L__BB3_3;
|
||||
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1
|
||||
; SM30-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM30-NEXT: mov.b32 %r17, %r6;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM30-NEXT: mov.b32 %r16, %r6;
|
||||
; SM30-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: membar.sys;
|
||||
@ -441,7 +434,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -457,23 +450,22 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB3_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB3_1;
|
||||
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
@ -530,7 +522,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30: {
|
||||
; SM30-NEXT: .reg .pred %p<3>;
|
||||
; SM30-NEXT: .reg .b16 %rs<2>;
|
||||
; SM30-NEXT: .reg .b32 %r<18>;
|
||||
; SM30-NEXT: .reg .b32 %r<17>;
|
||||
; SM30-NEXT: .reg .b64 %rd<3>;
|
||||
; SM30-EMPTY:
|
||||
; SM30-NEXT: // %bb.0:
|
||||
@ -546,23 +538,22 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM30-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM30-NEXT: not.b32 %r2, %r11;
|
||||
; SM30-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM30-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM30-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM30-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM30-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM30-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM30-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM30-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM30-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
; SM30-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM30-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM30-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM30-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM30-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM30-NEXT: atom.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM30-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM30-NEXT: @%p1 bra $L__BB4_3;
|
||||
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1
|
||||
; SM30-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM30-NEXT: mov.b32 %r17, %r6;
|
||||
; SM30-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM30-NEXT: mov.b32 %r16, %r6;
|
||||
; SM30-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM30-NEXT: membar.sys;
|
||||
@ -573,7 +564,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70: {
|
||||
; SM70-NEXT: .reg .pred %p<3>;
|
||||
; SM70-NEXT: .reg .b16 %rs<2>;
|
||||
; SM70-NEXT: .reg .b32 %r<18>;
|
||||
; SM70-NEXT: .reg .b32 %r<17>;
|
||||
; SM70-NEXT: .reg .b64 %rd<3>;
|
||||
; SM70-EMPTY:
|
||||
; SM70-NEXT: // %bb.0:
|
||||
@ -589,23 +580,22 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
|
||||
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
|
||||
; SM70-NEXT: not.b32 %r2, %r11;
|
||||
; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
|
||||
; SM70-NEXT: and.b32 %r13, %r12, 255;
|
||||
; SM70-NEXT: shl.b32 %r3, %r13, %r1;
|
||||
; SM70-NEXT: shl.b32 %r3, %r12, %r1;
|
||||
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
|
||||
; SM70-NEXT: ld.b32 %r14, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r17, %r14, %r2;
|
||||
; SM70-NEXT: ld.b32 %r13, [%rd1];
|
||||
; SM70-NEXT: and.b32 %r16, %r13, %r2;
|
||||
; SM70-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
|
||||
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; SM70-NEXT: or.b32 %r15, %r17, %r3;
|
||||
; SM70-NEXT: or.b32 %r16, %r17, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r16, %r15;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r16;
|
||||
; SM70-NEXT: or.b32 %r14, %r16, %r3;
|
||||
; SM70-NEXT: or.b32 %r15, %r16, %r4;
|
||||
; SM70-NEXT: atom.relaxed.sys.cas.b32 %r5, [%rd1], %r15, %r14;
|
||||
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
|
||||
; SM70-NEXT: @%p1 bra $L__BB4_3;
|
||||
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
|
||||
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
|
||||
; SM70-NEXT: and.b32 %r6, %r5, %r2;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
|
||||
; SM70-NEXT: mov.b32 %r17, %r6;
|
||||
; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
|
||||
; SM70-NEXT: mov.b32 %r16, %r6;
|
||||
; SM70-NEXT: @%p2 bra $L__BB4_1;
|
||||
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
|
||||
; SM70-NEXT: fence.acq_rel.sys;
|
||||
|
@ -185,44 +185,40 @@ define void @test_v4halfp0a1(ptr noalias readonly %from, ptr %to) {
|
||||
define void @s1(ptr %p1, <4 x float> %v) {
|
||||
; CHECK-LABEL: s1(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<5>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<18>;
|
||||
; CHECK-NEXT: .reg .b32 %r<17>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [s1_param_0];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [s1_param_1];
|
||||
; CHECK-NEXT: cvt.u64.u32 %rd2, %r4;
|
||||
; CHECK-NEXT: st.b8 [%rd1+12], %rd2;
|
||||
; CHECK-NEXT: cvt.u64.u32 %rd3, %r3;
|
||||
; CHECK-NEXT: st.b8 [%rd1+8], %rd3;
|
||||
; CHECK-NEXT: cvt.u64.u32 %rd4, %r2;
|
||||
; CHECK-NEXT: st.b8 [%rd1+4], %rd4;
|
||||
; CHECK-NEXT: cvt.u64.u32 %rd5, %r1;
|
||||
; CHECK-NEXT: st.b8 [%rd1], %rd5;
|
||||
; CHECK-NEXT: shr.u64 %rd6, %rd2, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+15], %rd6;
|
||||
; CHECK-NEXT: shr.u64 %rd7, %rd2, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+14], %rd7;
|
||||
; CHECK-NEXT: shr.u64 %rd8, %rd2, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+13], %rd8;
|
||||
; CHECK-NEXT: shr.u64 %rd9, %rd3, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+11], %rd9;
|
||||
; CHECK-NEXT: shr.u64 %rd10, %rd3, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+10], %rd10;
|
||||
; CHECK-NEXT: shr.u64 %rd11, %rd3, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+9], %rd11;
|
||||
; CHECK-NEXT: shr.u64 %rd12, %rd4, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+7], %rd12;
|
||||
; CHECK-NEXT: shr.u64 %rd13, %rd4, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+6], %rd13;
|
||||
; CHECK-NEXT: shr.u64 %rd14, %rd4, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+5], %rd14;
|
||||
; CHECK-NEXT: shr.u64 %rd15, %rd5, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+3], %rd15;
|
||||
; CHECK-NEXT: shr.u64 %rd16, %rd5, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+2], %rd16;
|
||||
; CHECK-NEXT: shr.u64 %rd17, %rd5, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+1], %rd17;
|
||||
; CHECK-NEXT: st.b8 [%rd1+12], %r4;
|
||||
; CHECK-NEXT: st.b8 [%rd1+8], %r3;
|
||||
; CHECK-NEXT: st.b8 [%rd1+4], %r2;
|
||||
; CHECK-NEXT: st.b8 [%rd1], %r1;
|
||||
; CHECK-NEXT: shr.u32 %r5, %r4, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+15], %r5;
|
||||
; CHECK-NEXT: shr.u32 %r6, %r4, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+14], %r6;
|
||||
; CHECK-NEXT: shr.u32 %r7, %r4, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+13], %r7;
|
||||
; CHECK-NEXT: shr.u32 %r8, %r3, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+11], %r8;
|
||||
; CHECK-NEXT: shr.u32 %r9, %r3, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+10], %r9;
|
||||
; CHECK-NEXT: shr.u32 %r10, %r3, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+9], %r10;
|
||||
; CHECK-NEXT: shr.u32 %r11, %r2, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+7], %r11;
|
||||
; CHECK-NEXT: shr.u32 %r12, %r2, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+6], %r12;
|
||||
; CHECK-NEXT: shr.u32 %r13, %r2, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+5], %r13;
|
||||
; CHECK-NEXT: shr.u32 %r14, %r1, 24;
|
||||
; CHECK-NEXT: st.b8 [%rd1+3], %r14;
|
||||
; CHECK-NEXT: shr.u32 %r15, %r1, 16;
|
||||
; CHECK-NEXT: st.b8 [%rd1+2], %r15;
|
||||
; CHECK-NEXT: shr.u32 %r16, %r1, 8;
|
||||
; CHECK-NEXT: st.b8 [%rd1+1], %r16;
|
||||
; CHECK-NEXT: ret;
|
||||
store <4 x float> %v, ptr %p1, align 1
|
||||
ret void
|
||||
|
@ -118,17 +118,15 @@ define i32 @mulwideu8(i8 %a, i8 %b) {
|
||||
; NOOPT-LABEL: mulwideu8(
|
||||
; NOOPT: {
|
||||
; NOOPT-NEXT: .reg .b16 %rs<3>;
|
||||
; NOOPT-NEXT: .reg .b32 %r<6>;
|
||||
; NOOPT-NEXT: .reg .b32 %r<4>;
|
||||
; NOOPT-EMPTY:
|
||||
; NOOPT-NEXT: // %bb.0:
|
||||
; NOOPT-NEXT: ld.param.b8 %rs2, [mulwideu8_param_1];
|
||||
; NOOPT-NEXT: ld.param.b8 %rs1, [mulwideu8_param_0];
|
||||
; NOOPT-NEXT: cvt.u32.u16 %r1, %rs1;
|
||||
; NOOPT-NEXT: and.b32 %r2, %r1, 255;
|
||||
; NOOPT-NEXT: cvt.u32.u16 %r3, %rs2;
|
||||
; NOOPT-NEXT: and.b32 %r4, %r3, 255;
|
||||
; NOOPT-NEXT: mul.lo.s32 %r5, %r2, %r4;
|
||||
; NOOPT-NEXT: st.param.b32 [func_retval0], %r5;
|
||||
; NOOPT-NEXT: cvt.u32.u16 %r2, %rs2;
|
||||
; NOOPT-NEXT: mul.lo.s32 %r3, %r1, %r2;
|
||||
; NOOPT-NEXT: st.param.b32 [func_retval0], %r3;
|
||||
; NOOPT-NEXT: ret;
|
||||
%val0 = zext i8 %a to i32
|
||||
%val1 = zext i8 %b to i32
|
||||
|
Loading…
x
Reference in New Issue
Block a user