Reland "[SelectionDAG] Make (a & x) | (~a & y) -> (a & (x ^ y)) ^ y
available for all targets" (#143651)
This commit is contained in:
parent
14c11e4bcb
commit
24d730b380
@ -8128,6 +8128,59 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to fold one particular operand ordering of a masked merge.
///
/// Matches `(~M & AndR0) | (M & Y)`, where \p AndL0 must be the single-use
/// bitwise-not `~M` and `M` may be either operand of the other AND
/// (\p AndL1 or \p AndR1); `Y` is whichever of the two is not `M`.
///
/// \returns `((Y ^ AndR0) & M) ^ AndR0` on a match, or an empty SDValue when
/// the operands do not form the pattern.
static SDValue foldMaskedMergeImpl(SDValue AndL0, SDValue AndR0, SDValue AndL1,
                                   SDValue AndR1, const SDLoc &DL,
                                   SelectionDAG &DAG) {
  if (!isBitwiseNot(AndL0, true) || !AndL0->hasOneUse())
    return SDValue();
  SDValue NotOp = AndL0->getOperand(0);
  // Canonicalize so that the mask, if it is present at all, sits in AndL1 and
  // the merged-in value sits in AndR1.
  if (NotOp == AndR1)
    std::swap(AndR1, AndL1);
  if (NotOp != AndL1)
    return SDValue();

  // (~NotOp & AndR0) | (NotOp & AndR1)
  //   --> ((AndR1 ^ AndR0) & NotOp) ^ AndR0
  //
  // AndR0 is used once in the matched pattern but twice in the replacement.
  // Freeze it so that an undef AndR0 is observed as the same value by both
  // XORs; otherwise the two uses could disagree and corrupt the bits selected
  // by NotOp, which the original expression takes from AndR1 alone.
  EVT VT = AndL1.getValueType();
  SDValue FrozenAndR0 = DAG.getNode(ISD::FREEZE, DL, VT, AndR0);
  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, AndR1, FrozenAndR0);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
  SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, FrozenAndR0);
  return Xor1;
}
|
||||
|
||||
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
|
||||
/// equivalent `((x ^ y) & m) ^ y)` pattern.
|
||||
/// This is typically a better representation for targets without a fused
|
||||
/// "and-not" operation.
|
||||
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
|
||||
const TargetLowering &TLI, const SDLoc &DL) {
|
||||
// Note that masked-merge variants using XOR or ADD expressions are
|
||||
// normalized to OR by InstCombine so we only check for OR.
|
||||
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
|
||||
return SDValue();
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
// If the target supports and-not, don't fold this.
|
||||
if (TLI.hasAndNot(SDValue(Node, 0)))
|
||||
return SDValue();
|
||||
|
||||
SDValue N00 = N0->getOperand(0);
|
||||
SDValue N01 = N0->getOperand(1);
|
||||
SDValue N10 = N1->getOperand(0);
|
||||
SDValue N11 = N1->getOperand(1);
|
||||
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
|
||||
return Result;
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitOR(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
@ -8306,6 +8359,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
|
||||
if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
|
||||
return R;
|
||||
|
||||
if (VT.isScalarInteger() && VT != MVT::i1)
|
||||
if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
|
||||
return R;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -1283,6 +1283,20 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Decide, from the value type of \p Y and the subtarget's feature queries,
/// whether an and-not operation is reported as available.
bool SystemZTargetLowering::hasAndNot(SDValue Y) const {
  const EVT Ty = Y.getValueType();

  // Types in GPRs can use NC(G)RK ...
  const bool LivesInGPR = Ty == MVT::i32 || Ty == MVT::i64;
  if (LivesInGPR)
    return Subtarget.hasMiscellaneousExtensions3();

  // ... and types in VRs can use VNC. Anything else has no and-not.
  const bool LivesInVR = Ty.isVector() || Ty == MVT::i128;
  return LivesInVR && Subtarget.hasVector();
}
|
||||
|
||||
// Information about the addressing mode for a memory access.
|
||||
struct AddressingMode {
|
||||
// True if a long displacement is supported.
|
||||
|
@ -671,6 +671,7 @@ public:
|
||||
}
|
||||
|
||||
unsigned getStackProbeSize(const MachineFunction &MF) const;
|
||||
bool hasAndNot(SDValue Y) const override;
|
||||
|
||||
private:
|
||||
const SystemZSubtarget &Subtarget;
|
||||
|
@ -52350,59 +52350,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
|
||||
}
|
||||
|
||||
/// Attempt the masked-merge rewrite for a single operand ordering.
///
/// \p And0_L must be a single-use bitwise-not of some mask, and that mask must
/// also be one of the operands of the other AND (\p And1_L / \p And1_R).
/// Returns the rewritten value, or an empty SDValue if the operands do not
/// match.
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
                                   SDValue And1_L, SDValue And1_R,
                                   const SDLoc &DL, SelectionDAG &DAG) {
  if (!(isBitwiseNot(And0_L, true) && And0_L->hasOneUse()))
    return SDValue();

  SDValue Mask = And0_L->getOperand(0);
  // Put the mask into And1_L if it was found as the right-hand operand.
  if (Mask == And1_R)
    std::swap(And1_R, And1_L);
  if (Mask != And1_L)
    return SDValue();

  // (~Mask & And0_R) | (Mask & And1_R)
  //   --> ((And0_R ^ And1_R) & Mask) ^ And0_R
  // And0_R appears twice in the result, so a frozen copy is used for both.
  EVT VT = And1_L->getValueType(0);
  SDValue FrozenY = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
  SDValue Diff = DAG.getNode(ISD::XOR, DL, VT, And1_R, FrozenY);
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, Diff, Mask);
  return DAG.getNode(ISD::XOR, DL, VT, Masked, FrozenY);
}
|
||||
|
||||
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
|
||||
/// equivalent `((x ^ y) & m) ^ y)` pattern.
|
||||
/// This is typically a better representation for targets without a fused
|
||||
/// "and-not" operation. This function is intended to be called from a
|
||||
/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
|
||||
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
|
||||
// Note that masked-merge variants using XOR or ADD expressions are
|
||||
// normalized to OR by InstCombine so we only check for OR.
|
||||
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
|
||||
return SDValue();
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(Node);
|
||||
SDValue N00 = N0->getOperand(0);
|
||||
SDValue N01 = N0->getOperand(1);
|
||||
SDValue N10 = N1->getOperand(0);
|
||||
SDValue N11 = N1->getOperand(1);
|
||||
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
|
||||
return Result;
|
||||
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
|
||||
return Result;
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
|
||||
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
|
||||
/// with CMP+{ADC, SBB}.
|
||||
@ -52806,11 +52753,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
}
|
||||
|
||||
// We should fold "masked merge" patterns when `andn` is not available.
|
||||
if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
|
||||
if (SDValue R = foldMaskedMerge(N, DAG))
|
||||
return R;
|
||||
|
||||
if (SDValue R = combineOrXorWithSETCC(N->getOpcode(), dl, VT, N0, N1, DAG))
|
||||
return R;
|
||||
|
||||
|
@ -16,9 +16,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s6, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_andn2_b32 s2, s2, s0
|
||||
; GFX7-NEXT: s_xor_b32 s1, s1, s2
|
||||
; GFX7-NEXT: s_and_b32 s0, s1, s0
|
||||
; GFX7-NEXT: s_or_b32 s0, s2, s0
|
||||
; GFX7-NEXT: s_xor_b32 s0, s0, s2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@ -28,9 +28,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
|
||||
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
|
||||
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_andn2_b32 s2, s2, s0
|
||||
; GFX8-NEXT: s_xor_b32 s1, s1, s2
|
||||
; GFX8-NEXT: s_and_b32 s0, s1, s0
|
||||
; GFX8-NEXT: s_or_b32 s0, s2, s0
|
||||
; GFX8-NEXT: s_xor_b32 s0, s0, s2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s0
|
||||
@ -44,9 +44,9 @@ define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y,
|
||||
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_andn2_b32 s2, s2, s0
|
||||
; GFX10-NEXT: s_xor_b32 s1, s1, s2
|
||||
; GFX10-NEXT: s_and_b32 s0, s1, s0
|
||||
; GFX10-NEXT: s_or_b32 s0, s2, s0
|
||||
; GFX10-NEXT: s_xor_b32 s0, s0, s2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
@ -1407,9 +1407,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s6, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
|
||||
; GFX7-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
|
||||
; GFX7-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
|
||||
; GFX7-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
||||
; GFX7-NEXT: s_add_u32 s0, s0, 10
|
||||
; GFX7-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s0
|
||||
@ -1422,9 +1422,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
|
||||
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
|
||||
; GFX8-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
|
||||
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
|
||||
; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
||||
; GFX8-NEXT: s_add_u32 s0, s0, 10
|
||||
; GFX8-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
@ -1438,9 +1438,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
|
||||
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
|
||||
; GFX10-NEXT: s_andn2_b64 s[0:1], s[4:5], s[0:1]
|
||||
; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GFX10-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
|
||||
; GFX10-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
|
||||
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
||||
; GFX10-NEXT: s_add_u32 s0, s0, 10
|
||||
; GFX10-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s0
|
||||
|
@ -289,16 +289,16 @@ entry:
|
||||
define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec, i32 %sel) {
|
||||
; GCN-LABEL: half4_inselt:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
|
||||
; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
|
||||
; GCN-NEXT: s_mov_b32 s5, s4
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
|
||||
; GCN-NEXT: s_lshl_b32 s6, s6, 4
|
||||
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
|
||||
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
|
||||
; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
|
||||
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -317,10 +317,10 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec,
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_lshl_b32 s3, s3, 4
|
||||
; GCN-NEXT: s_xor_b32 s4, s2, 0x3c003c00
|
||||
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
|
||||
; GCN-NEXT: s_andn2_b32 s2, s2, s3
|
||||
; GCN-NEXT: s_and_b32 s3, s3, 0x3c003c00
|
||||
; GCN-NEXT: s_or_b32 s2, s3, s2
|
||||
; GCN-NEXT: s_and_b32 s3, s4, s3
|
||||
; GCN-NEXT: s_xor_b32 s2, s3, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s2
|
||||
@ -399,10 +399,10 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec,
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_lshl_b32 s3, s3, 4
|
||||
; GCN-NEXT: s_xor_b32 s4, s2, 0x10001
|
||||
; GCN-NEXT: s_lshl_b32 s3, 0xffff, s3
|
||||
; GCN-NEXT: s_andn2_b32 s2, s2, s3
|
||||
; GCN-NEXT: s_and_b32 s3, s3, 0x10001
|
||||
; GCN-NEXT: s_or_b32 s2, s3, s2
|
||||
; GCN-NEXT: s_and_b32 s3, s4, s3
|
||||
; GCN-NEXT: s_xor_b32 s2, s3, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s2
|
||||
@ -417,16 +417,16 @@ entry:
|
||||
define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec, i32 %sel) {
|
||||
; GCN-LABEL: short4_inselt:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
|
||||
; GCN-NEXT: s_mov_b32 s4, 0x10001
|
||||
; GCN-NEXT: s_mov_b32 s5, s4
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_xor_b64 s[4:5], s[2:3], s[4:5]
|
||||
; GCN-NEXT: s_lshl_b32 s6, s6, 4
|
||||
; GCN-NEXT: s_lshl_b64 s[6:7], 0xffff, s6
|
||||
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[6:7]
|
||||
; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
|
||||
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s1
|
||||
@ -442,15 +442,15 @@ entry:
|
||||
define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i32 %sel) {
|
||||
; GCN-LABEL: byte8_inselt:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_lshl_b32 s4, s6, 3
|
||||
; GCN-NEXT: s_lshl_b64 s[4:5], 0xff, s4
|
||||
; GCN-NEXT: s_and_b32 s7, s5, 0x1010101
|
||||
; GCN-NEXT: s_and_b32 s6, s4, 0x1010101
|
||||
; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
|
||||
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3]
|
||||
; GCN-NEXT: s_xor_b32 s5, s3, 0x1010101
|
||||
; GCN-NEXT: s_lshl_b32 s6, s6, 3
|
||||
; GCN-NEXT: s_xor_b32 s4, s2, 0x1010101
|
||||
; GCN-NEXT: s_lshl_b64 s[6:7], 0xff, s6
|
||||
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
|
||||
; GCN-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, s1
|
||||
|
@ -1511,13 +1511,13 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2
|
||||
; SI-NEXT: s_mov_b32 s7, 0x100f000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b32 s4, s0
|
||||
; SI-NEXT: s_lshl_b32 s0, s3, 4
|
||||
; SI-NEXT: s_lshl_b32 s0, 0xffff, s0
|
||||
; SI-NEXT: s_mov_b32 s5, s1
|
||||
; SI-NEXT: s_andn2_b32 s1, s2, s0
|
||||
; SI-NEXT: s_and_b32 s0, s0, 0x50005
|
||||
; SI-NEXT: s_or_b32 s0, s0, s1
|
||||
; SI-NEXT: s_lshl_b32 s1, s3, 4
|
||||
; SI-NEXT: s_mov_b32 s4, s0
|
||||
; SI-NEXT: s_xor_b32 s0, s2, 0x50005
|
||||
; SI-NEXT: s_lshl_b32 s1, 0xffff, s1
|
||||
; SI-NEXT: s_and_b32 s0, s0, s1
|
||||
; SI-NEXT: s_xor_b32 s0, s0, s2
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
@ -1528,13 +1528,13 @@ define amdgpu_kernel void @dynamic_insertelement_v2i16(ptr addrspace(1) %out, <2
|
||||
; VI-NEXT: s_mov_b32 s7, 0x1100f000
|
||||
; VI-NEXT: s_mov_b32 s6, -1
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_mov_b32 s4, s0
|
||||
; VI-NEXT: s_lshl_b32 s0, s3, 4
|
||||
; VI-NEXT: s_lshl_b32 s0, 0xffff, s0
|
||||
; VI-NEXT: s_mov_b32 s5, s1
|
||||
; VI-NEXT: s_andn2_b32 s1, s2, s0
|
||||
; VI-NEXT: s_and_b32 s0, s0, 0x50005
|
||||
; VI-NEXT: s_or_b32 s0, s0, s1
|
||||
; VI-NEXT: s_lshl_b32 s1, s3, 4
|
||||
; VI-NEXT: s_mov_b32 s4, s0
|
||||
; VI-NEXT: s_xor_b32 s0, s2, 0x50005
|
||||
; VI-NEXT: s_lshl_b32 s1, 0xffff, s1
|
||||
; VI-NEXT: s_and_b32 s0, s0, s1
|
||||
; VI-NEXT: s_xor_b32 s0, s0, s2
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; VI-NEXT: s_endpgm
|
||||
@ -1552,13 +1552,13 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b32 s4, s0
|
||||
; SI-NEXT: s_lshl_b32 s0, s8, 4
|
||||
; SI-NEXT: s_lshl_b32 s8, s8, 4
|
||||
; SI-NEXT: s_mov_b32 s5, s1
|
||||
; SI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0
|
||||
; SI-NEXT: s_and_b32 s9, s1, 0x50005
|
||||
; SI-NEXT: s_and_b32 s8, s0, 0x50005
|
||||
; SI-NEXT: s_andn2_b64 s[0:1], s[2:3], s[0:1]
|
||||
; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
|
||||
; SI-NEXT: s_xor_b32 s1, s3, 0x50005
|
||||
; SI-NEXT: s_xor_b32 s0, s2, 0x50005
|
||||
; SI-NEXT: s_lshl_b64 s[8:9], 0xffff, s8
|
||||
; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
|
||||
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s1
|
||||
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s0
|
||||
@ -1573,14 +1573,14 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
|
||||
; VI-NEXT: s_mov_b32 s6, -1
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_mov_b32 s4, s0
|
||||
; VI-NEXT: s_lshl_b32 s0, s8, 4
|
||||
; VI-NEXT: s_mov_b32 s8, 0x50005
|
||||
; VI-NEXT: s_mov_b32 s0, 0x50005
|
||||
; VI-NEXT: s_mov_b32 s5, s1
|
||||
; VI-NEXT: s_lshl_b64 s[0:1], 0xffff, s0
|
||||
; VI-NEXT: s_mov_b32 s9, s8
|
||||
; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; VI-NEXT: s_mov_b32 s1, s0
|
||||
; VI-NEXT: s_lshl_b32 s8, s8, 4
|
||||
; VI-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
|
||||
; VI-NEXT: s_lshl_b64 s[8:9], 0xffff, s8
|
||||
; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
|
||||
; VI-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
|
||||
; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s1
|
||||
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s0
|
||||
@ -1594,35 +1594,34 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(ptr addrspace(1) %out, <3
|
||||
define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8 x i32], <2 x i8> %a, [8 x i32], i32 %b) nounwind {
|
||||
; SI-LABEL: dynamic_insertelement_v2i8:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_load_dword s4, s[8:9], 0x13
|
||||
; SI-NEXT: s_load_dword s4, s[8:9], 0xa
|
||||
; SI-NEXT: s_load_dword s5, s[8:9], 0x13
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
|
||||
; SI-NEXT: s_load_dword s5, s[8:9], 0xa
|
||||
; SI-NEXT: s_mov_b32 s3, 0x100f000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_lshl_b32 s4, s4, 3
|
||||
; SI-NEXT: s_lshl_b32 s4, 0xff, s4
|
||||
; SI-NEXT: s_andn2_b32 s5, s5, s4
|
||||
; SI-NEXT: s_and_b32 s4, s4, 0x505
|
||||
; SI-NEXT: s_or_b32 s4, s4, s5
|
||||
; SI-NEXT: s_xor_b32 s6, s4, 0x505
|
||||
; SI-NEXT: s_lshl_b32 s5, s5, 3
|
||||
; SI-NEXT: s_lshl_b32 s5, 0xff, s5
|
||||
; SI-NEXT: s_and_b32 s5, s6, s5
|
||||
; SI-NEXT: s_xor_b32 s4, s5, s4
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
; VI-LABEL: dynamic_insertelement_v2i8:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_load_dword s4, s[8:9], 0x4c
|
||||
; VI-NEXT: s_load_dword s4, s[8:9], 0x28
|
||||
; VI-NEXT: s_load_dword s5, s[8:9], 0x4c
|
||||
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
|
||||
; VI-NEXT: s_load_dword s5, s[8:9], 0x28
|
||||
; VI-NEXT: s_mov_b32 s3, 0x1100f000
|
||||
; VI-NEXT: s_mov_b32 s2, -1
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_lshl_b32 s4, s4, 3
|
||||
; VI-NEXT: s_lshl_b32 s4, 0xff, s4
|
||||
; VI-NEXT: s_and_b32 s6, s4, 0x505
|
||||
; VI-NEXT: s_xor_b32 s4, s4, 0xffff
|
||||
; VI-NEXT: s_and_b32 s4, s4, s5
|
||||
; VI-NEXT: s_or_b32 s4, s6, s4
|
||||
; VI-NEXT: s_xor_b32 s6, s4, 0x505
|
||||
; VI-NEXT: s_lshl_b32 s5, s5, 3
|
||||
; VI-NEXT: s_lshl_b32 s5, 0xff, s5
|
||||
; VI-NEXT: s_and_b32 s5, s6, s5
|
||||
; VI-NEXT: s_xor_b32 s4, s5, s4
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
; VI-NEXT: s_endpgm
|
||||
@ -1636,17 +1635,17 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(ptr addrspace(1) %out, [8
|
||||
define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8 x i32], <3 x i8> %a, [8 x i32], i32 %b) nounwind {
|
||||
; SI-LABEL: dynamic_insertelement_v3i8:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_load_dword s4, s[8:9], 0x13
|
||||
; SI-NEXT: s_load_dword s4, s[8:9], 0xa
|
||||
; SI-NEXT: s_load_dword s5, s[8:9], 0x13
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
|
||||
; SI-NEXT: s_load_dword s5, s[8:9], 0xa
|
||||
; SI-NEXT: s_mov_b32 s3, 0x100f000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_lshl_b32 s4, s4, 3
|
||||
; SI-NEXT: s_lshl_b32 s4, 0xff, s4
|
||||
; SI-NEXT: s_andn2_b32 s5, s5, s4
|
||||
; SI-NEXT: s_and_b32 s4, s4, 0x5050505
|
||||
; SI-NEXT: s_or_b32 s4, s4, s5
|
||||
; SI-NEXT: s_xor_b32 s6, s4, 0x5050505
|
||||
; SI-NEXT: s_lshl_b32 s5, s5, 3
|
||||
; SI-NEXT: s_lshl_b32 s5, 0xff, s5
|
||||
; SI-NEXT: s_and_b32 s5, s6, s5
|
||||
; SI-NEXT: s_xor_b32 s4, s5, s4
|
||||
; SI-NEXT: s_lshr_b32 s5, s4, 16
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
@ -1656,17 +1655,17 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8
|
||||
;
|
||||
; VI-LABEL: dynamic_insertelement_v3i8:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_load_dword s4, s[8:9], 0x4c
|
||||
; VI-NEXT: s_load_dword s4, s[8:9], 0x28
|
||||
; VI-NEXT: s_load_dword s5, s[8:9], 0x4c
|
||||
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
|
||||
; VI-NEXT: s_load_dword s5, s[8:9], 0x28
|
||||
; VI-NEXT: s_mov_b32 s3, 0x1100f000
|
||||
; VI-NEXT: s_mov_b32 s2, -1
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_lshl_b32 s4, s4, 3
|
||||
; VI-NEXT: s_lshl_b32 s4, 0xff, s4
|
||||
; VI-NEXT: s_andn2_b32 s5, s5, s4
|
||||
; VI-NEXT: s_and_b32 s4, s4, 0x5050505
|
||||
; VI-NEXT: s_or_b32 s4, s4, s5
|
||||
; VI-NEXT: s_xor_b32 s6, s4, 0x5050505
|
||||
; VI-NEXT: s_lshl_b32 s5, s5, 3
|
||||
; VI-NEXT: s_lshl_b32 s5, 0xff, s5
|
||||
; VI-NEXT: s_and_b32 s5, s6, s5
|
||||
; VI-NEXT: s_xor_b32 s4, s5, s4
|
||||
; VI-NEXT: s_lshr_b32 s5, s4, 16
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
@ -1681,34 +1680,34 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(ptr addrspace(1) %out, [8
|
||||
define amdgpu_kernel void @dynamic_insertelement_v4i8(ptr addrspace(1) %out, [8 x i32], <4 x i8> %a, [8 x i32], i32 %b) nounwind {
|
||||
; SI-LABEL: dynamic_insertelement_v4i8:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_load_dword s4, s[8:9], 0x13
|
||||
; SI-NEXT: s_load_dword s4, s[8:9], 0xa
|
||||
; SI-NEXT: s_load_dword s5, s[8:9], 0x13
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
|
||||
; SI-NEXT: s_load_dword s5, s[8:9], 0xa
|
||||
; SI-NEXT: s_mov_b32 s3, 0x100f000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_lshl_b32 s4, s4, 3
|
||||
; SI-NEXT: s_lshl_b32 s4, 0xff, s4
|
||||
; SI-NEXT: s_andn2_b32 s5, s5, s4
|
||||
; SI-NEXT: s_and_b32 s4, s4, 0x5050505
|
||||
; SI-NEXT: s_or_b32 s4, s4, s5
|
||||
; SI-NEXT: s_xor_b32 s6, s4, 0x5050505
|
||||
; SI-NEXT: s_lshl_b32 s5, s5, 3
|
||||
; SI-NEXT: s_lshl_b32 s5, 0xff, s5
|
||||
; SI-NEXT: s_and_b32 s5, s6, s5
|
||||
; SI-NEXT: s_xor_b32 s4, s5, s4
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
;
|
||||
; VI-LABEL: dynamic_insertelement_v4i8:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_load_dword s4, s[8:9], 0x4c
|
||||
; VI-NEXT: s_load_dword s4, s[8:9], 0x28
|
||||
; VI-NEXT: s_load_dword s5, s[8:9], 0x4c
|
||||
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
|
||||
; VI-NEXT: s_load_dword s5, s[8:9], 0x28
|
||||
; VI-NEXT: s_mov_b32 s3, 0x1100f000
|
||||
; VI-NEXT: s_mov_b32 s2, -1
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_lshl_b32 s4, s4, 3
|
||||
; VI-NEXT: s_lshl_b32 s4, 0xff, s4
|
||||
; VI-NEXT: s_andn2_b32 s5, s5, s4
|
||||
; VI-NEXT: s_and_b32 s4, s4, 0x5050505
|
||||
; VI-NEXT: s_or_b32 s4, s4, s5
|
||||
; VI-NEXT: s_xor_b32 s6, s4, 0x5050505
|
||||
; VI-NEXT: s_lshl_b32 s5, s5, 3
|
||||
; VI-NEXT: s_lshl_b32 s5, 0xff, s5
|
||||
; VI-NEXT: s_and_b32 s5, s6, s5
|
||||
; VI-NEXT: s_xor_b32 s4, s5, s4
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s4
|
||||
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; VI-NEXT: s_endpgm
|
||||
@ -1721,20 +1720,20 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p
|
||||
; SI-LABEL: s_dynamic_insertelement_v8i8:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
|
||||
; SI-NEXT: s_load_dword s8, s[8:9], 0x4
|
||||
; SI-NEXT: s_mov_b32 s7, 0x100f000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
; SI-NEXT: s_load_dword s8, s[8:9], 0x4
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; SI-NEXT: s_mov_b32 s4, s0
|
||||
; SI-NEXT: s_lshl_b32 s0, s8, 3
|
||||
; SI-NEXT: s_mov_b32 s5, s1
|
||||
; SI-NEXT: s_lshl_b64 s[0:1], 0xff, s0
|
||||
; SI-NEXT: s_and_b32 s9, s1, 0x5050505
|
||||
; SI-NEXT: s_lshl_b32 s8, s8, 3
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; SI-NEXT: s_and_b32 s8, s0, 0x5050505
|
||||
; SI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3]
|
||||
; SI-NEXT: s_xor_b32 s1, s3, 0x5050505
|
||||
; SI-NEXT: s_xor_b32 s0, s2, 0x5050505
|
||||
; SI-NEXT: s_lshl_b64 s[8:9], 0xff, s8
|
||||
; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
|
||||
; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; SI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; SI-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||||
@ -1743,20 +1742,20 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(ptr addrspace(1) %out, p
|
||||
; VI-LABEL: s_dynamic_insertelement_v8i8:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
|
||||
; VI-NEXT: s_load_dword s8, s[8:9], 0x10
|
||||
; VI-NEXT: s_mov_b32 s7, 0x1100f000
|
||||
; VI-NEXT: s_mov_b32 s6, -1
|
||||
; VI-NEXT: s_load_dword s8, s[8:9], 0x10
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; VI-NEXT: s_mov_b32 s4, s0
|
||||
; VI-NEXT: s_lshl_b32 s0, s8, 3
|
||||
; VI-NEXT: s_mov_b32 s5, s1
|
||||
; VI-NEXT: s_lshl_b64 s[0:1], 0xff, s0
|
||||
; VI-NEXT: s_and_b32 s9, s1, 0x5050505
|
||||
; VI-NEXT: s_lshl_b32 s8, s8, 3
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
|
||||
; VI-NEXT: s_and_b32 s8, s0, 0x5050505
|
||||
; VI-NEXT: s_or_b64 s[0:1], s[8:9], s[2:3]
|
||||
; VI-NEXT: s_xor_b32 s1, s3, 0x5050505
|
||||
; VI-NEXT: s_xor_b32 s0, s2, 0x5050505
|
||||
; VI-NEXT: s_lshl_b64 s[8:9], 0xff, s8
|
||||
; VI-NEXT: s_and_b64 s[0:1], s[0:1], s[8:9]
|
||||
; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; VI-NEXT: v_mov_b32_e32 v1, s1
|
||||
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||||
|
@ -1534,11 +1534,11 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out,
|
||||
; GFX9-NEXT: s_load_dword s6, s[4:5], 0x0
|
||||
; GFX9-NEXT: s_load_dword s7, s[2:3], 0x0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s6, 4
|
||||
; GFX9-NEXT: s_lshl_b32 s2, 0xffff, s2
|
||||
; GFX9-NEXT: s_andn2_b32 s3, s7, s2
|
||||
; GFX9-NEXT: s_and_b32 s2, s2, 0x3e703e7
|
||||
; GFX9-NEXT: s_or_b32 s2, s2, s3
|
||||
; GFX9-NEXT: s_lshl_b32 s3, s6, 4
|
||||
; GFX9-NEXT: s_xor_b32 s2, s7, 0x3e703e7
|
||||
; GFX9-NEXT: s_lshl_b32 s3, 0xffff, s3
|
||||
; GFX9-NEXT: s_and_b32 s2, s2, s3
|
||||
; GFX9-NEXT: s_xor_b32 s2, s2, s7
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
@ -1553,14 +1553,14 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out,
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_load_dword s4, s[4:5], 0x0
|
||||
; VI-NEXT: s_load_dword s2, s[2:3], 0x0
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; VI-NEXT: v_mov_b32_e32 v1, s1
|
||||
; VI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-NEXT: s_lshl_b32 s0, s4, 4
|
||||
; VI-NEXT: s_lshl_b32 s0, 0xffff, s0
|
||||
; VI-NEXT: s_andn2_b32 s1, s2, s0
|
||||
; VI-NEXT: s_and_b32 s0, s0, 0x3e703e7
|
||||
; VI-NEXT: s_or_b32 s0, s0, s1
|
||||
; VI-NEXT: s_lshl_b32 s1, s4, 4
|
||||
; VI-NEXT: s_xor_b32 s0, s2, 0x3e703e7
|
||||
; VI-NEXT: s_lshl_b32 s1, 0xffff, s1
|
||||
; VI-NEXT: s_and_b32 s0, s0, s1
|
||||
; VI-NEXT: s_xor_b32 s0, s0, s2
|
||||
; VI-NEXT: v_mov_b32_e32 v2, s0
|
||||
; VI-NEXT: flat_store_dword v[0:1], v2
|
||||
; VI-NEXT: s_endpgm
|
||||
@ -1575,14 +1575,14 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out,
|
||||
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CI-NEXT: s_load_dword s4, s[4:5], 0x0
|
||||
; CI-NEXT: s_load_dword s2, s[2:3], 0x0
|
||||
; CI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; CI-NEXT: v_mov_b32_e32 v1, s1
|
||||
; CI-NEXT: v_mov_b32_e32 v0, s0
|
||||
; CI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CI-NEXT: s_lshl_b32 s0, s4, 4
|
||||
; CI-NEXT: s_lshl_b32 s0, 0xffff, s0
|
||||
; CI-NEXT: s_andn2_b32 s1, s2, s0
|
||||
; CI-NEXT: s_and_b32 s0, s0, 0x3e703e7
|
||||
; CI-NEXT: s_or_b32 s0, s0, s1
|
||||
; CI-NEXT: s_lshl_b32 s1, s4, 4
|
||||
; CI-NEXT: s_xor_b32 s0, s2, 0x3e703e7
|
||||
; CI-NEXT: s_lshl_b32 s1, 0xffff, s1
|
||||
; CI-NEXT: s_and_b32 s0, s0, s1
|
||||
; CI-NEXT: s_xor_b32 s0, s0, s2
|
||||
; CI-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CI-NEXT: flat_store_dword v[0:1], v2
|
||||
; CI-NEXT: s_endpgm
|
||||
@ -1597,12 +1597,12 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out,
|
||||
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_lshl_b32 s3, s4, 4
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_xor_b32 s4, s2, 0x3e703e7
|
||||
; GFX11-NEXT: s_lshl_b32 s3, 0xffff, s3
|
||||
; GFX11-NEXT: s_and_not1_b32 s2, s2, s3
|
||||
; GFX11-NEXT: s_and_b32 s3, s3, 0x3e703e7
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX11-NEXT: s_and_b32 s3, s4, s3
|
||||
; GFX11-NEXT: s_xor_b32 s2, s3, s2
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
|
@ -5,10 +5,11 @@ define i32 @s_out32(i32 inreg %x, i32 inreg %y, i32 inreg %mask) {
|
||||
; GCN-LABEL: s_out32:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_b32 s0, s0, s2
|
||||
; GCN-NEXT: s_and_not1_b32 s1, s1, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GCN-NEXT: s_or_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%mx = and i32 %x, %mask
|
||||
@ -22,10 +23,11 @@ define i64 @s_out64(i64 inreg %x, i64 inreg %y, i64 inreg %mask) {
|
||||
; GCN-LABEL: s_out64:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[16:17]
|
||||
; GCN-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[16:17]
|
||||
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
|
||||
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[16:17]
|
||||
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%mx = and i64 %x, %mask
|
||||
@ -427,10 +429,11 @@ define i32 @s_out_constant_varx_42(i32 inreg %x, i32 inreg %y, i32 inreg %mask)
|
||||
; GCN-LABEL: s_out_constant_varx_42:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_b32 s0, s2, s0
|
||||
; GCN-NEXT: s_and_not1_b32 s1, 42, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, 42
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GCN-NEXT: s_or_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, 42
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%notmask = xor i32 %mask, -1
|
||||
@ -462,10 +465,11 @@ define i32 @s_out_constant_varx_42_invmask(i32 inreg %x, i32 inreg %y, i32 inreg
|
||||
; GCN-LABEL: s_out_constant_varx_42_invmask:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_not1_b32 s0, s0, s2
|
||||
; GCN-NEXT: s_and_b32 s1, s2, 42
|
||||
; GCN-NEXT: s_xor_b32 s1, s0, 42
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GCN-NEXT: s_or_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_and_b32 s1, s1, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s1, s0
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%notmask = xor i32 %mask, -1
|
||||
@ -560,10 +564,11 @@ define i32 @s_out_constant_42_vary(i32 inreg %x, i32 inreg %y, i32 inreg %mask)
|
||||
; GCN-LABEL: s_out_constant_42_vary:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_b32 s0, s2, 42
|
||||
; GCN-NEXT: s_and_not1_b32 s1, s1, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s1, 42
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GCN-NEXT: s_or_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%notmask = xor i32 %mask, -1
|
||||
@ -595,10 +600,11 @@ define i32 @s_out_constant_42_vary_invmask(i32 inreg %x, i32 inreg %y, i32 inreg
|
||||
; GCN-LABEL: s_out_constant_42_vary_invmask:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_and_not1_b32 s0, 42, s2
|
||||
; GCN-NEXT: s_and_b32 s1, s2, s1
|
||||
; GCN-NEXT: s_xor_b32 s0, s1, 42
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GCN-NEXT: s_or_b32 s0, s0, s1
|
||||
; GCN-NEXT: s_and_b32 s0, s0, s2
|
||||
; GCN-NEXT: s_xor_b32 s0, s0, 42
|
||||
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
%notmask = xor i32 %mask, -1
|
||||
|
@ -8,17 +8,16 @@
|
||||
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v1i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<8>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b8 %rs1, [out_v1i8_param_0];
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [out_v1i8_param_2];
|
||||
; CHECK-NEXT: and.b16 %rs3, %rs1, %rs2;
|
||||
; CHECK-NEXT: ld.param.b8 %rs4, [out_v1i8_param_1];
|
||||
; CHECK-NEXT: not.b16 %rs5, %rs2;
|
||||
; CHECK-NEXT: and.b16 %rs6, %rs4, %rs5;
|
||||
; CHECK-NEXT: or.b16 %rs7, %rs3, %rs6;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0], %rs7;
|
||||
; CHECK-NEXT: ld.param.b8 %rs2, [out_v1i8_param_1];
|
||||
; CHECK-NEXT: ld.param.b8 %rs3, [out_v1i8_param_2];
|
||||
; CHECK-NEXT: xor.b16 %rs4, %rs1, %rs2;
|
||||
; CHECK-NEXT: and.b16 %rs5, %rs4, %rs3;
|
||||
; CHECK-NEXT: xor.b16 %rs6, %rs5, %rs2;
|
||||
; CHECK-NEXT: st.param.b8 [func_retval0], %rs6;
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <1 x i8> %x, %mask
|
||||
%notmask = xor <1 x i8> %mask, <i8 -1>
|
||||
@ -34,17 +33,16 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
|
||||
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v1i16(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<8>;
|
||||
; CHECK-NEXT: .reg .b16 %rs<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b16 %rs1, [out_v1i16_param_0];
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [out_v1i16_param_2];
|
||||
; CHECK-NEXT: and.b16 %rs3, %rs1, %rs2;
|
||||
; CHECK-NEXT: ld.param.b16 %rs4, [out_v1i16_param_1];
|
||||
; CHECK-NEXT: not.b16 %rs5, %rs2;
|
||||
; CHECK-NEXT: and.b16 %rs6, %rs4, %rs5;
|
||||
; CHECK-NEXT: or.b16 %rs7, %rs3, %rs6;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
|
||||
; CHECK-NEXT: ld.param.b16 %rs2, [out_v1i16_param_1];
|
||||
; CHECK-NEXT: ld.param.b16 %rs3, [out_v1i16_param_2];
|
||||
; CHECK-NEXT: xor.b16 %rs4, %rs1, %rs2;
|
||||
; CHECK-NEXT: and.b16 %rs5, %rs4, %rs3;
|
||||
; CHECK-NEXT: xor.b16 %rs6, %rs5, %rs2;
|
||||
; CHECK-NEXT: st.param.b16 [func_retval0], %rs6;
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <1 x i16> %x, %mask
|
||||
%notmask = xor <1 x i16> %mask, <i16 -1>
|
||||
@ -126,17 +124,16 @@ define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwin
|
||||
define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v1i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<8>;
|
||||
; CHECK-NEXT: .reg .b32 %r<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b32 %r1, [out_v1i32_param_0];
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [out_v1i32_param_2];
|
||||
; CHECK-NEXT: and.b32 %r3, %r1, %r2;
|
||||
; CHECK-NEXT: ld.param.b32 %r4, [out_v1i32_param_1];
|
||||
; CHECK-NEXT: not.b32 %r5, %r2;
|
||||
; CHECK-NEXT: and.b32 %r6, %r4, %r5;
|
||||
; CHECK-NEXT: or.b32 %r7, %r3, %r6;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
|
||||
; CHECK-NEXT: ld.param.b32 %r2, [out_v1i32_param_1];
|
||||
; CHECK-NEXT: ld.param.b32 %r3, [out_v1i32_param_2];
|
||||
; CHECK-NEXT: xor.b32 %r4, %r1, %r2;
|
||||
; CHECK-NEXT: and.b32 %r5, %r4, %r3;
|
||||
; CHECK-NEXT: xor.b32 %r6, %r5, %r2;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <1 x i32> %x, %mask
|
||||
%notmask = xor <1 x i32> %mask, <i32 -1>
|
||||
@ -230,21 +227,19 @@ define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) n
|
||||
define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v2i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<15>;
|
||||
; CHECK-NEXT: .reg .b32 %r<13>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [out_v2i32_param_0];
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [out_v2i32_param_2];
|
||||
; CHECK-NEXT: and.b32 %r5, %r1, %r3;
|
||||
; CHECK-NEXT: and.b32 %r6, %r2, %r4;
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r7, %r8}, [out_v2i32_param_1];
|
||||
; CHECK-NEXT: not.b32 %r9, %r4;
|
||||
; CHECK-NEXT: not.b32 %r10, %r3;
|
||||
; CHECK-NEXT: and.b32 %r11, %r7, %r10;
|
||||
; CHECK-NEXT: and.b32 %r12, %r8, %r9;
|
||||
; CHECK-NEXT: or.b32 %r13, %r6, %r12;
|
||||
; CHECK-NEXT: or.b32 %r14, %r5, %r11;
|
||||
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r13};
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [out_v2i32_param_1];
|
||||
; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [out_v2i32_param_2];
|
||||
; CHECK-NEXT: xor.b32 %r7, %r2, %r4;
|
||||
; CHECK-NEXT: and.b32 %r8, %r7, %r6;
|
||||
; CHECK-NEXT: xor.b32 %r9, %r8, %r4;
|
||||
; CHECK-NEXT: xor.b32 %r10, %r1, %r3;
|
||||
; CHECK-NEXT: and.b32 %r11, %r10, %r5;
|
||||
; CHECK-NEXT: xor.b32 %r12, %r11, %r3;
|
||||
; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r12, %r9};
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <2 x i32> %x, %mask
|
||||
%notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
|
||||
@ -256,17 +251,16 @@ define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwin
|
||||
define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v1i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<8>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<7>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.b64 %rd1, [out_v1i64_param_0];
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [out_v1i64_param_2];
|
||||
; CHECK-NEXT: and.b64 %rd3, %rd1, %rd2;
|
||||
; CHECK-NEXT: ld.param.b64 %rd4, [out_v1i64_param_1];
|
||||
; CHECK-NEXT: not.b64 %rd5, %rd2;
|
||||
; CHECK-NEXT: and.b64 %rd6, %rd4, %rd5;
|
||||
; CHECK-NEXT: or.b64 %rd7, %rd3, %rd6;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
|
||||
; CHECK-NEXT: ld.param.b64 %rd2, [out_v1i64_param_1];
|
||||
; CHECK-NEXT: ld.param.b64 %rd3, [out_v1i64_param_2];
|
||||
; CHECK-NEXT: xor.b64 %rd4, %rd1, %rd2;
|
||||
; CHECK-NEXT: and.b64 %rd5, %rd4, %rd3;
|
||||
; CHECK-NEXT: xor.b64 %rd6, %rd5, %rd2;
|
||||
; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <1 x i64> %x, %mask
|
||||
%notmask = xor <1 x i64> %mask, <i64 -1>
|
||||
@ -350,29 +344,25 @@ define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwin
|
||||
define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v4i32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<29>;
|
||||
; CHECK-NEXT: .reg .b32 %r<25>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [out_v4i32_param_0];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [out_v4i32_param_2];
|
||||
; CHECK-NEXT: and.b32 %r9, %r1, %r5;
|
||||
; CHECK-NEXT: and.b32 %r10, %r2, %r6;
|
||||
; CHECK-NEXT: and.b32 %r11, %r3, %r7;
|
||||
; CHECK-NEXT: and.b32 %r12, %r4, %r8;
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r13, %r14, %r15, %r16}, [out_v4i32_param_1];
|
||||
; CHECK-NEXT: not.b32 %r17, %r8;
|
||||
; CHECK-NEXT: not.b32 %r18, %r7;
|
||||
; CHECK-NEXT: not.b32 %r19, %r6;
|
||||
; CHECK-NEXT: not.b32 %r20, %r5;
|
||||
; CHECK-NEXT: and.b32 %r21, %r13, %r20;
|
||||
; CHECK-NEXT: and.b32 %r22, %r14, %r19;
|
||||
; CHECK-NEXT: and.b32 %r23, %r15, %r18;
|
||||
; CHECK-NEXT: and.b32 %r24, %r16, %r17;
|
||||
; CHECK-NEXT: or.b32 %r25, %r12, %r24;
|
||||
; CHECK-NEXT: or.b32 %r26, %r11, %r23;
|
||||
; CHECK-NEXT: or.b32 %r27, %r10, %r22;
|
||||
; CHECK-NEXT: or.b32 %r28, %r9, %r21;
|
||||
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [out_v4i32_param_1];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r9, %r10, %r11, %r12}, [out_v4i32_param_2];
|
||||
; CHECK-NEXT: xor.b32 %r13, %r4, %r8;
|
||||
; CHECK-NEXT: and.b32 %r14, %r13, %r12;
|
||||
; CHECK-NEXT: xor.b32 %r15, %r14, %r8;
|
||||
; CHECK-NEXT: xor.b32 %r16, %r3, %r7;
|
||||
; CHECK-NEXT: and.b32 %r17, %r16, %r11;
|
||||
; CHECK-NEXT: xor.b32 %r18, %r17, %r7;
|
||||
; CHECK-NEXT: xor.b32 %r19, %r2, %r6;
|
||||
; CHECK-NEXT: and.b32 %r20, %r19, %r10;
|
||||
; CHECK-NEXT: xor.b32 %r21, %r20, %r6;
|
||||
; CHECK-NEXT: xor.b32 %r22, %r1, %r5;
|
||||
; CHECK-NEXT: and.b32 %r23, %r22, %r9;
|
||||
; CHECK-NEXT: xor.b32 %r24, %r23, %r5;
|
||||
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r24, %r21, %r18, %r15};
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <4 x i32> %x, %mask
|
||||
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
@ -384,26 +374,23 @@ define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwin
|
||||
define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v4i32_undef(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b32 %r<26>;
|
||||
; CHECK-NEXT: .reg .b32 %r<23>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [out_v4i32_undef_param_0];
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [out_v4i32_undef_param_2];
|
||||
; CHECK-NEXT: and.b32 %r9, %r3, %r7;
|
||||
; CHECK-NEXT: and.b32 %r10, %r1, %r5;
|
||||
; CHECK-NEXT: and.b32 %r11, %r2, %r6;
|
||||
; CHECK-NEXT: and.b32 %r12, %r4, %r8;
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r13, %r14, %r15, %r16}, [out_v4i32_undef_param_1];
|
||||
; CHECK-NEXT: not.b32 %r17, %r8;
|
||||
; CHECK-NEXT: not.b32 %r18, %r6;
|
||||
; CHECK-NEXT: not.b32 %r19, %r5;
|
||||
; CHECK-NEXT: and.b32 %r20, %r13, %r19;
|
||||
; CHECK-NEXT: and.b32 %r21, %r14, %r18;
|
||||
; CHECK-NEXT: and.b32 %r22, %r16, %r17;
|
||||
; CHECK-NEXT: or.b32 %r23, %r12, %r22;
|
||||
; CHECK-NEXT: or.b32 %r24, %r11, %r21;
|
||||
; CHECK-NEXT: or.b32 %r25, %r10, %r20;
|
||||
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r25, %r24, %r9, %r23};
|
||||
; CHECK-NEXT: ld.param.v4.b32 {%r10, %r11, %r12, %r13}, [out_v4i32_undef_param_1];
|
||||
; CHECK-NEXT: xor.b32 %r14, %r4, %r13;
|
||||
; CHECK-NEXT: and.b32 %r15, %r14, %r8;
|
||||
; CHECK-NEXT: xor.b32 %r16, %r15, %r13;
|
||||
; CHECK-NEXT: xor.b32 %r17, %r2, %r11;
|
||||
; CHECK-NEXT: and.b32 %r18, %r17, %r6;
|
||||
; CHECK-NEXT: xor.b32 %r19, %r18, %r11;
|
||||
; CHECK-NEXT: xor.b32 %r20, %r1, %r10;
|
||||
; CHECK-NEXT: and.b32 %r21, %r20, %r5;
|
||||
; CHECK-NEXT: xor.b32 %r22, %r21, %r10;
|
||||
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r22, %r19, %r9, %r16};
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <4 x i32> %x, %mask
|
||||
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
|
||||
@ -415,21 +402,19 @@ define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) n
|
||||
define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v2i64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b64 %rd<15>;
|
||||
; CHECK-NEXT: .reg .b64 %rd<13>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [out_v2i64_param_0];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [out_v2i64_param_2];
|
||||
; CHECK-NEXT: and.b64 %rd5, %rd1, %rd3;
|
||||
; CHECK-NEXT: and.b64 %rd6, %rd2, %rd4;
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [out_v2i64_param_1];
|
||||
; CHECK-NEXT: not.b64 %rd9, %rd4;
|
||||
; CHECK-NEXT: not.b64 %rd10, %rd3;
|
||||
; CHECK-NEXT: and.b64 %rd11, %rd7, %rd10;
|
||||
; CHECK-NEXT: and.b64 %rd12, %rd8, %rd9;
|
||||
; CHECK-NEXT: or.b64 %rd13, %rd6, %rd12;
|
||||
; CHECK-NEXT: or.b64 %rd14, %rd5, %rd11;
|
||||
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd14, %rd13};
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [out_v2i64_param_1];
|
||||
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [out_v2i64_param_2];
|
||||
; CHECK-NEXT: xor.b64 %rd7, %rd2, %rd4;
|
||||
; CHECK-NEXT: and.b64 %rd8, %rd7, %rd6;
|
||||
; CHECK-NEXT: xor.b64 %rd9, %rd8, %rd4;
|
||||
; CHECK-NEXT: xor.b64 %rd10, %rd1, %rd3;
|
||||
; CHECK-NEXT: and.b64 %rd11, %rd10, %rd5;
|
||||
; CHECK-NEXT: xor.b64 %rd12, %rd11, %rd3;
|
||||
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd12, %rd9};
|
||||
; CHECK-NEXT: ret;
|
||||
%mx = and <2 x i64> %x, %mask
|
||||
%notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
|
||||
|
302
llvm/test/CodeGen/RISCV/fold-masked-merge.ll
Normal file
302
llvm/test/CodeGen/RISCV/fold-masked-merge.ll
Normal file
@ -0,0 +1,302 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV32,RV32I
|
||||
; RUN: llc -mtriple=riscv64 < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-I,RV64,RV64I
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+zbb < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV32,RV32ZBB
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB
|
||||
;
|
||||
; Test that masked-merge code is generated as an "xor; and; xor" sequence, or
|
||||
; as "andn; and; or" if and-not is available.
|
||||
|
||||
; Basic masked merge with %a0 as the mask: (%a0 & %a1) | (~%a0 & %a2).
; The runs without Zbb expect the xor/and/xor form; the Zbb runs expect the
; and/andn/or form.
define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; CHECK-I-LABEL: masked_merge0:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: xor a1, a1, a2
|
||||
; CHECK-I-NEXT: and a0, a1, a0
|
||||
; CHECK-I-NEXT: xor a0, a0, a2
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: masked_merge0:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: andn a0, a2, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Same masked merge as an i16, with the operands of the second 'and' commuted
; so the inverted mask is the right-hand operand: (%a0 & %a1) | (%a2 & ~%a0).
; The runs without Zbb expect xor/and/xor; the Zbb runs expect and/andn/or.
define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
|
||||
; CHECK-I-LABEL: masked_merge1:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: xor a1, a1, a2
|
||||
; CHECK-I-NEXT: and a0, a1, a0
|
||||
; CHECK-I-NEXT: xor a0, a0, a2
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: masked_merge1:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: andn a0, a2, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i16 %a0, %a1
|
||||
%not = xor i16 %a0, -1
|
||||
%and1 = and i16 %a2, %not
|
||||
%or = or i16 %and0, %and1
|
||||
ret i16 %or
|
||||
}
|
||||
|
||||
; Masked merge where both arms select the same value %a1:
; (~%a0 & %a1) | (%a1 & %a0). %a2 is unused.
; The runs without Zbb expect a single move of a1 into a0; the Zbb runs expect
; an andn/and/or sequence.
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
|
||||
; CHECK-I-LABEL: masked_merge2:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: mv a0, a1
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: masked_merge2:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: andn a2, a1, a0
|
||||
; CHECK-ZBB-NEXT: and a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a2, a0
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%not = xor i8 %a0, -1
|
||||
%and0 = and i8 %not, %a1
|
||||
%and1 = and i8 %a1, %a0
|
||||
%or = or i8 %and0, %and1
|
||||
ret i8 %or
|
||||
}
|
||||
|
||||
; i64 masked merge whose two data operands are themselves inverted:
; (~%a0 & ~%a2) | (~%a1 & %a0).
; The expected output is listed separately for each of the four RUN
; configurations (RV32/RV64, with and without Zbb).
define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
|
||||
; RV32I-LABEL: masked_merge3:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: not a5, a5
|
||||
; RV32I-NEXT: not a4, a4
|
||||
; RV32I-NEXT: xor a3, a3, a5
|
||||
; RV32I-NEXT: xor a2, a2, a4
|
||||
; RV32I-NEXT: not a2, a2
|
||||
; RV32I-NEXT: not a3, a3
|
||||
; RV32I-NEXT: and a0, a2, a0
|
||||
; RV32I-NEXT: and a1, a3, a1
|
||||
; RV32I-NEXT: xor a0, a0, a4
|
||||
; RV32I-NEXT: xor a1, a1, a5
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: masked_merge3:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: not a2, a2
|
||||
; RV64I-NEXT: xor a1, a1, a2
|
||||
; RV64I-NEXT: not a1, a1
|
||||
; RV64I-NEXT: and a0, a1, a0
|
||||
; RV64I-NEXT: xor a0, a0, a2
|
||||
; RV64I-NEXT: ret
|
||||
;
|
||||
; RV32ZBB-LABEL: masked_merge3:
|
||||
; RV32ZBB: # %bb.0:
|
||||
; RV32ZBB-NEXT: not a6, a0
|
||||
; RV32ZBB-NEXT: not a7, a1
|
||||
; RV32ZBB-NEXT: andn a1, a1, a3
|
||||
; RV32ZBB-NEXT: andn a0, a0, a2
|
||||
; RV32ZBB-NEXT: andn a2, a7, a5
|
||||
; RV32ZBB-NEXT: andn a3, a6, a4
|
||||
; RV32ZBB-NEXT: or a0, a3, a0
|
||||
; RV32ZBB-NEXT: or a1, a2, a1
|
||||
; RV32ZBB-NEXT: ret
|
||||
;
|
||||
; RV64ZBB-LABEL: masked_merge3:
|
||||
; RV64ZBB: # %bb.0:
|
||||
; RV64ZBB-NEXT: not a3, a0
|
||||
; RV64ZBB-NEXT: andn a2, a3, a2
|
||||
; RV64ZBB-NEXT: andn a0, a0, a1
|
||||
; RV64ZBB-NEXT: or a0, a2, a0
|
||||
; RV64ZBB-NEXT: ret
|
||||
%v0 = xor i64 %a1, -1
|
||||
%v1 = xor i64 %a2, -1
|
||||
%not = xor i64 %a0, -1
|
||||
%and0 = and i64 %not, %v1
|
||||
%and1 = and i64 %v0, %a0
|
||||
%or = or i64 %and0, %and1
|
||||
ret i64 %or
|
||||
}
|
||||
|
||||
; Negative test: the second arm uses 0 - %a0 (arithmetic negation) instead of
; a bitwise not of %a0, so this is not a masked merge.
; Both RV32 and RV64 expect and, neg (negw on RV64), and, or.
define i32 @not_a_masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; RV32-LABEL: not_a_masked_merge0:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: and a1, a0, a1
|
||||
; RV32-NEXT: neg a0, a0
|
||||
; RV32-NEXT: and a0, a0, a2
|
||||
; RV32-NEXT: or a0, a1, a0
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: not_a_masked_merge0:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: and a1, a0, a1
|
||||
; RV64-NEXT: negw a0, a0
|
||||
; RV64-NEXT: and a0, a0, a2
|
||||
; RV64-NEXT: or a0, a1, a0
|
||||
; RV64-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not_a_not = sub i32 0, %a0
|
||||
%and1 = and i32 %not_a_not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the inverted value is %a3, which is not an operand of the
; first 'and', so the two arms do not share a mask:
; (%a0 & %a1) | (~%a3 & %a2).
; The expected output keeps the and / not+and (andn with Zbb) / or sequence.
define i32 @not_a_masked_merge1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
|
||||
; CHECK-I-LABEL: not_a_masked_merge1:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: and a0, a0, a1
|
||||
; CHECK-I-NEXT: not a1, a3
|
||||
; CHECK-I-NEXT: and a1, a1, a2
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: not_a_masked_merge1:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a0, a0, a1
|
||||
; CHECK-ZBB-NEXT: andn a1, a2, a3
|
||||
; CHECK-ZBB-NEXT: or a0, a0, a1
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a3, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the first arm is an 'or' rather than an 'and':
; (%a0 | %a1) | (~%a0 & %a2).
; The expected output keeps the or / not+and (andn with Zbb) / or sequence.
define i32 @not_a_masked_merge2(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; CHECK-I-LABEL: not_a_masked_merge2:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: or a1, a0, a1
|
||||
; CHECK-I-NEXT: not a0, a0
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: or a0, a1, a0
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: not_a_masked_merge2:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: or a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: andn a0, a2, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%not_an_and0 = or i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %not_an_and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the second arm combines ~%a0 with %a2 using 'xor' rather than
; 'and': (%a0 & %a1) | (~%a0 ^ %a2).
; The expected output is and, xor, then not+or (a single orn with Zbb).
define i32 @not_a_masked_merge3(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; CHECK-I-LABEL: not_a_masked_merge3:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: and a1, a0, a1
|
||||
; CHECK-I-NEXT: xor a0, a0, a2
|
||||
; CHECK-I-NEXT: not a0, a0
|
||||
; CHECK-I-NEXT: or a0, a1, a0
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: not_a_masked_merge3:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: xor a0, a0, a2
|
||||
; CHECK-ZBB-NEXT: orn a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%not_an_and1 = xor i32 %not, %a2
|
||||
%or = or i32 %and0, %not_an_and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the second arm is ~%a2 & %a2, which is always zero, so the
; result is just %a0 & %a1.
; All runs share the same expectation: a single 'and'.
define i32 @not_a_masked_merge4(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; CHECK-LABEL: not_a_masked_merge4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and a0, a0, a1
|
||||
; CHECK-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a2, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Masked merge (%a0 & %a1) | (~%a0 & %a2) where the first arm %and0 has a
; second use: it is also stored through %p1.
; The expected output keeps the and / not+and (andn with Zbb) / or sequence,
; followed by the store.
define i32 @masked_merge_no_transform0(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
; CHECK-I-LABEL: masked_merge_no_transform0:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: and a1, a0, a1
|
||||
; CHECK-I-NEXT: not a0, a0
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: or a0, a1, a0
|
||||
; CHECK-I-NEXT: sw a1, 0(a3)
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: masked_merge_no_transform0:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: andn a0, a2, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: sw a1, 0(a3)
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
store i32 %and0, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Masked merge (%a0 & %a1) | (~%a0 & %a2) where the inverted mask %not has a
; second use: it is also stored through %p1.
; The expected output keeps a separate 'not' for the stored value and the
; and / and (andn with Zbb) / or sequence.
define i32 @masked_merge_no_transform1(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
; CHECK-I-LABEL: masked_merge_no_transform1:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: and a1, a0, a1
|
||||
; CHECK-I-NEXT: not a4, a0
|
||||
; CHECK-I-NEXT: and a0, a4, a2
|
||||
; CHECK-I-NEXT: or a0, a1, a0
|
||||
; CHECK-I-NEXT: sw a4, 0(a3)
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: masked_merge_no_transform1:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: not a4, a0
|
||||
; CHECK-ZBB-NEXT: andn a0, a2, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a1, a0
|
||||
; CHECK-ZBB-NEXT: sw a4, 0(a3)
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
store i32 %not, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Masked merge (%a0 & %a1) | (~%a0 & %a2) where the second arm %and1 has a
; second use: it is also stored through %p1.
; The expected output keeps the and / not+and (andn with Zbb) / or sequence,
; followed by the store.
define i32 @masked_merge_no_transform2(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
; CHECK-I-LABEL: masked_merge_no_transform2:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: and a1, a0, a1
|
||||
; CHECK-I-NEXT: not a0, a0
|
||||
; CHECK-I-NEXT: and a2, a0, a2
|
||||
; CHECK-I-NEXT: or a0, a1, a2
|
||||
; CHECK-I-NEXT: sw a2, 0(a3)
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: masked_merge_no_transform2:
|
||||
; CHECK-ZBB: # %bb.0:
|
||||
; CHECK-ZBB-NEXT: and a1, a0, a1
|
||||
; CHECK-ZBB-NEXT: andn a2, a2, a0
|
||||
; CHECK-ZBB-NEXT: or a0, a1, a2
|
||||
; CHECK-ZBB-NEXT: sw a2, 0(a3)
|
||||
; CHECK-ZBB-NEXT: ret
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
store i32 %and1, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
@ -8,16 +8,13 @@
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB
|
||||
|
||||
; TODO: Should we convert these to X ^ ((X ^ Y) & M) form when Zbb isn't
|
||||
; present?
|
||||
|
||||
define i8 @out8(i8 %x, i8 %y, i8 %mask) {
|
||||
; CHECK-I-LABEL: out8:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: not a2, a2
|
||||
; CHECK-I-NEXT: and a1, a1, a2
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out8:
|
||||
@ -36,10 +33,9 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
|
||||
define i16 @out16(i16 %x, i16 %y, i16 %mask) {
|
||||
; CHECK-I-LABEL: out16:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: not a2, a2
|
||||
; CHECK-I-NEXT: and a1, a1, a2
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out16:
|
||||
@ -58,10 +54,9 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) {
|
||||
define i32 @out32(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-I-LABEL: out32:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: not a2, a2
|
||||
; CHECK-I-NEXT: and a1, a1, a2
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out32:
|
||||
@ -80,22 +75,19 @@ define i32 @out32(i32 %x, i32 %y, i32 %mask) {
|
||||
define i64 @out64(i64 %x, i64 %y, i64 %mask) {
|
||||
; RV32I-LABEL: out64:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: and a1, a1, a5
|
||||
; RV32I-NEXT: xor a0, a0, a2
|
||||
; RV32I-NEXT: xor a1, a1, a3
|
||||
; RV32I-NEXT: and a0, a0, a4
|
||||
; RV32I-NEXT: not a4, a4
|
||||
; RV32I-NEXT: not a5, a5
|
||||
; RV32I-NEXT: and a3, a3, a5
|
||||
; RV32I-NEXT: and a2, a2, a4
|
||||
; RV32I-NEXT: or a0, a0, a2
|
||||
; RV32I-NEXT: or a1, a1, a3
|
||||
; RV32I-NEXT: and a1, a1, a5
|
||||
; RV32I-NEXT: xor a0, a0, a2
|
||||
; RV32I-NEXT: xor a1, a1, a3
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: out64:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: xor a0, a0, a1
|
||||
; RV64I-NEXT: and a0, a0, a2
|
||||
; RV64I-NEXT: not a2, a2
|
||||
; RV64I-NEXT: and a1, a1, a2
|
||||
; RV64I-NEXT: or a0, a0, a1
|
||||
; RV64I-NEXT: xor a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
;
|
||||
; RV32ZBB-LABEL: out64:
|
||||
@ -660,10 +652,9 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-I-LABEL: out_constant_varx_42:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: not a1, a2
|
||||
; CHECK-I-NEXT: and a0, a2, a0
|
||||
; CHECK-I-NEXT: andi a1, a1, 42
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: xori a0, a0, 42
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: xori a0, a0, 42
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out_constant_varx_42:
|
||||
@ -704,10 +695,9 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-I-LABEL: out_constant_varx_42_invmask:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: not a1, a2
|
||||
; CHECK-I-NEXT: and a0, a1, a0
|
||||
; CHECK-I-NEXT: andi a1, a2, 42
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: xori a1, a0, 42
|
||||
; CHECK-I-NEXT: and a1, a1, a2
|
||||
; CHECK-I-NEXT: xor a0, a1, a0
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out_constant_varx_42_invmask:
|
||||
@ -812,10 +802,9 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-I-LABEL: out_constant_42_vary:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: not a0, a2
|
||||
; CHECK-I-NEXT: andi a2, a2, 42
|
||||
; CHECK-I-NEXT: and a0, a0, a1
|
||||
; CHECK-I-NEXT: or a0, a2, a0
|
||||
; CHECK-I-NEXT: xori a0, a1, 42
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: xor a0, a0, a1
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out_constant_42_vary:
|
||||
@ -855,10 +844,9 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
|
||||
define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
|
||||
; CHECK-I-LABEL: out_constant_42_vary_invmask:
|
||||
; CHECK-I: # %bb.0:
|
||||
; CHECK-I-NEXT: not a0, a2
|
||||
; CHECK-I-NEXT: andi a0, a0, 42
|
||||
; CHECK-I-NEXT: and a1, a2, a1
|
||||
; CHECK-I-NEXT: or a0, a0, a1
|
||||
; CHECK-I-NEXT: xori a0, a1, 42
|
||||
; CHECK-I-NEXT: and a0, a0, a2
|
||||
; CHECK-I-NEXT: xori a0, a0, 42
|
||||
; CHECK-I-NEXT: ret
|
||||
;
|
||||
; CHECK-ZBB-LABEL: out_constant_42_vary_invmask:
|
||||
|
277
llvm/test/CodeGen/SystemZ/fold-masked-merge.ll
Normal file
277
llvm/test/CodeGen/SystemZ/fold-masked-merge.ll
Normal file
@ -0,0 +1,277 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s --check-prefix=NO-MISC3
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s --check-prefix=MISC3
|
||||
|
||||
; Test that masked-merge code is generated as an "xor; and; xor" sequence, or
|
||||
; as "andn; and; or" if and-not is available.
|
||||
|
||||
; Canonical masked merge with %a0 as the mask: (%a0 & %a1) | (~%a0 & %a2).
; NO-MISC3 (-mcpu=z13) expects the xor/and/xor form; MISC3 (-mcpu=z15) expects
; the and / and-not (ncrk) / or form to be kept.
define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; NO-MISC3-LABEL: masked_merge0:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: xr %r3, %r4
|
||||
; NO-MISC3-NEXT: nr %r2, %r3
|
||||
; NO-MISC3-NEXT: xr %r2, %r4
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge0:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nr %r3, %r2
|
||||
; MISC3-NEXT: ncrk %r2, %r4, %r2
|
||||
; MISC3-NEXT: or %r2, %r3
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Same masked-merge pattern as an i16 operation, with the operands of the
; inverted-mask AND commuted (%a2, %not). NO-MISC3 still expects xor/and/xor;
; MISC3 expects and-not (ncrk) / and / or.
define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
|
||||
; NO-MISC3-LABEL: masked_merge1:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: xr %r3, %r4
|
||||
; NO-MISC3-NEXT: nr %r2, %r3
|
||||
; NO-MISC3-NEXT: xr %r2, %r4
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge1:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: ncrk %r0, %r4, %r2
|
||||
; MISC3-NEXT: nr %r2, %r3
|
||||
; MISC3-NEXT: or %r2, %r0
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i16 %a0, %a1
|
||||
%not = xor i16 %a0, -1
|
||||
%and1 = and i16 %a2, %not
|
||||
%or = or i16 %and0, %and1
|
||||
ret i16 %or
|
||||
}
|
||||
|
||||
; Degenerate masked merge where both arms select the same value:
; (~%a0 & %a1) | (%a1 & %a0), which is simply %a1. Both run lines expect a
; single register copy (lr %r2, %r3) and no logic instructions.
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
|
||||
; NO-MISC3-LABEL: masked_merge2:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: lr %r2, %r3
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge2:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: lr %r2, %r3
|
||||
; MISC3-NEXT: br %r14
|
||||
%not = xor i8 %a0, -1
|
||||
%and0 = and i8 %not, %a1
|
||||
%and1 = and i8 %a1, %a0
|
||||
%or = or i8 %and0, %and1
|
||||
ret i8 %or
|
||||
}
|
||||
|
||||
; i64 masked merge whose two data operands are themselves inverted values:
; (~%a0 & ~%a2) | (~%a1 & %a0), with %a0 as the mask.
; NO-MISC3 expects a 64-bit xor/and/xor-based sequence (xgr/ngr/xgr/xgrk);
; MISC3 expects two and-not instructions (ncgrk) combined with ogr.
define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
|
||||
; NO-MISC3-LABEL: masked_merge3:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: lcgr %r0, %r4
|
||||
; NO-MISC3-NEXT: aghi %r0, -1
|
||||
; NO-MISC3-NEXT: xgr %r3, %r0
|
||||
; NO-MISC3-NEXT: ngr %r3, %r2
|
||||
; NO-MISC3-NEXT: xgr %r3, %r2
|
||||
; NO-MISC3-NEXT: xgrk %r2, %r3, %r0
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge3:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: lcgr %r0, %r2
|
||||
; MISC3-NEXT: aghi %r0, -1
|
||||
; MISC3-NEXT: ncgrk %r0, %r0, %r4
|
||||
; MISC3-NEXT: ncgrk %r2, %r2, %r3
|
||||
; MISC3-NEXT: ogr %r2, %r0
|
||||
; MISC3-NEXT: br %r14
|
||||
%v0 = xor i64 %a1, -1
|
||||
%v1 = xor i64 %a2, -1
|
||||
%not = xor i64 %a0, -1
|
||||
%and0 = and i64 %not, %v1
|
||||
%and1 = and i64 %v0, %a0
|
||||
%or = or i64 %and0, %and1
|
||||
ret i64 %or
|
||||
}
|
||||
|
||||
; Negative test: the second AND uses the arithmetic negation of %a0
; (sub 0, %a0) rather than its bitwise NOT, so this is not a masked merge.
; Both run lines expect the plain negate / and / and / or sequence.
define i32 @not_a_masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; NO-MISC3-LABEL: not_a_masked_merge0:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: lcr %r0, %r2
|
||||
; NO-MISC3-NEXT: nr %r3, %r2
|
||||
; NO-MISC3-NEXT: nr %r0, %r4
|
||||
; NO-MISC3-NEXT: ork %r2, %r3, %r0
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: not_a_masked_merge0:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: lcr %r0, %r2
|
||||
; MISC3-NEXT: nr %r3, %r2
|
||||
; MISC3-NEXT: nr %r0, %r4
|
||||
; MISC3-NEXT: ork %r2, %r3, %r0
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not_a_not = sub i32 0, %a0
|
||||
%and1 = and i32 %not_a_not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the inverted value is %a3, which does not appear in the
; other AND (%a0 & %a1), so the two arms do not share a mask.
; NO-MISC3 expects not (xilf) / and / and / or; MISC3 expects and / and-not / or.
define i32 @not_a_masked_merge1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
|
||||
; NO-MISC3-LABEL: not_a_masked_merge1:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: xilf %r5, 4294967295
|
||||
; NO-MISC3-NEXT: nr %r2, %r3
|
||||
; NO-MISC3-NEXT: nr %r4, %r5
|
||||
; NO-MISC3-NEXT: or %r2, %r4
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: not_a_masked_merge1:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nr %r2, %r3
|
||||
; MISC3-NEXT: ncrk %r0, %r4, %r5
|
||||
; MISC3-NEXT: or %r2, %r0
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a3, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the first operand of the final OR is itself an OR
; (%a0 | %a1) instead of an AND, so the masked-merge shape does not match.
; The expected output keeps the or / not-and / or structure on both CPUs.
define i32 @not_a_masked_merge2(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; NO-MISC3-LABEL: not_a_masked_merge2:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: or %r3, %r2
|
||||
; NO-MISC3-NEXT: xilf %r2, 4294967295
|
||||
; NO-MISC3-NEXT: nr %r2, %r4
|
||||
; NO-MISC3-NEXT: or %r2, %r3
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: not_a_masked_merge2:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: or %r3, %r2
|
||||
; MISC3-NEXT: ncrk %r2, %r4, %r2
|
||||
; MISC3-NEXT: or %r2, %r3
|
||||
; MISC3-NEXT: br %r14
|
||||
%not_an_and0 = or i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %not_an_and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the second operand of the final OR is an XOR
; (~%a0 ^ %a2) instead of an AND, so the masked-merge shape does not match.
; NO-MISC3 expects and / xor / not (xilf) / or; MISC3 expects and / xor / ocrk.
define i32 @not_a_masked_merge3(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; NO-MISC3-LABEL: not_a_masked_merge3:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: nr %r3, %r2
|
||||
; NO-MISC3-NEXT: xr %r2, %r4
|
||||
; NO-MISC3-NEXT: xilf %r2, 4294967295
|
||||
; NO-MISC3-NEXT: or %r2, %r3
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: not_a_masked_merge3:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nr %r3, %r2
|
||||
; MISC3-NEXT: xr %r2, %r4
|
||||
; MISC3-NEXT: ocrk %r2, %r3, %r2
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%not_an_and1 = xor i32 %not, %a2
|
||||
%or = or i32 %and0, %not_an_and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Negative test: the second arm is (~%a2 & %a2), which is always zero, so the
; OR reduces to %a0 & %a1. Both run lines expect a single and (nr %r2, %r3).
define i32 @not_a_masked_merge4(i32 %a0, i32 %a1, i32 %a2) {
|
||||
; NO-MISC3-LABEL: not_a_masked_merge4:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: nr %r2, %r3
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: not_a_masked_merge4:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nr %r2, %r3
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a2, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Masked-merge pattern where %and0 (%a0 & %a1) has a second use: it is also
; stored through %p1. NO-MISC3 therefore expects the original
; and / not / and / or sequence rather than xor/and/xor.
define i32 @masked_merge_no_transform0(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
; NO-MISC3-LABEL: masked_merge_no_transform0:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: nr %r3, %r2
|
||||
; NO-MISC3-NEXT: xilf %r2, 4294967295
|
||||
; NO-MISC3-NEXT: nr %r2, %r4
|
||||
; NO-MISC3-NEXT: or %r2, %r3
|
||||
; NO-MISC3-NEXT: st %r3, 0(%r5)
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge_no_transform0:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nr %r3, %r2
|
||||
; MISC3-NEXT: ncrk %r2, %r4, %r2
|
||||
; MISC3-NEXT: or %r2, %r3
|
||||
; MISC3-NEXT: st %r3, 0(%r5)
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
store i32 %and0, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Masked-merge pattern where the inverted mask %not (~%a0) has a second use:
; it is also stored through %p1. NO-MISC3 expects the original
; and / not / and / or sequence rather than xor/and/xor.
define i32 @masked_merge_no_transform1(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
; NO-MISC3-LABEL: masked_merge_no_transform1:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: nrk %r0, %r2, %r3
|
||||
; NO-MISC3-NEXT: xilf %r2, 4294967295
|
||||
; NO-MISC3-NEXT: nr %r4, %r2
|
||||
; NO-MISC3-NEXT: or %r0, %r4
|
||||
; NO-MISC3-NEXT: st %r2, 0(%r5)
|
||||
; NO-MISC3-NEXT: lr %r2, %r0
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge_no_transform1:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nrk %r0, %r2, %r3
|
||||
; MISC3-NEXT: ncrk %r1, %r4, %r2
|
||||
; MISC3-NEXT: xilf %r2, 4294967295
|
||||
; MISC3-NEXT: or %r0, %r1
|
||||
; MISC3-NEXT: st %r2, 0(%r5)
|
||||
; MISC3-NEXT: lr %r2, %r0
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
store i32 %not, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
; Masked-merge pattern where %and1 (~%a0 & %a2) has a second use: it is also
; stored through %p1. NO-MISC3 expects the original and / not / and / or
; sequence rather than xor/and/xor.
define i32 @masked_merge_no_transform2(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
; NO-MISC3-LABEL: masked_merge_no_transform2:
|
||||
; NO-MISC3: # %bb.0:
|
||||
; NO-MISC3-NEXT: nr %r3, %r2
|
||||
; NO-MISC3-NEXT: xilf %r2, 4294967295
|
||||
; NO-MISC3-NEXT: nr %r4, %r2
|
||||
; NO-MISC3-NEXT: ork %r2, %r3, %r4
|
||||
; NO-MISC3-NEXT: st %r4, 0(%r5)
|
||||
; NO-MISC3-NEXT: br %r14
|
||||
;
|
||||
; MISC3-LABEL: masked_merge_no_transform2:
|
||||
; MISC3: # %bb.0:
|
||||
; MISC3-NEXT: nr %r3, %r2
|
||||
; MISC3-NEXT: ncrk %r0, %r4, %r2
|
||||
; MISC3-NEXT: ork %r2, %r3, %r0
|
||||
; MISC3-NEXT: st %r0, 0(%r5)
|
||||
; MISC3-NEXT: br %r14
|
||||
%and0 = and i32 %a0, %a1
|
||||
%not = xor i32 %a0, -1
|
||||
%and1 = and i32 %not, %a2
|
||||
%or = or i32 %and0, %and1
|
||||
store i32 %and1, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
@ -4465,203 +4465,139 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
|
||||
; NO-SIMD128-LABEL: bitselect_v16i8:
|
||||
; NO-SIMD128: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
|
||||
; NO-SIMD128-NEXT: # %bb.0:
|
||||
; NO-SIMD128-NEXT: i32.and $push0=, $16, $32
|
||||
; NO-SIMD128-NEXT: i32.const $push1=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop1
|
||||
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $48
|
||||
; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3
|
||||
; NO-SIMD128-NEXT: i32.store8 15($0), $pop4
|
||||
; NO-SIMD128-NEXT: i32.and $push5=, $15, $31
|
||||
; NO-SIMD128-NEXT: i32.const $push79=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push6=, $15, $pop79
|
||||
; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $47
|
||||
; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7
|
||||
; NO-SIMD128-NEXT: i32.store8 14($0), $pop8
|
||||
; NO-SIMD128-NEXT: i32.and $push9=, $14, $30
|
||||
; NO-SIMD128-NEXT: i32.const $push78=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push10=, $14, $pop78
|
||||
; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $46
|
||||
; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11
|
||||
; NO-SIMD128-NEXT: i32.store8 13($0), $pop12
|
||||
; NO-SIMD128-NEXT: i32.and $push13=, $13, $29
|
||||
; NO-SIMD128-NEXT: i32.const $push77=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push14=, $13, $pop77
|
||||
; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $45
|
||||
; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15
|
||||
; NO-SIMD128-NEXT: i32.store8 12($0), $pop16
|
||||
; NO-SIMD128-NEXT: i32.and $push17=, $12, $28
|
||||
; NO-SIMD128-NEXT: i32.const $push76=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop76
|
||||
; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $44
|
||||
; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19
|
||||
; NO-SIMD128-NEXT: i32.store8 11($0), $pop20
|
||||
; NO-SIMD128-NEXT: i32.and $push21=, $11, $27
|
||||
; NO-SIMD128-NEXT: i32.const $push75=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push22=, $11, $pop75
|
||||
; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $43
|
||||
; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23
|
||||
; NO-SIMD128-NEXT: i32.store8 10($0), $pop24
|
||||
; NO-SIMD128-NEXT: i32.and $push25=, $10, $26
|
||||
; NO-SIMD128-NEXT: i32.const $push74=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push26=, $10, $pop74
|
||||
; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $42
|
||||
; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27
|
||||
; NO-SIMD128-NEXT: i32.store8 9($0), $pop28
|
||||
; NO-SIMD128-NEXT: i32.and $push29=, $9, $25
|
||||
; NO-SIMD128-NEXT: i32.const $push73=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push30=, $9, $pop73
|
||||
; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $41
|
||||
; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31
|
||||
; NO-SIMD128-NEXT: i32.store8 8($0), $pop32
|
||||
; NO-SIMD128-NEXT: i32.and $push33=, $8, $24
|
||||
; NO-SIMD128-NEXT: i32.const $push72=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push34=, $8, $pop72
|
||||
; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $40
|
||||
; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35
|
||||
; NO-SIMD128-NEXT: i32.store8 7($0), $pop36
|
||||
; NO-SIMD128-NEXT: i32.and $push37=, $7, $23
|
||||
; NO-SIMD128-NEXT: i32.const $push71=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push38=, $7, $pop71
|
||||
; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $39
|
||||
; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39
|
||||
; NO-SIMD128-NEXT: i32.store8 6($0), $pop40
|
||||
; NO-SIMD128-NEXT: i32.and $push41=, $6, $22
|
||||
; NO-SIMD128-NEXT: i32.const $push70=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push42=, $6, $pop70
|
||||
; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $38
|
||||
; NO-SIMD128-NEXT: i32.or $push44=, $pop41, $pop43
|
||||
; NO-SIMD128-NEXT: i32.store8 5($0), $pop44
|
||||
; NO-SIMD128-NEXT: i32.and $push45=, $5, $21
|
||||
; NO-SIMD128-NEXT: i32.const $push69=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push46=, $5, $pop69
|
||||
; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $37
|
||||
; NO-SIMD128-NEXT: i32.or $push48=, $pop45, $pop47
|
||||
; NO-SIMD128-NEXT: i32.store8 4($0), $pop48
|
||||
; NO-SIMD128-NEXT: i32.and $push49=, $4, $20
|
||||
; NO-SIMD128-NEXT: i32.const $push68=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push50=, $4, $pop68
|
||||
; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $36
|
||||
; NO-SIMD128-NEXT: i32.or $push52=, $pop49, $pop51
|
||||
; NO-SIMD128-NEXT: i32.store8 3($0), $pop52
|
||||
; NO-SIMD128-NEXT: i32.and $push53=, $3, $19
|
||||
; NO-SIMD128-NEXT: i32.const $push67=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push54=, $3, $pop67
|
||||
; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $35
|
||||
; NO-SIMD128-NEXT: i32.or $push56=, $pop53, $pop55
|
||||
; NO-SIMD128-NEXT: i32.store8 2($0), $pop56
|
||||
; NO-SIMD128-NEXT: i32.and $push57=, $2, $18
|
||||
; NO-SIMD128-NEXT: i32.const $push66=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push58=, $2, $pop66
|
||||
; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $34
|
||||
; NO-SIMD128-NEXT: i32.or $push60=, $pop57, $pop59
|
||||
; NO-SIMD128-NEXT: i32.store8 1($0), $pop60
|
||||
; NO-SIMD128-NEXT: i32.and $push61=, $1, $17
|
||||
; NO-SIMD128-NEXT: i32.const $push65=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push62=, $1, $pop65
|
||||
; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $33
|
||||
; NO-SIMD128-NEXT: i32.or $push64=, $pop61, $pop63
|
||||
; NO-SIMD128-NEXT: i32.store8 0($0), $pop64
|
||||
; NO-SIMD128-NEXT: i32.xor $push0=, $32, $48
|
||||
; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $16
|
||||
; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $48
|
||||
; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
|
||||
; NO-SIMD128-NEXT: i32.xor $push3=, $31, $47
|
||||
; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $15
|
||||
; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $47
|
||||
; NO-SIMD128-NEXT: i32.store8 14($0), $pop5
|
||||
; NO-SIMD128-NEXT: i32.xor $push6=, $30, $46
|
||||
; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $14
|
||||
; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $46
|
||||
; NO-SIMD128-NEXT: i32.store8 13($0), $pop8
|
||||
; NO-SIMD128-NEXT: i32.xor $push9=, $29, $45
|
||||
; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $13
|
||||
; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $45
|
||||
; NO-SIMD128-NEXT: i32.store8 12($0), $pop11
|
||||
; NO-SIMD128-NEXT: i32.xor $push12=, $28, $44
|
||||
; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $12
|
||||
; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $44
|
||||
; NO-SIMD128-NEXT: i32.store8 11($0), $pop14
|
||||
; NO-SIMD128-NEXT: i32.xor $push15=, $27, $43
|
||||
; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $11
|
||||
; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $43
|
||||
; NO-SIMD128-NEXT: i32.store8 10($0), $pop17
|
||||
; NO-SIMD128-NEXT: i32.xor $push18=, $26, $42
|
||||
; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $10
|
||||
; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $42
|
||||
; NO-SIMD128-NEXT: i32.store8 9($0), $pop20
|
||||
; NO-SIMD128-NEXT: i32.xor $push21=, $25, $41
|
||||
; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $9
|
||||
; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $41
|
||||
; NO-SIMD128-NEXT: i32.store8 8($0), $pop23
|
||||
; NO-SIMD128-NEXT: i32.xor $push24=, $24, $40
|
||||
; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $8
|
||||
; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $40
|
||||
; NO-SIMD128-NEXT: i32.store8 7($0), $pop26
|
||||
; NO-SIMD128-NEXT: i32.xor $push27=, $23, $39
|
||||
; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $7
|
||||
; NO-SIMD128-NEXT: i32.xor $push29=, $pop28, $39
|
||||
; NO-SIMD128-NEXT: i32.store8 6($0), $pop29
|
||||
; NO-SIMD128-NEXT: i32.xor $push30=, $22, $38
|
||||
; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $6
|
||||
; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $38
|
||||
; NO-SIMD128-NEXT: i32.store8 5($0), $pop32
|
||||
; NO-SIMD128-NEXT: i32.xor $push33=, $21, $37
|
||||
; NO-SIMD128-NEXT: i32.and $push34=, $pop33, $5
|
||||
; NO-SIMD128-NEXT: i32.xor $push35=, $pop34, $37
|
||||
; NO-SIMD128-NEXT: i32.store8 4($0), $pop35
|
||||
; NO-SIMD128-NEXT: i32.xor $push36=, $20, $36
|
||||
; NO-SIMD128-NEXT: i32.and $push37=, $pop36, $4
|
||||
; NO-SIMD128-NEXT: i32.xor $push38=, $pop37, $36
|
||||
; NO-SIMD128-NEXT: i32.store8 3($0), $pop38
|
||||
; NO-SIMD128-NEXT: i32.xor $push39=, $19, $35
|
||||
; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $3
|
||||
; NO-SIMD128-NEXT: i32.xor $push41=, $pop40, $35
|
||||
; NO-SIMD128-NEXT: i32.store8 2($0), $pop41
|
||||
; NO-SIMD128-NEXT: i32.xor $push42=, $18, $34
|
||||
; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $2
|
||||
; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $34
|
||||
; NO-SIMD128-NEXT: i32.store8 1($0), $pop44
|
||||
; NO-SIMD128-NEXT: i32.xor $push45=, $17, $33
|
||||
; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $1
|
||||
; NO-SIMD128-NEXT: i32.xor $push47=, $pop46, $33
|
||||
; NO-SIMD128-NEXT: i32.store8 0($0), $pop47
|
||||
; NO-SIMD128-NEXT: return
|
||||
;
|
||||
; NO-SIMD128-FAST-LABEL: bitselect_v16i8:
|
||||
; NO-SIMD128-FAST: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
|
||||
; NO-SIMD128-FAST-NEXT: # %bb.0:
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $17
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $33
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop79
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop78
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop77
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push17=, $5, $21
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop76
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $37
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push21=, $6, $22
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop75
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $38
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push25=, $7, $23
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop74
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $39
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $24
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop73
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $40
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $25
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $9, $pop72
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $41
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push36=, $pop33, $pop35
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $26
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $10, $pop71
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $42
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push40=, $pop37, $pop39
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push41=, $11, $27
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $11, $pop70
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $43
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push45=, $12, $28
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop69
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $44
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push49=, $13, $29
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $13, $pop68
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $45
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push52=, $pop49, $pop51
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push53=, $14, $30
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $14, $pop67
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $46
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push56=, $pop53, $pop55
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push57=, $15, $31
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $15, $pop66
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $47
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push61=, $16, $32
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $16, $pop65
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $48
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push64=, $pop61, $pop63
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $17, $33
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $pop0, $1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $pop1, $33
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $34
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop3, $2
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $pop4, $34
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop5
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $19, $35
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $35
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $36
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $36
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop11
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $21, $37
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $37
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop14
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $38
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $38
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop17
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $23, $39
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $39
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop20
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $24, $40
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $40
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop23
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $25, $41
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $9
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $41
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $26, $42
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $10
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $pop28, $42
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop29
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $27, $43
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $11
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $43
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop32
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $28, $44
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $12
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $pop34, $44
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop35
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $29, $45
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $13
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $45
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop38
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $30, $46
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $14
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $pop40, $46
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop41
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $31, $47
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $15
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $47
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop44
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $32, $48
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push46=, $pop45, $16
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $pop46, $48
|
||||
; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop47
|
||||
; NO-SIMD128-FAST-NEXT: return
|
||||
%masked_v1 = and <16 x i8> %c, %v1
|
||||
%inv_mask = xor <16 x i8> %c,
|
||||
@ -7546,107 +7482,75 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
|
||||
; NO-SIMD128-LABEL: bitselect_v8i16:
|
||||
; NO-SIMD128: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
|
||||
; NO-SIMD128-NEXT: # %bb.0:
|
||||
; NO-SIMD128-NEXT: i32.and $push0=, $16, $8
|
||||
; NO-SIMD128-NEXT: i32.const $push1=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop1
|
||||
; NO-SIMD128-NEXT: i32.and $push3=, $24, $pop2
|
||||
; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3
|
||||
; NO-SIMD128-NEXT: i32.store16 14($0), $pop4
|
||||
; NO-SIMD128-NEXT: i32.and $push5=, $15, $7
|
||||
; NO-SIMD128-NEXT: i32.const $push39=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push6=, $7, $pop39
|
||||
; NO-SIMD128-NEXT: i32.and $push7=, $23, $pop6
|
||||
; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7
|
||||
; NO-SIMD128-NEXT: i32.store16 12($0), $pop8
|
||||
; NO-SIMD128-NEXT: i32.and $push9=, $14, $6
|
||||
; NO-SIMD128-NEXT: i32.const $push38=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push10=, $6, $pop38
|
||||
; NO-SIMD128-NEXT: i32.and $push11=, $22, $pop10
|
||||
; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11
|
||||
; NO-SIMD128-NEXT: i32.store16 10($0), $pop12
|
||||
; NO-SIMD128-NEXT: i32.and $push13=, $13, $5
|
||||
; NO-SIMD128-NEXT: i32.const $push37=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push14=, $5, $pop37
|
||||
; NO-SIMD128-NEXT: i32.and $push15=, $21, $pop14
|
||||
; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15
|
||||
; NO-SIMD128-NEXT: i32.store16 8($0), $pop16
|
||||
; NO-SIMD128-NEXT: i32.and $push17=, $12, $4
|
||||
; NO-SIMD128-NEXT: i32.const $push36=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push18=, $4, $pop36
|
||||
; NO-SIMD128-NEXT: i32.and $push19=, $20, $pop18
|
||||
; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19
|
||||
; NO-SIMD128-NEXT: i32.store16 6($0), $pop20
|
||||
; NO-SIMD128-NEXT: i32.and $push21=, $11, $3
|
||||
; NO-SIMD128-NEXT: i32.const $push35=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push22=, $3, $pop35
|
||||
; NO-SIMD128-NEXT: i32.and $push23=, $19, $pop22
|
||||
; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23
|
||||
; NO-SIMD128-NEXT: i32.store16 4($0), $pop24
|
||||
; NO-SIMD128-NEXT: i32.and $push25=, $10, $2
|
||||
; NO-SIMD128-NEXT: i32.const $push34=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push26=, $2, $pop34
|
||||
; NO-SIMD128-NEXT: i32.and $push27=, $18, $pop26
|
||||
; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27
|
||||
; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
|
||||
; NO-SIMD128-NEXT: i32.and $push29=, $9, $1
|
||||
; NO-SIMD128-NEXT: i32.const $push33=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push30=, $1, $pop33
|
||||
; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop30
|
||||
; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31
|
||||
; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
|
||||
; NO-SIMD128-NEXT: i32.xor $push0=, $16, $24
|
||||
; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $8
|
||||
; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $24
|
||||
; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
|
||||
; NO-SIMD128-NEXT: i32.xor $push3=, $15, $23
|
||||
; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $7
|
||||
; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $23
|
||||
; NO-SIMD128-NEXT: i32.store16 12($0), $pop5
|
||||
; NO-SIMD128-NEXT: i32.xor $push6=, $14, $22
|
||||
; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $6
|
||||
; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $22
|
||||
; NO-SIMD128-NEXT: i32.store16 10($0), $pop8
|
||||
; NO-SIMD128-NEXT: i32.xor $push9=, $13, $21
|
||||
; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $5
|
||||
; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $21
|
||||
; NO-SIMD128-NEXT: i32.store16 8($0), $pop11
|
||||
; NO-SIMD128-NEXT: i32.xor $push12=, $12, $20
|
||||
; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $4
|
||||
; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $20
|
||||
; NO-SIMD128-NEXT: i32.store16 6($0), $pop14
|
||||
; NO-SIMD128-NEXT: i32.xor $push15=, $11, $19
|
||||
; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $3
|
||||
; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $19
|
||||
; NO-SIMD128-NEXT: i32.store16 4($0), $pop17
|
||||
; NO-SIMD128-NEXT: i32.xor $push18=, $10, $18
|
||||
; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $2
|
||||
; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $18
|
||||
; NO-SIMD128-NEXT: i32.store16 2($0), $pop20
|
||||
; NO-SIMD128-NEXT: i32.xor $push21=, $9, $17
|
||||
; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $1
|
||||
; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $17
|
||||
; NO-SIMD128-NEXT: i32.store16 0($0), $pop23
|
||||
; NO-SIMD128-NEXT: return
|
||||
;
|
||||
; NO-SIMD128-FAST-LABEL: bitselect_v8i16:
|
||||
; NO-SIMD128-FAST: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
|
||||
; NO-SIMD128-FAST-NEXT: # %bb.0:
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push0=, $9, $1
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push3=, $17, $pop2
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop39
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop38
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop37
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $5
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop36
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop18
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $6
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop35
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop22
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $7
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop34
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push27=, $23, $pop26
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $8
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop33
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push31=, $24, $pop30
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $9, $17
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $pop0, $1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $pop1, $17
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $18
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop3, $2
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $pop4, $18
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop5
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $11, $19
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $19
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $20
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $20
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $21
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $21
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $22
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $22
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $15, $23
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $23
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $16, $24
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $24
|
||||
; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
|
||||
; NO-SIMD128-FAST-NEXT: return
|
||||
%masked_v1 = and <8 x i16> %v1, %c
|
||||
%inv_mask = xor <8 x i16>
|
||||
@ -9453,59 +9357,43 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
|
||||
; NO-SIMD128-LABEL: bitselect_v4i32:
|
||||
; NO-SIMD128: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
|
||||
; NO-SIMD128-NEXT: # %bb.0:
|
||||
; NO-SIMD128-NEXT: i32.const $push1=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push2=, $4, $pop1
|
||||
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $12
|
||||
; NO-SIMD128-NEXT: i32.and $push0=, $4, $8
|
||||
; NO-SIMD128-NEXT: i32.or $push4=, $pop3, $pop0
|
||||
; NO-SIMD128-NEXT: i32.store 12($0), $pop4
|
||||
; NO-SIMD128-NEXT: i32.const $push19=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop19
|
||||
; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $11
|
||||
; NO-SIMD128-NEXT: i32.and $push5=, $3, $7
|
||||
; NO-SIMD128-NEXT: i32.or $push8=, $pop7, $pop5
|
||||
; NO-SIMD128-NEXT: i32.store 8($0), $pop8
|
||||
; NO-SIMD128-NEXT: i32.const $push18=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push10=, $2, $pop18
|
||||
; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $10
|
||||
; NO-SIMD128-NEXT: i32.and $push9=, $2, $6
|
||||
; NO-SIMD128-NEXT: i32.or $push12=, $pop11, $pop9
|
||||
; NO-SIMD128-NEXT: i32.store 4($0), $pop12
|
||||
; NO-SIMD128-NEXT: i32.const $push17=, -1
|
||||
; NO-SIMD128-NEXT: i32.xor $push14=, $1, $pop17
|
||||
; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $9
|
||||
; NO-SIMD128-NEXT: i32.and $push13=, $1, $5
|
||||
; NO-SIMD128-NEXT: i32.or $push16=, $pop15, $pop13
|
||||
; NO-SIMD128-NEXT: i32.store 0($0), $pop16
|
||||
; NO-SIMD128-NEXT: i32.xor $push0=, $8, $12
|
||||
; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $4
|
||||
; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $12
|
||||
; NO-SIMD128-NEXT: i32.store 12($0), $pop2
|
||||
; NO-SIMD128-NEXT: i32.xor $push3=, $7, $11
|
||||
; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $3
|
||||
; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $11
|
||||
; NO-SIMD128-NEXT: i32.store 8($0), $pop5
|
||||
; NO-SIMD128-NEXT: i32.xor $push6=, $6, $10
|
||||
; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $2
|
||||
; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $10
|
||||
; NO-SIMD128-NEXT: i32.store 4($0), $pop8
|
||||
; NO-SIMD128-NEXT: i32.xor $push9=, $5, $9
|
||||
; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $1
|
||||
; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $9
|
||||
; NO-SIMD128-NEXT: i32.store 0($0), $pop11
|
||||
; NO-SIMD128-NEXT: return
|
||||
;
|
||||
; NO-SIMD128-FAST-LABEL: bitselect_v4i32:
|
||||
; NO-SIMD128-FAST: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
|
||||
; NO-SIMD128-FAST-NEXT: # %bb.0:
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $9
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop19
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $6
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop18
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12
|
||||
; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop17
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $12
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8
|
||||
; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $5, $9
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $pop0, $1
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $pop1, $9
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $10
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop3, $2
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $pop4, $10
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop5
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $11
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $11
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $12
|
||||
; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4
|
||||
; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $12
|
||||
; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11
|
||||
; NO-SIMD128-FAST-NEXT: return
|
||||
%masked_v1 = and <4 x i32> %c, %v1
|
||||
%inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
|
||||
@ -10974,35 +10862,27 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
|
||||
; NO-SIMD128-LABEL: bitselect_v2i64:
|
||||
; NO-SIMD128: .functype bitselect_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> ()
|
||||
; NO-SIMD128-NEXT: # %bb.0:
|
||||
; NO-SIMD128-NEXT: i64.const $push1=, -1
|
||||
; NO-SIMD128-NEXT: i64.xor $push2=, $2, $pop1
|
||||
; NO-SIMD128-NEXT: i64.and $push3=, $6, $pop2
|
||||
; NO-SIMD128-NEXT: i64.and $push0=, $4, $2
|
||||
; NO-SIMD128-NEXT: i64.or $push4=, $pop3, $pop0
|
||||
; NO-SIMD128-NEXT: i64.store 8($0), $pop4
|
||||
; NO-SIMD128-NEXT: i64.const $push9=, -1
|
||||
; NO-SIMD128-NEXT: i64.xor $push6=, $1, $pop9
|
||||
; NO-SIMD128-NEXT: i64.and $push7=, $5, $pop6
|
||||
; NO-SIMD128-NEXT: i64.and $push5=, $3, $1
|
||||
; NO-SIMD128-NEXT: i64.or $push8=, $pop7, $pop5
|
||||
; NO-SIMD128-NEXT: i64.store 0($0), $pop8
|
||||
; NO-SIMD128-NEXT: i64.xor $push0=, $4, $6
|
||||
; NO-SIMD128-NEXT: i64.and $push1=, $pop0, $2
|
||||
; NO-SIMD128-NEXT: i64.xor $push2=, $pop1, $6
|
||||
; NO-SIMD128-NEXT: i64.store 8($0), $pop2
|
||||
; NO-SIMD128-NEXT: i64.xor $push3=, $3, $5
|
||||
; NO-SIMD128-NEXT: i64.and $push4=, $pop3, $1
|
||||
; NO-SIMD128-NEXT: i64.xor $push5=, $pop4, $5
|
||||
; NO-SIMD128-NEXT: i64.store 0($0), $pop5
|
||||
; NO-SIMD128-NEXT: return
|
||||
;
|
||||
; NO-SIMD128-FAST-LABEL: bitselect_v2i64:
|
||||
; NO-SIMD128-FAST: .functype bitselect_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> ()
|
||||
; NO-SIMD128-FAST-NEXT: # %bb.0:
|
||||
; NO-SIMD128-FAST-NEXT: i64.const $push1=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i64.xor $push2=, $1, $pop1
|
||||
; NO-SIMD128-FAST-NEXT: i64.and $push3=, $5, $pop2
|
||||
; NO-SIMD128-FAST-NEXT: i64.and $push0=, $3, $1
|
||||
; NO-SIMD128-FAST-NEXT: i64.or $push4=, $pop3, $pop0
|
||||
; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop4
|
||||
; NO-SIMD128-FAST-NEXT: i64.const $push9=, -1
|
||||
; NO-SIMD128-FAST-NEXT: i64.xor $push6=, $2, $pop9
|
||||
; NO-SIMD128-FAST-NEXT: i64.and $push7=, $6, $pop6
|
||||
; NO-SIMD128-FAST-NEXT: i64.and $push5=, $4, $2
|
||||
; NO-SIMD128-FAST-NEXT: i64.or $push8=, $pop7, $pop5
|
||||
; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop8
|
||||
; NO-SIMD128-FAST-NEXT: i64.xor $push0=, $3, $5
|
||||
; NO-SIMD128-FAST-NEXT: i64.and $push1=, $pop0, $1
|
||||
; NO-SIMD128-FAST-NEXT: i64.xor $push2=, $pop1, $5
|
||||
; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2
|
||||
; NO-SIMD128-FAST-NEXT: i64.xor $push3=, $4, $6
|
||||
; NO-SIMD128-FAST-NEXT: i64.and $push4=, $pop3, $2
|
||||
; NO-SIMD128-FAST-NEXT: i64.xor $push5=, $pop4, $6
|
||||
; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop5
|
||||
; NO-SIMD128-FAST-NEXT: return
|
||||
%masked_v1 = and <2 x i64> %v1, %c
|
||||
%inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c
|
||||
|
@ -1,7 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOBMI
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-bmi | FileCheck %s --check-prefixes=X64-NOBMI
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=X64-BMI
|
||||
|
||||
; PR46472
|
||||
; bitselect(a,b,m) == or(and(a,not(m)),and(b,m))
|
||||
@ -17,14 +17,22 @@ define i8 @bitselect_i8(i8 %a, i8 %b, i8 %m) nounwind {
|
||||
; X86-NEXT: xorb %cl, %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: bitselect_i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: andl %edx, %esi
|
||||
; X64-NEXT: movl %edx, %eax
|
||||
; X64-NEXT: notb %al
|
||||
; X64-NEXT: andb %dil, %al
|
||||
; X64-NEXT: orb %sil, %al
|
||||
; X64-NEXT: retq
|
||||
; X64-NOBMI-LABEL: bitselect_i8:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %esi, %eax
|
||||
; X64-NOBMI-NEXT: xorl %edi, %eax
|
||||
; X64-NOBMI-NEXT: andl %edx, %eax
|
||||
; X64-NOBMI-NEXT: xorl %edi, %eax
|
||||
; X64-NOBMI-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
; X64-BMI-LABEL: bitselect_i8:
|
||||
; X64-BMI: # %bb.0:
|
||||
; X64-BMI-NEXT: andnl %edi, %edx, %eax
|
||||
; X64-BMI-NEXT: andl %edx, %esi
|
||||
; X64-BMI-NEXT: orl %esi, %eax
|
||||
; X64-BMI-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-BMI-NEXT: retq
|
||||
%not = xor i8 %m, -1
|
||||
%ma = and i8 %a, %not
|
||||
%mb = and i8 %b, %m
|
||||
@ -35,21 +43,20 @@ define i8 @bitselect_i8(i8 %a, i8 %b, i8 %m) nounwind {
|
||||
define i16 @bitselect_i16(i16 %a, i16 %b, i16 %m) nounwind {
|
||||
; X86-LABEL: bitselect_i16:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: xorw %ax, %cx
|
||||
; X86-NEXT: andw {{[0-9]+}}(%esp), %cx
|
||||
; X86-NEXT: xorw %cx, %ax
|
||||
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
|
||||
; X86-NEXT: xorl %ecx, %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-NOBMI-LABEL: bitselect_i16:
|
||||
; X64-NOBMI: # %bb.0:
|
||||
; X64-NOBMI-NEXT: movl %edx, %eax
|
||||
; X64-NOBMI-NEXT: andl %edx, %esi
|
||||
; X64-NOBMI-NEXT: notl %eax
|
||||
; X64-NOBMI-NEXT: andl %edi, %eax
|
||||
; X64-NOBMI-NEXT: orl %esi, %eax
|
||||
; X64-NOBMI-NEXT: movl %esi, %eax
|
||||
; X64-NOBMI-NEXT: xorl %edi, %eax
|
||||
; X64-NOBMI-NEXT: andl %edx, %eax
|
||||
; X64-NOBMI-NEXT: xorl %edi, %eax
|
||||
; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X64-NOBMI-NEXT: retq
|
||||
;
|
||||
@ -186,13 +193,12 @@ define i128 @bitselect_i128(i128 %a, i128 %b, i128 %m) nounwind {
|
||||
;
|
||||
; X64-BMI-LABEL: bitselect_i128:
|
||||
; X64-BMI: # %bb.0:
|
||||
; X64-BMI-NEXT: andnq %rsi, %r9, %rsi
|
||||
; X64-BMI-NEXT: andnq %rdi, %r8, %rax
|
||||
; X64-BMI-NEXT: andq %r9, %rcx
|
||||
; X64-BMI-NEXT: orq %rcx, %rsi
|
||||
; X64-BMI-NEXT: andq %r8, %rdx
|
||||
; X64-BMI-NEXT: orq %rdx, %rax
|
||||
; X64-BMI-NEXT: movq %rsi, %rdx
|
||||
; X64-BMI-NEXT: andnq %rsi, %r9, %rdx
|
||||
; X64-BMI-NEXT: andq %r9, %rcx
|
||||
; X64-BMI-NEXT: orq %rcx, %rdx
|
||||
; X64-BMI-NEXT: retq
|
||||
%not = xor i128 %m, -1
|
||||
%ma = and i128 %a, %not
|
||||
|
@ -30,18 +30,17 @@ define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
|
||||
define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
|
||||
; NOBMI-LABEL: masked_merge1:
|
||||
; NOBMI: # %bb.0:
|
||||
; NOBMI-NEXT: movl %edi, %eax
|
||||
; NOBMI-NEXT: andl %edi, %esi
|
||||
; NOBMI-NEXT: notl %eax
|
||||
; NOBMI-NEXT: andl %edx, %eax
|
||||
; NOBMI-NEXT: orl %esi, %eax
|
||||
; NOBMI-NEXT: movl %esi, %eax
|
||||
; NOBMI-NEXT: xorl %edx, %eax
|
||||
; NOBMI-NEXT: andl %edi, %eax
|
||||
; NOBMI-NEXT: xorl %edx, %eax
|
||||
; NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: masked_merge1:
|
||||
; BMI: # %bb.0:
|
||||
; BMI-NEXT: andl %edi, %esi
|
||||
; BMI-NEXT: andnl %edx, %edi, %eax
|
||||
; BMI-NEXT: andl %edi, %esi
|
||||
; BMI-NEXT: orl %esi, %eax
|
||||
; BMI-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; BMI-NEXT: retq
|
||||
@ -53,20 +52,11 @@ define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
|
||||
}
|
||||
|
||||
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
|
||||
; NOBMI-LABEL: masked_merge2:
|
||||
; NOBMI: # %bb.0:
|
||||
; NOBMI-NEXT: movl %esi, %eax
|
||||
; NOBMI-NEXT: # kill: def $al killed $al killed $eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: masked_merge2:
|
||||
; BMI: # %bb.0:
|
||||
; BMI-NEXT: movl %edi, %eax
|
||||
; BMI-NEXT: notb %al
|
||||
; BMI-NEXT: andb %sil, %al
|
||||
; BMI-NEXT: andb %dil, %sil
|
||||
; BMI-NEXT: orb %sil, %al
|
||||
; BMI-NEXT: retq
|
||||
; CHECK-LABEL: masked_merge2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%not = xor i8 %a0, -1
|
||||
%and0 = and i8 %not, %a1
|
||||
%and1 = and i8 %a1, %a0
|
||||
@ -279,3 +269,27 @@ define i32 @masked_merge_no_transform2(i32 %a0, i32 %a1, i32 %a2, ptr %p1) {
|
||||
store i32 %and1, ptr %p1
|
||||
ret i32 %or
|
||||
}
|
||||
|
||||
define i32 @pr137641_crash({ i8, i32 } %0) {
|
||||
; NOBMI-LABEL: pr137641_crash:
|
||||
; NOBMI: # %bb.0:
|
||||
; NOBMI-NEXT: movl %esi, %eax
|
||||
; NOBMI-NEXT: andl $201, %eax
|
||||
; NOBMI-NEXT: xorl $1, %eax
|
||||
; NOBMI-NEXT: retq
|
||||
;
|
||||
; BMI-LABEL: pr137641_crash:
|
||||
; BMI: # %bb.0:
|
||||
; BMI-NEXT: movl %esi, %eax
|
||||
; BMI-NEXT: notl %eax
|
||||
; BMI-NEXT: andl $1, %eax
|
||||
; BMI-NEXT: andl $200, %esi
|
||||
; BMI-NEXT: orl %esi, %eax
|
||||
; BMI-NEXT: retq
|
||||
%asmresult1.i = extractvalue { i8, i32 } %0, 1
|
||||
%not = xor i32 %asmresult1.i, 1
|
||||
%and = and i32 1, %not
|
||||
%and1 = and i32 %asmresult1.i, 200
|
||||
%2 = or i32 %and, %and1
|
||||
ret i32 %2
|
||||
}
|
||||
|
@ -6,21 +6,18 @@
|
||||
define i8 @out8(i8 %x, i8 %y, i8 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out8:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %edi
|
||||
; CHECK-NOBMI-NEXT: notb %al
|
||||
; CHECK-NOBMI-NEXT: andb %sil, %al
|
||||
; CHECK-NOBMI-NEXT: orb %dil, %al
|
||||
; CHECK-NOBMI-NEXT: movl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out8:
|
||||
; CHECK-BMI: # %bb.0:
|
||||
; CHECK-BMI-NEXT: movl %edx, %eax
|
||||
; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
|
||||
; CHECK-BMI-NEXT: andl %edx, %edi
|
||||
; CHECK-BMI-NEXT: notb %al
|
||||
; CHECK-BMI-NEXT: andb %sil, %al
|
||||
; CHECK-BMI-NEXT: orb %dil, %al
|
||||
; CHECK-BMI-NEXT: orl %edi, %eax
|
||||
; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-BMI-NEXT: retq
|
||||
%mx = and i8 %x, %mask
|
||||
@ -33,18 +30,17 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
|
||||
define i16 @out16(i16 %x, i16 %y, i16 %mask) {
|
||||
; CHECK-NOBMI-LABEL: out16:
|
||||
; CHECK-NOBMI: # %bb.0:
|
||||
; CHECK-NOBMI-NEXT: movl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %edi
|
||||
; CHECK-NOBMI-NEXT: notl %eax
|
||||
; CHECK-NOBMI-NEXT: andl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: orl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: movl %edi, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: andl %edx, %eax
|
||||
; CHECK-NOBMI-NEXT: xorl %esi, %eax
|
||||
; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-NOBMI-NEXT: retq
|
||||
;
|
||||
; CHECK-BMI-LABEL: out16:
|
||||
; CHECK-BMI: # %bb.0:
|
||||
; CHECK-BMI-NEXT: andl %edx, %edi
|
||||
; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
|
||||
; CHECK-BMI-NEXT: andl %edx, %edi
|
||||
; CHECK-BMI-NEXT: orl %edi, %eax
|
||||
; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-BMI-NEXT: retq
|
||||
|
@ -16,11 +16,10 @@
|
||||
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v1i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: andl %edx, %edi
|
||||
; CHECK-NEXT: notb %al
|
||||
; CHECK-NEXT: andb %sil, %al
|
||||
; CHECK-NEXT: orb %dil, %al
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: xorl %esi, %eax
|
||||
; CHECK-NEXT: andl %edx, %eax
|
||||
; CHECK-NEXT: xorl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%mx = and <1 x i8> %x, %mask
|
||||
@ -37,32 +36,28 @@ define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
|
||||
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
|
||||
; CHECK-BASELINE-LABEL: out_v2i8:
|
||||
; CHECK-BASELINE: # %bb.0:
|
||||
; CHECK-BASELINE-NEXT: movl %r8d, %eax
|
||||
; CHECK-BASELINE-NEXT: movl %edi, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %edx, %eax
|
||||
; CHECK-BASELINE-NEXT: andl %r8d, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %edx, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
|
||||
; CHECK-BASELINE-NEXT: andl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: andl %r8d, %edi
|
||||
; CHECK-BASELINE-NEXT: notb %al
|
||||
; CHECK-BASELINE-NEXT: notb %r9b
|
||||
; CHECK-BASELINE-NEXT: andb %cl, %r9b
|
||||
; CHECK-BASELINE-NEXT: andb %dl, %al
|
||||
; CHECK-BASELINE-NEXT: orb %dil, %al
|
||||
; CHECK-BASELINE-NEXT: orb %sil, %r9b
|
||||
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
|
||||
; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-BASELINE-NEXT: movl %r9d, %edx
|
||||
; CHECK-BASELINE-NEXT: movl %esi, %edx
|
||||
; CHECK-BASELINE-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE1-LABEL: out_v2i8:
|
||||
; CHECK-SSE1: # %bb.0:
|
||||
; CHECK-SSE1-NEXT: movl %r8d, %eax
|
||||
; CHECK-SSE1-NEXT: movl %edi, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %edx, %eax
|
||||
; CHECK-SSE1-NEXT: andl %r8d, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %edx, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %ecx, %esi
|
||||
; CHECK-SSE1-NEXT: andl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: andl %r8d, %edi
|
||||
; CHECK-SSE1-NEXT: notb %al
|
||||
; CHECK-SSE1-NEXT: notb %r9b
|
||||
; CHECK-SSE1-NEXT: andb %cl, %r9b
|
||||
; CHECK-SSE1-NEXT: andb %dl, %al
|
||||
; CHECK-SSE1-NEXT: orb %dil, %al
|
||||
; CHECK-SSE1-NEXT: orb %sil, %r9b
|
||||
; CHECK-SSE1-NEXT: xorl %ecx, %esi
|
||||
; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-SSE1-NEXT: movl %r9d, %edx
|
||||
; CHECK-SSE1-NEXT: movl %esi, %edx
|
||||
; CHECK-SSE1-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE2-LABEL: out_v2i8:
|
||||
@ -86,11 +81,10 @@ define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
|
||||
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
|
||||
; CHECK-LABEL: out_v1i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: andl %edx, %edi
|
||||
; CHECK-NEXT: notl %eax
|
||||
; CHECK-NEXT: andl %esi, %eax
|
||||
; CHECK-NEXT: orl %edi, %eax
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: xorl %esi, %eax
|
||||
; CHECK-NEXT: andl %edx, %eax
|
||||
; CHECK-NEXT: xorl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%mx = and <1 x i16> %x, %mask
|
||||
@ -235,32 +229,28 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi
|
||||
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
|
||||
; CHECK-BASELINE-LABEL: out_v2i16:
|
||||
; CHECK-BASELINE: # %bb.0:
|
||||
; CHECK-BASELINE-NEXT: movl %r8d, %eax
|
||||
; CHECK-BASELINE-NEXT: movl %edi, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %edx, %eax
|
||||
; CHECK-BASELINE-NEXT: andl %r8d, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %edx, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
|
||||
; CHECK-BASELINE-NEXT: andl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: andl %r8d, %edi
|
||||
; CHECK-BASELINE-NEXT: notl %eax
|
||||
; CHECK-BASELINE-NEXT: notl %r9d
|
||||
; CHECK-BASELINE-NEXT: andl %ecx, %r9d
|
||||
; CHECK-BASELINE-NEXT: orl %esi, %r9d
|
||||
; CHECK-BASELINE-NEXT: andl %edx, %eax
|
||||
; CHECK-BASELINE-NEXT: orl %edi, %eax
|
||||
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
|
||||
; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-BASELINE-NEXT: movl %r9d, %edx
|
||||
; CHECK-BASELINE-NEXT: movl %esi, %edx
|
||||
; CHECK-BASELINE-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE1-LABEL: out_v2i16:
|
||||
; CHECK-SSE1: # %bb.0:
|
||||
; CHECK-SSE1-NEXT: movl %r8d, %eax
|
||||
; CHECK-SSE1-NEXT: movl %edi, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %edx, %eax
|
||||
; CHECK-SSE1-NEXT: andl %r8d, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %edx, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %ecx, %esi
|
||||
; CHECK-SSE1-NEXT: andl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: andl %r8d, %edi
|
||||
; CHECK-SSE1-NEXT: notl %eax
|
||||
; CHECK-SSE1-NEXT: notl %r9d
|
||||
; CHECK-SSE1-NEXT: andl %ecx, %r9d
|
||||
; CHECK-SSE1-NEXT: orl %esi, %r9d
|
||||
; CHECK-SSE1-NEXT: andl %edx, %eax
|
||||
; CHECK-SSE1-NEXT: orl %edi, %eax
|
||||
; CHECK-SSE1-NEXT: xorl %ecx, %esi
|
||||
; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-SSE1-NEXT: movl %r9d, %edx
|
||||
; CHECK-SSE1-NEXT: movl %esi, %edx
|
||||
; CHECK-SSE1-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE2-LABEL: out_v2i16:
|
||||
@ -439,9 +429,12 @@ define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwin
|
||||
; CHECK-BASELINE-LABEL: out_v4i16:
|
||||
; CHECK-BASELINE: # %bb.0:
|
||||
; CHECK-BASELINE-NEXT: movq %rdi, %rax
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
|
||||
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
|
||||
@ -451,21 +444,21 @@ define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwin
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: movw %si, (%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %cx, 4(%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %dx, 2(%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %si, (%rax)
|
||||
; CHECK-BASELINE-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE1-LABEL: out_v4i16:
|
||||
; CHECK-SSE1: # %bb.0:
|
||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: xorl %r11d, %edx
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
|
||||
; CHECK-SSE1-NEXT: xorl %r11d, %edx
|
||||
@ -475,13 +468,10 @@ define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwin
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %r8d
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %r8d
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: movw %si, (%rax)
|
||||
; CHECK-SSE1-NEXT: movw %r8w, 6(%rax)
|
||||
; CHECK-SSE1-NEXT: movw %cx, 4(%rax)
|
||||
; CHECK-SSE1-NEXT: movw %dx, 2(%rax)
|
||||
; CHECK-SSE1-NEXT: movw %si, (%rax)
|
||||
; CHECK-SSE1-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE2-LABEL: out_v4i16:
|
||||
@ -506,43 +496,43 @@ define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) n
|
||||
; CHECK-BASELINE-LABEL: out_v4i16_undef:
|
||||
; CHECK-BASELINE: # %bb.0:
|
||||
; CHECK-BASELINE-NEXT: movq %rdi, %rax
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: xorl %r10d, %edx
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
|
||||
; CHECK-BASELINE-NEXT: xorl %r10d, %edx
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
|
||||
; CHECK-BASELINE-NEXT: movw %cx, 4(%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %si, (%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %dx, 2(%rax)
|
||||
; CHECK-BASELINE-NEXT: movw %si, (%rax)
|
||||
; CHECK-BASELINE-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE1-LABEL: out_v4i16_undef:
|
||||
; CHECK-SSE1: # %bb.0:
|
||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: xorl %r10d, %edx
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
|
||||
; CHECK-SSE1-NEXT: xorl %r10d, %edx
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %r8d
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %r8d
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-SSE1-NEXT: xorl %r9d, %esi
|
||||
; CHECK-SSE1-NEXT: movw %cx, 4(%rax)
|
||||
; CHECK-SSE1-NEXT: movw %si, (%rax)
|
||||
; CHECK-SSE1-NEXT: movw %r8w, 6(%rax)
|
||||
; CHECK-SSE1-NEXT: movw %dx, 2(%rax)
|
||||
; CHECK-SSE1-NEXT: movw %si, (%rax)
|
||||
; CHECK-SSE1-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE2-LABEL: out_v4i16_undef:
|
||||
@ -883,14 +873,14 @@ define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwin
|
||||
; CHECK-BASELINE-NEXT: pushq %r12
|
||||
; CHECK-BASELINE-NEXT: pushq %rbx
|
||||
; CHECK-BASELINE-NEXT: movq %rdi, %rax
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r15d
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebp
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r14d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r15d
|
||||
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r12d
|
||||
; CHECK-BASELINE-NEXT: xorl %r12d, %esi
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %r12d, %esi
|
||||
@ -906,16 +896,16 @@ define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwin
|
||||
; CHECK-BASELINE-NEXT: xorl %ebx, %r9d
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w
|
||||
; CHECK-BASELINE-NEXT: xorl %ebx, %r9d
|
||||
; CHECK-BASELINE-NEXT: movl %r11d, %ebx
|
||||
; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %bx
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-BASELINE-NEXT: xorw %r11w, %bx
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx
|
||||
; CHECK-BASELINE-NEXT: xorl %r11d, %ebx
|
||||
; CHECK-BASELINE-NEXT: movl %r10d, %r11d
|
||||
; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %r11w
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-BASELINE-NEXT: xorw %r10w, %r11w
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w
|
||||
; CHECK-BASELINE-NEXT: xorl %r10d, %r11d
|
||||
; CHECK-BASELINE-NEXT: movl %edi, %r10d
|
||||
; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %r10w
|
||||
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-BASELINE-NEXT: xorw %di, %r10w
|
||||
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %r10d
|
||||
; CHECK-BASELINE-NEXT: movw %r10w, 14(%rax)
|
||||
@ -941,14 +931,14 @@ define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwin
|
||||
; CHECK-SSE1-NEXT: pushq %r12
|
||||
; CHECK-SSE1-NEXT: pushq %rbx
|
||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r15d
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebp
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r14d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r15d
|
||||
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r12d
|
||||
; CHECK-SSE1-NEXT: xorl %r12d, %esi
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
|
||||
; CHECK-SSE1-NEXT: xorl %r12d, %esi
|
||||
@ -964,16 +954,16 @@ define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwin
|
||||
; CHECK-SSE1-NEXT: xorl %ebx, %r9d
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w
|
||||
; CHECK-SSE1-NEXT: xorl %ebx, %r9d
|
||||
; CHECK-SSE1-NEXT: movl %r11d, %ebx
|
||||
; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %bx
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-SSE1-NEXT: xorw %r11w, %bx
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx
|
||||
; CHECK-SSE1-NEXT: xorl %r11d, %ebx
|
||||
; CHECK-SSE1-NEXT: movl %r10d, %r11d
|
||||
; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %r11w
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
|
||||
; CHECK-SSE1-NEXT: xorw %r10w, %r11w
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w
|
||||
; CHECK-SSE1-NEXT: xorl %r10d, %r11d
|
||||
; CHECK-SSE1-NEXT: movl %edi, %r10d
|
||||
; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %r10w
|
||||
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
|
||||
; CHECK-SSE1-NEXT: xorw %di, %r10w
|
||||
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %r10d
|
||||
; CHECK-SSE1-NEXT: movw %r10w, 14(%rax)
|
||||
@ -1759,113 +1749,117 @@ define <16 x i16> @out_v16i16(ptr%px, ptr%py, ptr%pmask) nounwind {
|
||||
; CHECK-BASELINE-NEXT: pushq %r13
|
||||
; CHECK-BASELINE-NEXT: pushq %r12
|
||||
; CHECK-BASELINE-NEXT: pushq %rbx
|
||||
; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r15d
|
||||
; CHECK-BASELINE-NEXT: movzwl 16(%rdx), %r14d
|
||||
; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %ebp
|
||||
; CHECK-BASELINE-NEXT: movzwl 12(%rdx), %ebx
|
||||
; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r13d
|
||||
; CHECK-BASELINE-NEXT: movzwl 8(%rdx), %r11d
|
||||
; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %r10d
|
||||
; CHECK-BASELINE-NEXT: movzwl 4(%rdx), %r9d
|
||||
; CHECK-BASELINE-NEXT: movzwl (%rdx), %r8d
|
||||
; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %r12d
|
||||
; CHECK-BASELINE-NEXT: movzwl (%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r8w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw (%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r8d
|
||||
; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r12w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 2(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r12d
|
||||
; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r9w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 4(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r9d
|
||||
; CHECK-BASELINE-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r10w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 6(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r10d
|
||||
; CHECK-BASELINE-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r11w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 8(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r11d
|
||||
; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r13w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 10(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r13d
|
||||
; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: movq %rcx, %r10
|
||||
; CHECK-BASELINE-NEXT: movq %rdx, %r8
|
||||
; CHECK-BASELINE-NEXT: movq %rsi, %r9
|
||||
; CHECK-BASELINE-NEXT: movq %rdi, %r11
|
||||
; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %ebp
|
||||
; CHECK-BASELINE-NEXT: movl 16(%rdx), %r15d
|
||||
; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r13d
|
||||
; CHECK-BASELINE-NEXT: movl 12(%rdx), %r12d
|
||||
; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r14d
|
||||
; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx
|
||||
; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %eax
|
||||
; CHECK-BASELINE-NEXT: movl (%rdx), %ecx
|
||||
; CHECK-BASELINE-NEXT: movl 4(%rdx), %edx
|
||||
; CHECK-BASELINE-NEXT: movzwl 2(%r8), %esi
|
||||
; CHECK-BASELINE-NEXT: movzwl (%r9), %edi
|
||||
; CHECK-BASELINE-NEXT: xorw %cx, %di
|
||||
; CHECK-BASELINE-NEXT: andw (%r10), %di
|
||||
; CHECK-BASELINE-NEXT: xorl %ecx, %edi
|
||||
; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 2(%r9), %ecx
|
||||
; CHECK-BASELINE-NEXT: xorw %si, %cx
|
||||
; CHECK-BASELINE-NEXT: andw 2(%r10), %cx
|
||||
; CHECK-BASELINE-NEXT: xorl %esi, %ecx
|
||||
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 4(%r9), %ecx
|
||||
; CHECK-BASELINE-NEXT: xorw %dx, %cx
|
||||
; CHECK-BASELINE-NEXT: andw 4(%r10), %cx
|
||||
; CHECK-BASELINE-NEXT: xorl %edx, %ecx
|
||||
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 6(%r9), %ecx
|
||||
; CHECK-BASELINE-NEXT: xorw %ax, %cx
|
||||
; CHECK-BASELINE-NEXT: andw 6(%r10), %cx
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %ecx
|
||||
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 8(%r9), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %bx, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 12(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %ebx
|
||||
; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %bp, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 14(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: andw 8(%r10), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %ebx, %eax
|
||||
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-BASELINE-NEXT: movzwl 10(%r9), %ebx
|
||||
; CHECK-BASELINE-NEXT: xorw %r14w, %bx
|
||||
; CHECK-BASELINE-NEXT: andw 10(%r10), %bx
|
||||
; CHECK-BASELINE-NEXT: xorl %r14d, %ebx
|
||||
; CHECK-BASELINE-NEXT: movzwl 12(%r9), %r14d
|
||||
; CHECK-BASELINE-NEXT: xorw %r12w, %r14w
|
||||
; CHECK-BASELINE-NEXT: andw 12(%r10), %r14w
|
||||
; CHECK-BASELINE-NEXT: xorl %r12d, %r14d
|
||||
; CHECK-BASELINE-NEXT: movzwl 14(%r9), %r12d
|
||||
; CHECK-BASELINE-NEXT: xorw %r13w, %r12w
|
||||
; CHECK-BASELINE-NEXT: andw 14(%r10), %r12w
|
||||
; CHECK-BASELINE-NEXT: xorl %r13d, %r12d
|
||||
; CHECK-BASELINE-NEXT: movzwl 16(%r9), %r13d
|
||||
; CHECK-BASELINE-NEXT: xorw %r15w, %r13w
|
||||
; CHECK-BASELINE-NEXT: andw 16(%r10), %r13w
|
||||
; CHECK-BASELINE-NEXT: xorl %r15d, %r13d
|
||||
; CHECK-BASELINE-NEXT: movzwl 18(%r9), %r15d
|
||||
; CHECK-BASELINE-NEXT: xorw %bp, %r15w
|
||||
; CHECK-BASELINE-NEXT: andw 18(%r10), %r15w
|
||||
; CHECK-BASELINE-NEXT: xorl %ebp, %r15d
|
||||
; CHECK-BASELINE-NEXT: movl 20(%r8), %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 20(%r9), %ebp
|
||||
; CHECK-BASELINE-NEXT: xorw %ax, %bp
|
||||
; CHECK-BASELINE-NEXT: andw 20(%r10), %bp
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %ebp
|
||||
; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r14w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 16(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r14d
|
||||
; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r15w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 18(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r15d
|
||||
; CHECK-BASELINE-NEXT: movzwl 20(%rdx), %r13d
|
||||
; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r13w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 20(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r13d
|
||||
; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %r9d
|
||||
; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r9w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 22(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r9d
|
||||
; CHECK-BASELINE-NEXT: movzwl 24(%rdx), %r8d
|
||||
; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %r8w, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 24(%rcx), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %r8d
|
||||
; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r10d
|
||||
; CHECK-BASELINE-NEXT: xorw %ax, %r10w
|
||||
; CHECK-BASELINE-NEXT: andw 26(%rcx), %r10w
|
||||
; CHECK-BASELINE-NEXT: xorl %r10d, %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 28(%rdx), %r10d
|
||||
; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %r11d
|
||||
; CHECK-BASELINE-NEXT: xorw %r10w, %r11w
|
||||
; CHECK-BASELINE-NEXT: andw 28(%rcx), %r11w
|
||||
; CHECK-BASELINE-NEXT: xorl %r11d, %r10d
|
||||
; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edx
|
||||
; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi
|
||||
; CHECK-BASELINE-NEXT: xorw %dx, %si
|
||||
; CHECK-BASELINE-NEXT: andw 30(%rcx), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %esi, %edx
|
||||
; CHECK-BASELINE-NEXT: movw %dx, 30(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r10w, 28(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 26(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r8w, 24(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r9w, 22(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r13w, 20(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r15w, 18(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r14w, 16(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %bp, 14(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %bx, 12(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movzwl 22(%r8), %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 22(%r9), %esi
|
||||
; CHECK-BASELINE-NEXT: xorw %ax, %si
|
||||
; CHECK-BASELINE-NEXT: andw 22(%r10), %si
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %esi
|
||||
; CHECK-BASELINE-NEXT: movl 24(%r8), %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 24(%r9), %edx
|
||||
; CHECK-BASELINE-NEXT: xorw %ax, %dx
|
||||
; CHECK-BASELINE-NEXT: andw 24(%r10), %dx
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %edx
|
||||
; CHECK-BASELINE-NEXT: movzwl 26(%r8), %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 26(%r9), %ecx
|
||||
; CHECK-BASELINE-NEXT: xorw %ax, %cx
|
||||
; CHECK-BASELINE-NEXT: andw 26(%r10), %cx
|
||||
; CHECK-BASELINE-NEXT: xorl %eax, %ecx
|
||||
; CHECK-BASELINE-NEXT: movl 28(%r8), %edi
|
||||
; CHECK-BASELINE-NEXT: movzwl 28(%r9), %eax
|
||||
; CHECK-BASELINE-NEXT: xorw %di, %ax
|
||||
; CHECK-BASELINE-NEXT: andw 28(%r10), %ax
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %eax
|
||||
; CHECK-BASELINE-NEXT: movzwl 30(%r8), %edi
|
||||
; CHECK-BASELINE-NEXT: movzwl 30(%r9), %r8d
|
||||
; CHECK-BASELINE-NEXT: xorw %di, %r8w
|
||||
; CHECK-BASELINE-NEXT: andw 30(%r10), %r8w
|
||||
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
|
||||
; CHECK-BASELINE-NEXT: movw %r8w, 30(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 28(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %cx, 26(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %dx, 24(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %si, 22(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %bp, 20(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %r15w, 18(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %r13w, 16(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %r12w, 14(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %r14w, 12(%r11)
|
||||
; CHECK-BASELINE-NEXT: movw %bx, 10(%r11)
|
||||
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 10(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 8(%r11)
|
||||
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 8(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 6(%r11)
|
||||
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 4(%r11)
|
||||
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 4(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %r12w, 2(%rdi)
|
||||
; CHECK-BASELINE-NEXT: movw %ax, 2(%r11)
|
||||
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-BASELINE-NEXT: movw %ax, (%rdi)
|
||||
; CHECK-BASELINE-NEXT: movq %rdi, %rax
|
||||
; CHECK-BASELINE-NEXT: movw %ax, (%r11)
|
||||
; CHECK-BASELINE-NEXT: movq %r11, %rax
|
||||
; CHECK-BASELINE-NEXT: popq %rbx
|
||||
; CHECK-BASELINE-NEXT: popq %r12
|
||||
; CHECK-BASELINE-NEXT: popq %r13
|
||||
@ -1882,113 +1876,117 @@ define <16 x i16> @out_v16i16(ptr%px, ptr%py, ptr%pmask) nounwind {
|
||||
; CHECK-SSE1-NEXT: pushq %r13
|
||||
; CHECK-SSE1-NEXT: pushq %r12
|
||||
; CHECK-SSE1-NEXT: pushq %rbx
|
||||
; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r15d
|
||||
; CHECK-SSE1-NEXT: movzwl 16(%rdx), %r14d
|
||||
; CHECK-SSE1-NEXT: movzwl 14(%rdx), %ebp
|
||||
; CHECK-SSE1-NEXT: movzwl 12(%rdx), %ebx
|
||||
; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r13d
|
||||
; CHECK-SSE1-NEXT: movzwl 8(%rdx), %r11d
|
||||
; CHECK-SSE1-NEXT: movzwl 6(%rdx), %r10d
|
||||
; CHECK-SSE1-NEXT: movzwl 4(%rdx), %r9d
|
||||
; CHECK-SSE1-NEXT: movzwl (%rdx), %r8d
|
||||
; CHECK-SSE1-NEXT: movzwl 2(%rdx), %r12d
|
||||
; CHECK-SSE1-NEXT: movzwl (%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r8w, %ax
|
||||
; CHECK-SSE1-NEXT: andw (%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r8d
|
||||
; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 2(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r12w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 2(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r12d
|
||||
; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r9w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 4(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r9d
|
||||
; CHECK-SSE1-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 6(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r10w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 6(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r10d
|
||||
; CHECK-SSE1-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 8(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r11w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 8(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r11d
|
||||
; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 10(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r13w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 10(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r13d
|
||||
; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 12(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: movq %rcx, %r10
|
||||
; CHECK-SSE1-NEXT: movq %rdx, %r8
|
||||
; CHECK-SSE1-NEXT: movq %rsi, %r9
|
||||
; CHECK-SSE1-NEXT: movq %rdi, %r11
|
||||
; CHECK-SSE1-NEXT: movzwl 18(%rdx), %ebp
|
||||
; CHECK-SSE1-NEXT: movl 16(%rdx), %r15d
|
||||
; CHECK-SSE1-NEXT: movzwl 14(%rdx), %r13d
|
||||
; CHECK-SSE1-NEXT: movl 12(%rdx), %r12d
|
||||
; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r14d
|
||||
; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx
|
||||
; CHECK-SSE1-NEXT: movzwl 6(%rdx), %eax
|
||||
; CHECK-SSE1-NEXT: movl (%rdx), %ecx
|
||||
; CHECK-SSE1-NEXT: movl 4(%rdx), %edx
|
||||
; CHECK-SSE1-NEXT: movzwl 2(%r8), %esi
|
||||
; CHECK-SSE1-NEXT: movzwl (%r9), %edi
|
||||
; CHECK-SSE1-NEXT: xorw %cx, %di
|
||||
; CHECK-SSE1-NEXT: andw (%r10), %di
|
||||
; CHECK-SSE1-NEXT: xorl %ecx, %edi
|
||||
; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 2(%r9), %ecx
|
||||
; CHECK-SSE1-NEXT: xorw %si, %cx
|
||||
; CHECK-SSE1-NEXT: andw 2(%r10), %cx
|
||||
; CHECK-SSE1-NEXT: xorl %esi, %ecx
|
||||
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 4(%r9), %ecx
|
||||
; CHECK-SSE1-NEXT: xorw %dx, %cx
|
||||
; CHECK-SSE1-NEXT: andw 4(%r10), %cx
|
||||
; CHECK-SSE1-NEXT: xorl %edx, %ecx
|
||||
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 6(%r9), %ecx
|
||||
; CHECK-SSE1-NEXT: xorw %ax, %cx
|
||||
; CHECK-SSE1-NEXT: andw 6(%r10), %cx
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %ecx
|
||||
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 8(%r9), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %bx, %ax
|
||||
; CHECK-SSE1-NEXT: andw 12(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %ebx
|
||||
; CHECK-SSE1-NEXT: movzwl 14(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %bp, %ax
|
||||
; CHECK-SSE1-NEXT: andw 14(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: andw 8(%r10), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %ebx, %eax
|
||||
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-SSE1-NEXT: movzwl 10(%r9), %ebx
|
||||
; CHECK-SSE1-NEXT: xorw %r14w, %bx
|
||||
; CHECK-SSE1-NEXT: andw 10(%r10), %bx
|
||||
; CHECK-SSE1-NEXT: xorl %r14d, %ebx
|
||||
; CHECK-SSE1-NEXT: movzwl 12(%r9), %r14d
|
||||
; CHECK-SSE1-NEXT: xorw %r12w, %r14w
|
||||
; CHECK-SSE1-NEXT: andw 12(%r10), %r14w
|
||||
; CHECK-SSE1-NEXT: xorl %r12d, %r14d
|
||||
; CHECK-SSE1-NEXT: movzwl 14(%r9), %r12d
|
||||
; CHECK-SSE1-NEXT: xorw %r13w, %r12w
|
||||
; CHECK-SSE1-NEXT: andw 14(%r10), %r12w
|
||||
; CHECK-SSE1-NEXT: xorl %r13d, %r12d
|
||||
; CHECK-SSE1-NEXT: movzwl 16(%r9), %r13d
|
||||
; CHECK-SSE1-NEXT: xorw %r15w, %r13w
|
||||
; CHECK-SSE1-NEXT: andw 16(%r10), %r13w
|
||||
; CHECK-SSE1-NEXT: xorl %r15d, %r13d
|
||||
; CHECK-SSE1-NEXT: movzwl 18(%r9), %r15d
|
||||
; CHECK-SSE1-NEXT: xorw %bp, %r15w
|
||||
; CHECK-SSE1-NEXT: andw 18(%r10), %r15w
|
||||
; CHECK-SSE1-NEXT: xorl %ebp, %r15d
|
||||
; CHECK-SSE1-NEXT: movl 20(%r8), %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 20(%r9), %ebp
|
||||
; CHECK-SSE1-NEXT: xorw %ax, %bp
|
||||
; CHECK-SSE1-NEXT: andw 20(%r10), %bp
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %ebp
|
||||
; CHECK-SSE1-NEXT: movzwl 16(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r14w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 16(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r14d
|
||||
; CHECK-SSE1-NEXT: movzwl 18(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r15w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 18(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r15d
|
||||
; CHECK-SSE1-NEXT: movzwl 20(%rdx), %r13d
|
||||
; CHECK-SSE1-NEXT: movzwl 20(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r13w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 20(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r13d
|
||||
; CHECK-SSE1-NEXT: movzwl 22(%rdx), %r9d
|
||||
; CHECK-SSE1-NEXT: movzwl 22(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r9w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 22(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r9d
|
||||
; CHECK-SSE1-NEXT: movzwl 24(%rdx), %r8d
|
||||
; CHECK-SSE1-NEXT: movzwl 24(%rsi), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %r8w, %ax
|
||||
; CHECK-SSE1-NEXT: andw 24(%rcx), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %r8d
|
||||
; CHECK-SSE1-NEXT: movzwl 26(%rdx), %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r10d
|
||||
; CHECK-SSE1-NEXT: xorw %ax, %r10w
|
||||
; CHECK-SSE1-NEXT: andw 26(%rcx), %r10w
|
||||
; CHECK-SSE1-NEXT: xorl %r10d, %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 28(%rdx), %r10d
|
||||
; CHECK-SSE1-NEXT: movzwl 28(%rsi), %r11d
|
||||
; CHECK-SSE1-NEXT: xorw %r10w, %r11w
|
||||
; CHECK-SSE1-NEXT: andw 28(%rcx), %r11w
|
||||
; CHECK-SSE1-NEXT: xorl %r11d, %r10d
|
||||
; CHECK-SSE1-NEXT: movzwl 30(%rdx), %edx
|
||||
; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi
|
||||
; CHECK-SSE1-NEXT: xorw %dx, %si
|
||||
; CHECK-SSE1-NEXT: andw 30(%rcx), %si
|
||||
; CHECK-SSE1-NEXT: xorl %esi, %edx
|
||||
; CHECK-SSE1-NEXT: movw %dx, 30(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r10w, 28(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %ax, 26(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r8w, 24(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r9w, 22(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r13w, 20(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r15w, 18(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r14w, 16(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %bp, 14(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %bx, 12(%rdi)
|
||||
; CHECK-SSE1-NEXT: movzwl 22(%r8), %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 22(%r9), %esi
|
||||
; CHECK-SSE1-NEXT: xorw %ax, %si
|
||||
; CHECK-SSE1-NEXT: andw 22(%r10), %si
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %esi
|
||||
; CHECK-SSE1-NEXT: movl 24(%r8), %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 24(%r9), %edx
|
||||
; CHECK-SSE1-NEXT: xorw %ax, %dx
|
||||
; CHECK-SSE1-NEXT: andw 24(%r10), %dx
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %edx
|
||||
; CHECK-SSE1-NEXT: movzwl 26(%r8), %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 26(%r9), %ecx
|
||||
; CHECK-SSE1-NEXT: xorw %ax, %cx
|
||||
; CHECK-SSE1-NEXT: andw 26(%r10), %cx
|
||||
; CHECK-SSE1-NEXT: xorl %eax, %ecx
|
||||
; CHECK-SSE1-NEXT: movl 28(%r8), %edi
|
||||
; CHECK-SSE1-NEXT: movzwl 28(%r9), %eax
|
||||
; CHECK-SSE1-NEXT: xorw %di, %ax
|
||||
; CHECK-SSE1-NEXT: andw 28(%r10), %ax
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %eax
|
||||
; CHECK-SSE1-NEXT: movzwl 30(%r8), %edi
|
||||
; CHECK-SSE1-NEXT: movzwl 30(%r9), %r8d
|
||||
; CHECK-SSE1-NEXT: xorw %di, %r8w
|
||||
; CHECK-SSE1-NEXT: andw 30(%r10), %r8w
|
||||
; CHECK-SSE1-NEXT: xorl %edi, %r8d
|
||||
; CHECK-SSE1-NEXT: movw %r8w, 30(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %ax, 28(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %cx, 26(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %dx, 24(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %si, 22(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %bp, 20(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %r15w, 18(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %r13w, 16(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %r12w, 14(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %r14w, 12(%r11)
|
||||
; CHECK-SSE1-NEXT: movw %bx, 10(%r11)
|
||||
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-SSE1-NEXT: movw %ax, 10(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %ax, 8(%r11)
|
||||
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-SSE1-NEXT: movw %ax, 8(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %ax, 6(%r11)
|
||||
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-SSE1-NEXT: movw %ax, 6(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %ax, 4(%r11)
|
||||
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-SSE1-NEXT: movw %ax, 4(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %r12w, 2(%rdi)
|
||||
; CHECK-SSE1-NEXT: movw %ax, 2(%r11)
|
||||
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
|
||||
; CHECK-SSE1-NEXT: movw %ax, (%rdi)
|
||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||
; CHECK-SSE1-NEXT: movw %ax, (%r11)
|
||||
; CHECK-SSE1-NEXT: movq %r11, %rax
|
||||
; CHECK-SSE1-NEXT: popq %rbx
|
||||
; CHECK-SSE1-NEXT: popq %r12
|
||||
; CHECK-SSE1-NEXT: popq %r13
|
||||
|
Loading…
x
Reference in New Issue
Block a user