diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc5c5f38e3bd..8136f1794775 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -649,14 +649,15 @@ namespace { bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, - SDValue InnerPos, SDValue InnerNeg, bool HasPos, - unsigned PosOpcode, unsigned NegOpcode, - const SDLoc &DL); + SDValue InnerPos, SDValue InnerNeg, bool FromAdd, + bool HasPos, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL); SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, - SDValue InnerPos, SDValue InnerNeg, bool HasPos, - unsigned PosOpcode, unsigned NegOpcode, - const SDLoc &DL); - SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue InnerPos, SDValue InnerNeg, bool FromAdd, + bool HasPos, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL, + bool FromAdd); SDValue MatchLoadCombine(SDNode *N); SDValue mergeTruncStores(StoreSDNode *N); SDValue reduceLoadWidth(SDNode *N); @@ -2986,6 +2987,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (SDValue V = foldAddSubOfSignBit(N, DL, DAG)) return V; + if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true)) + return V; + // Try to match AVGFLOOR fixedwidth pattern if (SDValue V = foldAddToAvg(N, DL)) return V; @@ -8175,7 +8179,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return V; // See if this is some rotate idiom. - if (SDValue Rot = MatchRotate(N0, N1, DL)) + if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false)) return Rot; if (SDValue Load = MatchLoadCombine(N)) @@ -8364,7 +8368,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // The IsRotate flag should be set when the LHS of both shifts is the same. // Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, - SelectionDAG &DAG, bool IsRotate) { + SelectionDAG &DAG, bool IsRotate, bool FromAdd) { const auto &TLI = DAG.getTargetLoweringInfo(); // If EltSize is a power of 2 then: // @@ -8403,7 +8407,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // NOTE: We can only do this when matching operations which won't modify the // least Log2(EltSize) significant bits and not a general funnel shift. unsigned MaskLoBits = 0; - if (IsRotate && isPowerOf2_64(EltSize)) { + if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) { unsigned Bits = Log2_64(EltSize); unsigned NegBits = Neg.getScalarValueSizeInBits(); if (NegBits >= Bits) { @@ -8486,22 +8490,21 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // Neg with outer conversions stripped away. 
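[Editorial aside, not part of the patch: a minimal standalone sketch of why the modulo-mask shortcut guarded by !FromAdd above is unsafe when the match comes from an 'add'. With a masked negative amount the y == 0 case is well defined, and an 'or' of the two shifts still equals rotl(x, 0) == x, but an 'add' doubles x instead. The snippet below checks exactly that corner case; all names in it are illustrative only.

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t X = 0xDEADBEEFu;
    const unsigned Y = 0, EltBits = 32;
    uint32_t Hi = X << Y;                     // shl x, y
    uint32_t Lo = X >> ((EltBits - Y) & 31);  // srl x, (sub 32, y) & 31
    assert((Hi | Lo) == X);      // or-based idiom still equals rotl(x, 0)
    assert((Hi + Lo) == 2 * X);  // add-based idiom gives 2*x, not x
    return 0;
  }
]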
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, - SDValue InnerNeg, bool HasPos, - unsigned PosOpcode, unsigned NegOpcode, - const SDLoc &DL) { - // fold (or (shl x, (*ext y)), - // (srl x, (*ext (sub 32, y)))) -> + SDValue InnerNeg, bool FromAdd, + bool HasPos, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL) { + // fold (or/add (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) // - // fold (or (shl x, (*ext (sub 32, y))), - // (srl x, (*ext y))) -> + // fold (or/add (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, - /*IsRotate*/ true)) { + /*IsRotate*/ true, FromAdd)) return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); - } return SDValue(); } @@ -8514,30 +8517,30 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // TODO: Merge with MatchRotatePosNeg. SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, - SDValue InnerNeg, bool HasPos, - unsigned PosOpcode, unsigned NegOpcode, - const SDLoc &DL) { + SDValue InnerNeg, bool FromAdd, + bool HasPos, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL) { EVT VT = N0.getValueType(); unsigned EltBits = VT.getScalarSizeInBits(); - // fold (or (shl x0, (*ext y)), - // (srl x1, (*ext (sub 32, y)))) -> + // fold (or/add (shl x0, (*ext y)), + // (srl x1, (*ext (sub 32, y)))) -> // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y)) // - // fold (or (shl x0, (*ext (sub 32, y))), - // (srl x1, (*ext y))) -> + // fold (or/add (shl x0, (*ext (sub 32, y))), + // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) - if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1, + FromAdd)) return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); - } // Matching the shift+xor cases, we can't easily use the xor'd shift amount // so for now just use the PosOpcode case if its legal. // TODO: When can we use the NegOpcode case? if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) { SDValue X; - // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31))) + // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31))) // -> (fshl x0, x1, y) if (sd_match(N1, m_Srl(m_Value(X), m_One())) && sd_match(InnerNeg, @@ -8546,7 +8549,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos); } - // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) + // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) // -> (fshr x0, x1, y) if (sd_match(N0, m_Shl(m_Value(X), m_One())) && sd_match(InnerPos, @@ -8555,7 +8558,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg); } - // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y)) + // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y)) // -> (fshr x0, x1, y) // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization? if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) && @@ -8569,11 +8572,12 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, return SDValue(); } -// MatchRotate - Handle an 'or' of two operands. 
If this is one of the many -// idioms for rotate, and if the target supports rotation instructions, generate -// a rot[lr]. This also matches funnel shift patterns, similar to rotation but -// with different shifted sources. -SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the +// many idioms for rotate, and if the target supports rotation instructions, +// generate a rot[lr]. This also matches funnel shift patterns, similar to +// rotation but with different shifted sources. +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL, + bool FromAdd) { EVT VT = LHS.getValueType(); // The target must have at least one rotate/funnel flavor. @@ -8600,9 +8604,9 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { assert(LHS.getValueType() == RHS.getValueType()); - if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + if (SDValue Rot = + MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd)) return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); - } } // Match "(X shl/srl V1) & V2" where V2 may not be present. @@ -8736,10 +8740,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return SDValue(); // Requires funnel shift support. } - // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) - // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) - // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) + // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2) + // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) + // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) // iff C1+C2 == EltSizeInBits if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Res; @@ -8782,29 +8786,25 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } if (IsRotate && (HasROTL || HasROTR)) { - SDValue TryL = - MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, - RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL); - if (TryL) + if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, FromAdd, HasROTL, + ISD::ROTL, ISD::ROTR, DL)) return TryL; - SDValue TryR = - MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, - LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL); - if (TryR) + if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, FromAdd, HasROTR, + ISD::ROTR, ISD::ROTL, DL)) return TryR; } - SDValue TryL = - MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt, - LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL); - if (TryL) + if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, + RHSShiftAmt, LExtOp0, RExtOp0, FromAdd, + HasFSHL, ISD::FSHL, ISD::FSHR, DL)) return TryL; - SDValue TryR = - MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt, - RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL); - if (TryR) + if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, + LHSShiftAmt, RExtOp0, LExtOp0, FromAdd, + HasFSHR, ISD::FSHR, ISD::FSHL, DL)) return TryR; return SDValue(); diff --git 
a/llvm/test/CodeGen/AMDGPU/rotate-add.ll b/llvm/test/CodeGen/AMDGPU/rotate-add.ll index faf89f41bdf8..53a49c9a21e2 100644 --- a/llvm/test/CodeGen/AMDGPU/rotate-add.ll +++ b/llvm/test/CodeGen/AMDGPU/rotate-add.ll @@ -44,19 +44,15 @@ define i32 @test_rotl_var(i32 %x, i32 %y) { ; SI-LABEL: test_rotl_var: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshlrev_b32_e32 v2, v1, v0 ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 -; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; SI-NEXT: v_alignbit_b32 v0, v0, v0, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: test_rotl_var: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v2, v1, v0 ; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 -; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; VI-NEXT: v_alignbit_b32 v0, v0, v0, v1 ; VI-NEXT: s_setpc_b64 s[30:31] %shl = shl i32 %x, %y %sub = sub i32 32, %y @@ -69,19 +65,13 @@ define i32 @test_rotr_var(i32 %x, i32 %y) { ; SI-LABEL: test_rotr_var: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v2, v1, v0 -; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; SI-NEXT: v_alignbit_b32 v0, v0, v0, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: test_rotr_var: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v2, v1, v0 -; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 -; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; VI-NEXT: v_alignbit_b32 v0, v0, v0, v1 ; VI-NEXT: s_setpc_b64 s[30:31] %shr = lshr i32 %x, %y %sub = sub i32 32, %y @@ -174,21 +164,13 @@ define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) { ; SI-LABEL: test_fshr_special_case: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v1, v2, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; SI-NEXT: v_xor_b32_e32 v2, 31, v2 -; SI-NEXT: v_lshlrev_b32_e32 v0, v2, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: test_fshr_special_case: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v1, v2, v1 -; VI-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; VI-NEXT: v_xor_b32_e32 v2, 31, v2 -; VI-NEXT: v_lshlrev_b32_e32 v0, v2, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0 +; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] %shl = lshr i32 %x1, %y %srli = shl i32 %x0, 1 diff --git a/llvm/test/CodeGen/ARM/rotate-add.ll b/llvm/test/CodeGen/ARM/rotate-add.ll index 9325e8b062dd..fd3055e5e272 100644 --- a/llvm/test/CodeGen/ARM/rotate-add.ll +++ b/llvm/test/CodeGen/ARM/rotate-add.ll @@ -29,9 +29,8 @@ define i32 @test_simple_rotr(i32 %x) { define i32 @test_rotl_var(i32 %x, i32 %y) { ; CHECK-LABEL: test_rotl_var: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsl r2, r0, r1 ; CHECK-NEXT: rsb r1, r1, #32 -; CHECK-NEXT: add r0, r2, r0, lsr r1 +; CHECK-NEXT: ror r0, r0, r1 ; CHECK-NEXT: bx lr %shl = shl i32 %x, %y %sub = sub i32 32, %y @@ -43,9 +42,7 @@ define i32 @test_rotl_var(i32 %x, i32 %y) { define i32 @test_rotr_var(i32 %x, i32 %y) { ; CHECK-LABEL: test_rotr_var: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsr r2, r0, r1 -; CHECK-NEXT: rsb r1, r1, #32 -; CHECK-NEXT: add r0, r2, r0, lsl r1 +; CHECK-NEXT: ror r0, r0, r1 ; CHECK-NEXT: bx lr %shr = lshr i32 %x, %y %sub = sub i32 32, %y 
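[Editorial aside, not part of the patch: the ARM and AMDGPU checks above and the NVPTX/X86 checks below all exercise the same source-level idiom, a rotate written with '+' instead of '|'. As a hedged illustration (not taken from the patch or its tests), a helper like the following produces the add-of-shifts DAG that MatchRotate can now fold to a single rotate, assuming the amount stays in [1, 31] so neither shift is out of range. The IR optimizer may fold such code even earlier; this DAG combine covers patterns that only surface at instruction selection. The helper name is illustrative only.

  #include <cstdint>

  // Rotate-left written with '+'. Callers must keep Y in [1, 31]:
  // Y == 0 would shift by 32, which is undefined behaviour in C++.
  uint32_t rotl_via_add(uint32_t X, unsigned Y) {
    return (X << Y) + (X >> (32 - Y));
  }
]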
diff --git a/llvm/test/CodeGen/NVPTX/rotate-add.ll b/llvm/test/CodeGen/NVPTX/rotate-add.ll index c79a95958eca..820e8000a565 100644 --- a/llvm/test/CodeGen/NVPTX/rotate-add.ll +++ b/llvm/test/CodeGen/NVPTX/rotate-add.ll @@ -39,16 +39,13 @@ define i32 @test_simple_rotr(i32 %x) { define i32 @test_rotl_var(i32 %x, i32 %y) { ; CHECK-LABEL: test_rotl_var( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [test_rotl_var_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test_rotl_var_param_1]; -; CHECK-NEXT: shl.b32 %r3, %r1, %r2; -; CHECK-NEXT: sub.s32 %r4, 32, %r2; -; CHECK-NEXT: shr.u32 %r5, %r1, %r4; -; CHECK-NEXT: add.s32 %r6, %r3, %r5; -; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %shl = shl i32 %x, %y %sub = sub i32 32, %y @@ -60,16 +57,13 @@ define i32 @test_rotl_var(i32 %x, i32 %y) { define i32 @test_rotr_var(i32 %x, i32 %y) { ; CHECK-LABEL: test_rotr_var( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [test_rotr_var_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test_rotr_var_param_1]; -; CHECK-NEXT: shr.u32 %r3, %r1, %r2; -; CHECK-NEXT: sub.s32 %r4, 32, %r2; -; CHECK-NEXT: shl.b32 %r5, %r1, %r4; -; CHECK-NEXT: add.s32 %r6, %r3, %r5; -; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %shr = lshr i32 %x, %y %sub = sub i32 32, %y @@ -127,18 +121,14 @@ define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) { define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) { ; CHECK-LABEL: test_fshl_special_case( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [test_fshl_special_case_param_0]; -; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_2]; -; CHECK-NEXT: shl.b32 %r3, %r1, %r2; -; CHECK-NEXT: ld.param.u32 %r4, [test_fshl_special_case_param_1]; -; CHECK-NEXT: shr.u32 %r5, %r4, 1; -; CHECK-NEXT: xor.b32 %r6, %r2, 31; -; CHECK-NEXT: shr.u32 %r7, %r5, %r6; -; CHECK-NEXT: add.s32 %r8, %r3, %r7; -; CHECK-NEXT: st.param.b32 [func_retval0], %r8; +; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_1]; +; CHECK-NEXT: ld.param.u32 %r3, [test_fshl_special_case_param_2]; +; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; %shl = shl i32 %x0, %y %srli = lshr i32 %x1, 1 @@ -151,18 +141,14 @@ define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) { define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) { ; CHECK-LABEL: test_fshr_special_case( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.u32 %r1, [test_fshr_special_case_param_0]; ; CHECK-NEXT: ld.param.u32 %r2, [test_fshr_special_case_param_1]; ; CHECK-NEXT: ld.param.u32 %r3, [test_fshr_special_case_param_2]; -; CHECK-NEXT: shr.u32 %r4, %r2, %r3; -; CHECK-NEXT: shl.b32 %r5, %r1, 1; -; CHECK-NEXT: xor.b32 %r6, %r3, 31; -; CHECK-NEXT: shl.b32 %r7, %r5, %r6; -; CHECK-NEXT: add.s32 %r8, %r4, %r7; -; CHECK-NEXT: st.param.b32 [func_retval0], %r8; +; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; 
%shl = lshr i32 %x1, %y %srli = shl i32 %x0, 1 diff --git a/llvm/test/CodeGen/X86/rotate-add.ll b/llvm/test/CodeGen/X86/rotate-add.ll index 6e19fc20abf0..c705505bbbf2 100644 --- a/llvm/test/CodeGen/X86/rotate-add.ll +++ b/llvm/test/CodeGen/X86/rotate-add.ll @@ -43,22 +43,15 @@ define i32 @test_rotl_var(i32 %x, i32 %y) { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: shll %cl, %edx -; X86-NEXT: negb %cl -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: roll %cl, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_rotl_var: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll %cl, %eax -; X64-NEXT: negb %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %edi -; X64-NEXT: addl %edi, %eax +; X64-NEXT: roll %cl, %eax ; X64-NEXT: retq %shl = shl i32 %x, %y %sub = sub i32 32, %y @@ -72,22 +65,15 @@ define i32 @test_rotr_var(i32 %x, i32 %y) { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %edx -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: negb %cl -; X86-NEXT: shll %cl, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: rorl %cl, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_rotr_var: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: movl %edi, %eax -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: negb %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shll %cl, %edi -; X64-NEXT: addl %edi, %eax +; X64-NEXT: rorl %cl, %eax ; X64-NEXT: retq %shr = lshr i32 %x, %y %sub = sub i32 32, %y @@ -159,27 +145,18 @@ define i32 @test_invalid_rotr_var_and(i32 %x, i32 %y) { define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) { ; X86-LABEL: test_fshl_special_case: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shll %cl, %edx -; X86-NEXT: shrl %eax -; X86-NEXT: notb %cl -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shldl %cl, %edx, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_fshl_special_case: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: shll %cl, %edi -; X64-NEXT: shrl %esi -; X64-NEXT: notb %cl +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %esi -; X64-NEXT: leal (%rsi,%rdi), %eax +; X64-NEXT: shldl %cl, %esi, %eax ; X64-NEXT: retq %shl = shl i32 %x0, %y %srli = lshr i32 %x1, 1 @@ -192,26 +169,18 @@ define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) { define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) { ; X86-LABEL: test_fshr_special_case: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: addl %eax, %eax -; X86-NEXT: notb %cl -; X86-NEXT: shll %cl, %eax -; X86-NEXT: addl %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrdl %cl, %edx, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_fshr_special_case: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: shrl %cl, %esi -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: notb %cl +; X64-NEXT: movl %esi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shll 
%cl, %eax -; X64-NEXT: addl %esi, %eax +; X64-NEXT: shrdl %cl, %edi, %eax ; X64-NEXT: retq %shl = lshr i32 %x1, %y %srli = shl i32 %x0, 1
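[Editorial aside, not part of the patch: a standalone sketch of the shift+xor special cases that the NVPTX and X86 tests above exercise (the X86 fshr test is truncated here). With the amount xor'd against 31 and the other operand pre-shifted by one bit, the add of the two shifts equals a funnel shift for every amount, including zero, which is why MatchFunnelPosNeg can emit FSHL/FSHR for these patterns. Helper names below are illustrative only.

  #include <cassert>
  #include <cstdint>

  static uint32_t fshl32(uint32_t A, uint32_t B, unsigned Y) {
    Y &= 31;
    return Y == 0 ? A : (A << Y) | (B >> (32 - Y));
  }

  static uint32_t fshr32(uint32_t A, uint32_t B, unsigned Y) {
    Y &= 31;
    return Y == 0 ? B : (B >> Y) | (A << (32 - Y));
  }

  int main() {
    const uint32_t X0 = 0x12345678u, X1 = 0x9ABCDEF0u;
    for (unsigned Y = 0; Y < 32; ++Y) {
      // (shl x0, y) + (srl (srl x1, 1), (xor y, 31)) == fshl(x0, x1, y)
      assert((X0 << Y) + ((X1 >> 1) >> (Y ^ 31)) == fshl32(X0, X1, Y));
      // (srl x1, y) + (shl (shl x0, 1), (xor y, 31)) == fshr(x0, x1, y)
      assert((X1 >> Y) + ((X0 << 1) << (Y ^ 31)) == fshr32(X0, X1, Y));
    }
    return 0;
  }
]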