Add support for flag output operand "=@cc" for SystemZ. (#125970)

Added support for the flag output operand "=@cc" inline assembly
constraint for SystemZ.

- Clang now accepts "=@cc" assembly operands, and sets the 2-bit
    condition code for the output operand for SystemZ.

- Clang currently emits an assertion that flag output operands are
boolean
values, i.e. in the range [0, 2). Generalize this mechanism to allow
targets to specify arbitrary range assertions for any inline assembly
    output operand.  This will be used to assert that SystemZ two-bit
    condition-code values are in the range [0, 4).

- SystemZ backend lowers "@cc" targets by using ipm sequence to extract
    condition code from PSW.

  - DAGCombine tries to optimize lowered ipm sequence by combining
CCReg and computing effective CCMask and CCValid in combineCCMask for
    select_ccmask and br_ccmask.

- Cost computation is done for merging conditionals for branch
    instructions in SelectionDAG, as splitting may cause the evaluation
    of branch conditions to span basic blocks, making them difficult to
    combine.

---------

Co-authored-by: anoopkg6 <anoopkg6@github.com>
Co-authored-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
This commit is contained in:
anoopkg6 2025-10-14 04:53:42 -05:00 committed by GitHub
parent 3793e75b7a
commit 6712e20c52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 2867 additions and 100 deletions

View File

@ -1211,6 +1211,25 @@ public:
TiedOperand = N;
// Don't copy Name or constraint string.
}
// For output operand constraints, a target may record bounds stating that
// the result value is guaranteed to fall within a certain range. Assertions
// matching that range are then emitted, allowing for potential optimization
// based on that guarantee. The callers visible in this change pass a
// half-open interval, e.g. (0, 2) for boolean flags and (0, 4) for the
// two-bit SystemZ condition code.
//
// NOTE: The range is stored in the `ImmRange` fields, which are otherwise
// unused for the constraint types used for output operands.
void setOutputOperandBounds(unsigned Min, unsigned Max) {
  ImmRange.isConstrained = true;
  ImmRange.Min = Min;
  ImmRange.Max = Max;
}
// Returns the (Min, Max) bounds previously recorded in `ImmRange`, or an
// empty optional when `ImmRange` is not marked as constrained.
std::optional<std::pair<unsigned, unsigned>>
getOutputOperandBounds() const {
  if (!ImmRange.isConstrained)
    return std::optional<std::pair<unsigned, unsigned>>();
  return std::make_pair(ImmRange.Min, ImmRange.Max);
}
};
/// Validate register name used for global register variables.

View File

@ -1568,6 +1568,7 @@ bool AArch64TargetInfo::validateAsmConstraint(
if (const unsigned Len = matchAsmCCConstraint(Name)) {
Name += Len - 1;
Info.setAllowsRegister();
Info.setOutputOperandBounds(0, 2);
return true;
}
}

View File

@ -99,6 +99,16 @@ bool SystemZTargetInfo::validateAsmConstraint(
case 'T': // Likewise, plus an index
Info.setAllowsMemory();
return true;
case '@':
// CC condition changes.
if (StringRef(Name) == "@cc") {
Name += 2;
Info.setAllowsRegister();
// SystemZ has 2-bits CC, and hence Interval [0, 4).
Info.setOutputOperandBounds(0, 4);
return true;
}
return false;
}
}
@ -161,6 +171,9 @@ unsigned SystemZTargetInfo::getMinGlobalAlign(uint64_t Size,
void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Inline assembly supports SystemZ flag outputs.
Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
Builder.defineMacro("__s390__");
Builder.defineMacro("__s390x__");
Builder.defineMacro("__zarch__");

View File

@ -136,6 +136,12 @@ public:
std::string convertConstraint(const char *&Constraint) const override {
switch (Constraint[0]) {
case '@': // Flag output operand.
if (llvm::StringRef(Constraint) == "@cc") {
Constraint += 2;
return std::string("{@cc}");
}
break;
case 'p': // Keep 'p' constraint.
return std::string("p");
case 'Z':

View File

@ -1516,6 +1516,7 @@ bool X86TargetInfo::validateAsmConstraint(
if (auto Len = matchAsmCCConstraint(Name)) {
Name += Len - 1;
Info.setAllowsRegister();
Info.setOutputOperandBounds(0, 2);
return true;
}
return false;

View File

@ -2674,7 +2674,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
const llvm::ArrayRef<LValue> ResultRegDests,
const llvm::ArrayRef<QualType> ResultRegQualTys,
const llvm::BitVector &ResultTypeRequiresCast,
const llvm::BitVector &ResultRegIsFlagReg) {
const std::vector<std::optional<std::pair<unsigned, unsigned>>>
&ResultBounds) {
CGBuilderTy &Builder = CGF.Builder;
CodeGenModule &CGM = CGF.CGM;
llvm::LLVMContext &CTX = CGF.getLLVMContext();
@ -2685,18 +2686,20 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
// ResultRegDests can be also populated by addReturnRegisterOutputs() above,
// in which case its size may grow.
assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
assert(ResultRegIsFlagReg.size() <= ResultRegDests.size());
assert(ResultBounds.size() <= ResultRegDests.size());
for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
llvm::Value *Tmp = RegResults[i];
llvm::Type *TruncTy = ResultTruncRegTypes[i];
if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
// Target must guarantee the Value `Tmp` here is lowered to a boolean
// value.
llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
if ((i < ResultBounds.size()) && ResultBounds[i].has_value()) {
const auto [LowerBound, UpperBound] = ResultBounds[i].value();
// FIXME: Support for nonzero lower bounds not yet implemented.
assert(LowerBound == 0 && "Output operand lower bound is not zero.");
llvm::Constant *UpperBoundConst =
llvm::ConstantInt::get(Tmp->getType(), UpperBound);
llvm::Value *IsBooleanValue =
Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, UpperBoundConst);
llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
Builder.CreateCall(FnAssume, IsBooleanValue);
}
@ -2825,7 +2828,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
std::vector<llvm::Type *> ArgElemTypes;
std::vector<llvm::Value*> Args;
llvm::BitVector ResultTypeRequiresCast;
llvm::BitVector ResultRegIsFlagReg;
std::vector<std::optional<std::pair<unsigned, unsigned>>> ResultBounds;
// Keep track of inout constraints.
std::string InOutConstraints;
@ -2883,8 +2886,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
ResultRegQualTys.push_back(QTy);
ResultRegDests.push_back(Dest);
bool IsFlagReg = llvm::StringRef(OutputConstraint).starts_with("{@cc");
ResultRegIsFlagReg.push_back(IsFlagReg);
ResultBounds.emplace_back(Info.getOutputOperandBounds());
llvm::Type *Ty = ConvertTypeForMem(QTy);
const bool RequiresCast = Info.allowsRegister() &&
@ -3231,7 +3233,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes,
ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast,
ResultRegIsFlagReg);
ResultBounds);
// If this is an asm goto with outputs, repeat EmitAsmStores, but with a
// different insertion point; one for each indirect destination and with
@ -3242,7 +3244,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
Builder.SetInsertPoint(Succ, --(Succ->end()));
EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes,
ResultTruncRegTypes, ResultRegDests, ResultRegQualTys,
ResultTypeRequiresCast, ResultRegIsFlagReg);
ResultTypeRequiresCast, ResultBounds);
}
}
}

View File

@ -0,0 +1,57 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -O2 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
// CHECK-LABEL: define dso_local signext range(i32 0, 4) i32 @test(
// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
// CHECK-NEXT: ret i32 [[ASMRESULT1]]
//
int test(int x) {
int cc;
asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
return cc;
}
// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_low_high_transformation(
// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[ASMRESULT1]], -1
// CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 2
// CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[TMP3]] to i32
// CHECK-NEXT: ret i32 [[LOR_EXT]]
//
int test_low_high_transformation(int x) {
int cc;
asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
return cc == 1 || cc == 2;
}
// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_equal_high_transformation(
// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ASMRESULT1]], 1
// CHECK-NEXT: [[LOR_EXT:%.*]] = xor i32 [[TMP2]], 1
// CHECK-NEXT: ret i32 [[LOR_EXT]]
//
int test_equal_high_transformation(int x) {
int cc;
asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
return cc == 0 || cc == 2;
}
//.
// CHECK: [[META2]] = !{i64 788}
// CHECK: [[META3]] = !{i64 1670}
// CHECK: [[META4]] = !{i64 2505}
//.

View File

@ -0,0 +1,4 @@
// RUN: %clang -target systemz-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s
// RUN: %clang -target s390x-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s
// CHECK: #define __GCC_ASM_FLAG_OUTPUTS__ 1

View File

@ -24,6 +24,7 @@ class SystemZTargetMachine;
namespace SystemZ {
// Condition-code mask values.
const unsigned CCMASK_NONE = 0;
const unsigned CCMASK_0 = 1 << 3;
const unsigned CCMASK_1 = 1 << 2;
const unsigned CCMASK_2 = 1 << 1;

View File

@ -15,6 +15,7 @@
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -24,6 +25,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
}
} else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
if (StringRef("{@cc}").compare(Constraint) == 0)
return C_Other;
}
return TargetLowering::getConstraintType(Constraint);
}
@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
SystemZMC::VR128Regs, 32);
}
if (Constraint[1] == '@') {
if (StringRef("{@cc}").compare(Constraint) == 0)
return std::make_pair(0u, &SystemZ::GR32BitRegClass);
}
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
}
// Materialize the condition code held in CCReg as an i32: wrap CCReg in an
// IPM node and logically shift the result right by SystemZ::IPM_CC.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
  SDLoc DL(CCReg);
  SDValue InsertedMask = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
  SDValue ShiftAmount = DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, InsertedMask, ShiftAmount);
}
// Lower a "{@cc}" inline-asm output operand: copy the CC register after the
// asm statement and convert it to an i32 via getCCResult (IPM + SRL).
// Returns an empty SDValue for any other constraint code. Chain and Glue
// are updated in place to reflect the inserted CopyFromReg.
SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Only the flag output constraint is handled here.
  if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
    return SDValue();

  // The destination must be a scalar integer of at least 8 bits.
  // NOTE(review): the diagnostic says "Glue output operand"; this looks like
  // it was meant to read "Flag output operand" -- confirm before rewording.
  const auto &ResultVT = OpInfo.ConstraintVT;
  const bool IsScalarInteger = ResultVT.isInteger() && !ResultVT.isVector();
  if (!IsScalarInteger || ResultVT.getSizeInBits() < 8)
    report_fatal_error("Glue output operand is of invalid type");

  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  MRI.addLiveIn(SystemZ::CC);

  if (!Glue.getNode()) {
    // No incoming glue: plain copy from CC on the current chain.
    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
  } else {
    // Glue the copy to the preceding node and thread the chain through it.
    Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
    Chain = Glue.getValue(1);
  }
  return getCCResult(DAG, Glue);
}
void SystemZTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
Node->getMemoryVT(), Node->getMemOperand());
}
// Convert condition code in CCReg to an i32 value.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
SDLoc DL(CCReg);
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}
SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC(
return SDValue();
}
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
switch (Val.getOpcode()) {
default:
return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
case SystemZISD::IPM:
if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
case SystemZISD::SELECT_CCMASK: {
SDValue Op4CCReg = Val.getOperand(4);
if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
Op4CCReg.getOpcode() == SystemZISD::TM) {
auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
if (OpCC != SDValue())
return std::make_pair(OpCC, OpCCValid);
}
auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
if (!CCValid)
return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
int CCValidVal = CCValid->getZExtValue();
return std::make_pair(Op4CCReg, CCValidVal);
}
case ISD::ADD:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
if (Op0CC != SDValue())
return std::make_pair(Op0CC, Op0CCValid);
return findCCUse(Val.getOperand(1));
}
}
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
SelectionDAG &DAG);
// Evaluate Val under the assumption that the condition code produced by CC
// is 0, 1, 2 or 3 respectively. Returns four values (index == CC value), or
// an empty vector if Val cannot be expressed purely in terms of CC and
// constants.
static SmallVector<SDValue, 4>
simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG) {
  SDLoc DL(Val);
  const auto Opcode = Val.getOpcode();
  switch (Opcode) {
  default:
    return {};
  case ISD::Constant:
    // A constant is the same regardless of the condition code.
    return {Val, Val, Val, Val};
  case SystemZISD::IPM: {
    // Only an IPM of the CC we are reasoning about can be folded.
    if (Val.getOperand(0) != CC)
      return {};
    SmallVector<SDValue, 4> PerCCVals;
    for (int CCVal = 0; CCVal != 4; ++CCVal)
      PerCCVals.emplace_back(
          DAG.getConstant((CCVal << SystemZ::IPM_CC), DL, MVT::i32));
    return PerCCVals;
  }
  case SystemZISD::SELECT_CCMASK: {
    SDValue SelTrue = Val.getOperand(0), SelFalse = Val.getOperand(1);
    auto *ValidConst = dyn_cast<ConstantSDNode>(Val.getOperand(2));
    auto *MaskConst = dyn_cast<ConstantSDNode>(Val.getOperand(3));
    if (!ValidConst || !MaskConst)
      return {};
    int SelValid = ValidConst->getZExtValue();
    int SelMask = MaskConst->getZExtValue();
    // Both arms are evaluated before either result is inspected.
    const SmallVector<SDValue, 4> TrueVals =
        simplifyAssumingCCVal(SelTrue, CC, DAG);
    const SmallVector<SDValue, 4> FalseVals =
        simplifyAssumingCCVal(SelFalse, CC, DAG);
    if (TrueVals.empty() || FalseVals.empty())
      return {};
    // If the select tests a different CC, try to rewrite it in terms of CC;
    // give up when that does not succeed.
    SDValue SelCC = Val.getOperand(4);
    if (SelCC != CC)
      combineCCMask(SelCC, SelValid, SelMask, DAG);
    if (SelCC != CC)
      return {};
    SmallVector<SDValue, 4> Merged;
    for (int CCVal = 0; CCVal != 4; ++CCVal) {
      const bool TakesTrueArm = (SelMask & (1 << (3 - CCVal))) != 0;
      Merged.emplace_back(TakesTrueArm ? TrueVals[CCVal] : FalseVals[CCVal]);
    }
    return Merged;
  }
  case ISD::ADD:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SRA:
    // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
    // would clobber CC).
    if (!Val.hasOneUse())
      return {};
    [[fallthrough]];
  case ISD::SHL:
  case ISD::SRL: {
    SDValue LHS = Val.getOperand(0), RHS = Val.getOperand(1);
    const SmallVector<SDValue, 4> LHSVals = simplifyAssumingCCVal(LHS, CC, DAG);
    const SmallVector<SDValue, 4> RHSVals = simplifyAssumingCCVal(RHS, CC, DAG);
    if (LHSVals.empty() || RHSVals.empty())
      return {};
    // Re-apply the operation to the per-CC operand values.
    SmallVector<SDValue, 4> Results;
    for (int CCVal = 0; CCVal != 4; ++CCVal)
      Results.emplace_back(DAG.getNode(Opcode, DL, Val.getValueType(),
                                       LHSVals[CCVal], RHSVals[CCVal]));
    return Results;
  }
  }
}
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
SelectionDAG &DAG) {
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks,
// If the CCReg instruction is itself a ICMP testing the condition
// If the CCReg instruction is itself a ICMP / TM testing the condition
// code set by some other instruction, see whether we can directly
// use that condition code.
// Verify that we have an ICMP against some constant.
if (CCValid != SystemZ::CCMASK_ICMP)
return false;
auto *ICmp = CCReg.getNode();
if (ICmp->getOpcode() != SystemZISD::ICMP)
return false;
auto *CompareLHS = ICmp->getOperand(0).getNode();
auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
if (!CompareRHS)
auto *CCNode = CCReg.getNode();
if (!CCNode)
return false;
// Optimize the case where CompareLHS is a SELECT_CCMASK.
if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
// Verify that we have an appropriate mask for a EQ or NE comparison.
bool Invert = false;
if (CCMask == SystemZ::CCMASK_CMP_NE)
Invert = !Invert;
else if (CCMask != SystemZ::CCMASK_CMP_EQ)
if (CCNode->getOpcode() == SystemZISD::TM) {
if (CCValid != SystemZ::CCMASK_TM)
return false;
// Verify that the ICMP compares against one of select values.
auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
if (!TrueVal)
// Emulate the condition code a TM (test under mask) would set for a
// constant value (Op0Val) and constant mask (Op1Val):
//   0 - selected bits all zero (or mask is zero)
//   1 - selected bits mixed, leftmost selected bit is zero
//   2 - selected bits mixed, leftmost selected bit is one
//   3 - selected bits all one
// Returns -1 if either operand is not a constant.
auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
  auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
  auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
  if (!Op0Node || !Op1Node)
    return -1;
  const APInt &Value = Op0Node->getAPIntValue();
  const APInt &Mask = Op1Node->getAPIntValue();
  const APInt Selected = Value & Mask;
  if (Selected == 0)
    return 0;
  if (Selected == Mask)
    return 3;
  // Mixed case: Selected is non-zero, so Mask is non-zero and has at least
  // one active bit. The leftmost selected bit is the highest set bit of the
  // mask, i.e. index getActiveBits() - 1 (getActiveBits() itself is one past
  // it and may equal the bit width).
  return Selected[Mask.getActiveBits() - 1] ? 2 : 1;
};
SDValue Op0 = CCNode->getOperand(0);
SDValue Op1 = CCNode->getOperand(1);
auto [Op0CC, Op0CCValid] = findCCUse(Op0);
if (Op0CC == SDValue())
return false;
auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
if (!FalseVal)
const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
if (Op0SDVals.empty() || Op1SDVals.empty())
return false;
if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
Invert = !Invert;
else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
return false;
// Compute the effective CC mask for the new branch or select.
auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
if (!NewCCValid || !NewCCMask)
return false;
CCValid = NewCCValid->getZExtValue();
CCMask = NewCCMask->getZExtValue();
if (Invert)
CCMask ^= CCValid;
// Return the updated CCReg link.
CCReg = CompareLHS->getOperand(4);
int NewCCMask = 0;
for (auto CC : {0, 1, 2, 3}) {
auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
if (CCVal < 0)
return false;
NewCCMask <<= 1;
NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
}
NewCCMask &= Op0CCValid;
CCReg = Op0CC;
CCMask = NewCCMask;
CCValid = Op0CCValid;
return true;
}
if (CCNode->getOpcode() != SystemZISD::ICMP ||
CCValid != SystemZ::CCMASK_ICMP)
return false;
// Optimize the case where CompareRHS is (SRA (SHL (IPM))).
if (CompareLHS->getOpcode() == ISD::SRA) {
auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
if (!SRACount || SRACount->getZExtValue() != 30)
return false;
auto *SHL = CompareLHS->getOperand(0).getNode();
if (SHL->getOpcode() != ISD::SHL)
return false;
auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
return false;
auto *IPM = SHL->getOperand(0).getNode();
if (IPM->getOpcode() != SystemZISD::IPM)
SDValue CmpOp0 = CCNode->getOperand(0);
SDValue CmpOp1 = CCNode->getOperand(1);
SDValue CmpOp2 = CCNode->getOperand(2);
auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
if (Op0CC != SDValue()) {
const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
if (Op0SDVals.empty() || Op1SDVals.empty())
return false;
// Avoid introducing CC spills (because SRA would clobber CC).
if (!CompareLHS->hasOneUse())
return false;
// Verify that the ICMP compares against zero.
if (CompareRHS->getZExtValue() != 0)
return false;
// Compute the effective CC mask for the new branch or select.
CCMask = SystemZ::reverseCCMask(CCMask);
// Return the updated CCReg link.
CCReg = IPM->getOperand(0);
auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
auto CmpTypeVal = CmpType->getZExtValue();
// Emulate the condition code of an integer compare of two constants:
// 0 if equal, 1 if the first is lower, 2 if the first is higher. The
// ordering is signed when the compare type is SystemZICMP::SignedOnly and
// unsigned otherwise. Returns -1 if either operand is not a constant.
const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
                                           const SDValue &Op1Val) {
  auto *LHSConst = dyn_cast<ConstantSDNode>(Op0Val.getNode());
  auto *RHSConst = dyn_cast<ConstantSDNode>(Op1Val.getNode());
  if (!LHSConst || !RHSConst)
    return -1;
  auto LHS = LHSConst->getAPIntValue();
  auto RHS = RHSConst->getAPIntValue();
  if (LHS == RHS)
    return 0;
  const bool IsLower = CmpTypeVal == SystemZICMP::SignedOnly ? LHS.slt(RHS)
                                                             : LHS.ult(RHS);
  return IsLower ? 1 : 2;
};
int NewCCMask = 0;
for (auto CC : {0, 1, 2, 3}) {
auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
if (CCVal < 0)
return false;
NewCCMask <<= 1;
NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
}
NewCCMask &= Op0CCValid;
CCMask = NewCCMask;
CCReg = Op0CC;
CCValid = Op0CCValid;
return true;
}
return false;
}
SDValue SystemZTargetLowering::combineBR_CCMASK(
SDNode *N, DAGCombinerInfo &DCI) const {
// Decide whether two branch conditions should be merged into one or split
// into separate branches. When both conditions are derived from an inline
// asm "{@cc}" flag output, non-default merging parameters are returned;
// otherwise the default (-1, -1, -1) is used.
TargetLoweringBase::CondMergingParams
SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
                                                     const Value *Lhs,
                                                     const Value *Rhs) const {
  // True if V is an instruction of the form (Cmp %asm, C) or one whose first
  // operand is (And %asm, C), where %asm is an inline-asm call whose
  // constraint string contains "{@cc}".
  const auto isFlagOutOpCC = [](const Value *V) {
    using namespace llvm::PatternMatch;
    const auto *I = dyn_cast<Instruction>(V);
    if (!I)
      return false;
    const Value *Source;
    const APInt *ConstOperand;
    // PatternMatch.h provides concise tree-based pattern match of llvm IR.
    if (!match(I->getOperand(0),
               m_And(m_Value(Source), m_APInt(ConstOperand))) &&
        !match(I, m_Cmp(m_Value(Source), m_APInt(ConstOperand))))
      return false;
    const auto *CB = dyn_cast<CallBase>(Source);
    if (!CB || !CB->isInlineAsm())
      return false;
    const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
    return IA && IA->getConstraintString().find("{@cc}") != std::string::npos;
  };
  // Pattern (ICmp %asm) or (ICmp (And %asm)).
  // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or
  // BaseCost can be set >=2. If cost of instruction <= CostThreshold
  // conditionals will be merged or else conditionals will be split.
  const bool BothFromFlagOutput = isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs);
  if (!BothFromFlagOutput)
    return {-1, -1, -1}; // Default.
  return {3, 0, -1};
}
SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
int CCMaskVal = CCMask->getZExtValue();
SDValue Chain = N->getOperand(0);
SDValue CCReg = N->getOperand(4);
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
Chain,
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
int CCMaskVal = CCMask->getZExtValue();
SDValue CCReg = N->getOperand(4);
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1),
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
CCReg);
bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
// Populate SDVals vector for each condition code ccval for given Val, which
// can again be another nested select_ccmask with the same CC.
const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
SmallVector<SDValue, 4> Res;
if (Val.getOperand(4) != CCReg)
return SmallVector<SDValue, 4>{};
SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
if (!CCMask)
return SmallVector<SDValue, 4>{};
int CCMaskVal = CCMask->getZExtValue();
for (auto &CC : {0, 1, 2, 3})
Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
: FalseVal);
return Res;
}
return SmallVector<SDValue, 4>{Val, Val, Val, Val};
};
// Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either
// with CCReg found by combineCCMask or original CCReg.
SDValue TrueVal = N->getOperand(0);
SDValue FalseVal = N->getOperand(1);
auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
// TrueSDVals/FalseSDVals might be empty in case of non-constant
// TrueVal/FalseVal for select_ccmask, which can not be optimized further.
if (TrueSDVals.empty())
TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
if (FalseSDVals.empty())
FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
SmallSet<SDValue, 4> MergedSDValsSet;
// Ignoring CC values outside CCValid.
for (auto CC : {0, 1, 2, 3}) {
if ((CCValidVal & ((1 << (3 - CC)))) != 0)
MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
? TrueSDVals[CC]
: FalseSDVals[CC]);
}
if (MergedSDValsSet.size() == 1)
return *MergedSDValsSet.begin();
if (MergedSDValsSet.size() == 2) {
auto BeginIt = MergedSDValsSet.begin();
SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
std::swap(NewTrueVal, NewFalseVal);
int NewCCMask = 0;
for (auto CC : {0, 1, 2, 3}) {
NewCCMask <<= 1;
NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
? (TrueSDVals[CC] == NewTrueVal)
: (FalseSDVals[CC] == NewTrueVal);
}
CCMaskVal = NewCCMask;
CCMaskVal &= CCValidVal;
TrueVal = NewTrueVal;
FalseVal = NewFalseVal;
IsCombinedCCReg = true;
}
}
if (IsCombinedCCReg)
return DAG.getNode(
SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
return SDValue();
}
SDValue SystemZTargetLowering::combineGET_CCMASK(
SDNode *N, DAGCombinerInfo &DCI) const {

View File

@ -533,6 +533,18 @@ public:
}
const char *getTargetNodeName(unsigned Opcode) const override;
// This function currently returns cost for srl/ipm/cc sequence for merging.
CondMergingParams
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
const Value *Rhs) const override;
// Handle Lowering flag assembly outputs.
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
const SDLoc &DL,
const AsmOperandInfo &Constraint,
SelectionDAG &DAG) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;

View File

@ -90,7 +90,7 @@ define i32 @test_tbegin_nofloat4(i32 %pad, ptr %ptr) {
; CHECK: tbegin 0, 65292
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: ciblh %r2, 2, 0(%r14)
; CHECK: bnhr %r14
; CHECK: mvhi 0(%r3), 0
; CHECK: br %r14
%res = call i32 @llvm.s390.tbegin.nofloat(ptr null, i32 65292)
@ -219,7 +219,7 @@ define i32 @test_tend2(i32 %pad, ptr %ptr) {
; CHECK: tend
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: ciblh %r2, 2, 0(%r14)
; CHECK: bnhr %r14
; CHECK: mvhi 0(%r3), 0
; CHECK: br %r14
%res = call i32 @llvm.s390.tend()

View File

@ -0,0 +1,738 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s
; Test implementation of combining br_ccmask for flag output operand, and
; optimizing ipm sequence using conditional branches.
declare void @dummy()
; Check a case where the cc is used as an integer.
; Just (srl (ipm)) sequence without optimization.
; The flag output is returned directly, so there is no branch that the
; condition code could be folded into.
define i32 @test(ptr %a) {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ipm %r2
; CHECK-NEXT: srl %r2, 28
; CHECK-NEXT: br %r14
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
ret i32 %cc
}
; Test-1(f1_0_*). Test all 14 valid combinations, where cc is being used for
; branching.
; Check (cc == 0).
; %branch (the call to @dummy) is reached only when the flag output is 0.
; Expect a single conditional jump (jge) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_eq_0(ptr %a) {
; CHECK-LABEL: f1_0_eq_0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jge dummy@PLT
; CHECK-NEXT: .LBB1_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
%cmp = icmp eq i32 %cc, 0
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc != 0).
; %branch (the call to @dummy) is reached for every non-zero flag output.
; Expect a single conditional jump (jgne) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_ne_0(ptr %a) {
; CHECK-LABEL: f1_0_ne_0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgne dummy@PLT
; CHECK-NEXT: .LBB2_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; For an unsigned compare, 'ugt 0' is the same test as 'ne 0'.
%cmp = icmp ugt i32 %cc, 0
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 1).
; %branch (the call to @dummy) is reached only when the flag output is 1.
; Expect a single conditional jump (jgl) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_eq_1(ptr %a) {
; CHECK-LABEL: f1_0_eq_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgl dummy@PLT
; CHECK-NEXT: .LBB3_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
%cmp = icmp eq i32 %cc, 1
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc != 1).
; %branch (the call to @dummy) is reached for every flag output except 1.
; Expect a single conditional jump (jgnl) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_ne_1(ptr %a) {
; CHECK-LABEL: f1_0_ne_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnl dummy@PLT
; CHECK-NEXT: .LBB4_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
%cmp = icmp ne i32 %cc, 1
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 2).
; %branch (the call to @dummy) is reached only when the flag output is 2.
; Expect a single conditional jump (jgh) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_eq_2(ptr %a) {
; CHECK-LABEL: f1_0_eq_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgh dummy@PLT
; CHECK-NEXT: .LBB5_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
%cmp = icmp eq i32 %cc, 2
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc != 2).
; %branch (the call to @dummy) is reached for every flag output except 2.
; Expect a single conditional jump (jgnh) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_ne_2(ptr %a) {
; CHECK-LABEL: f1_0_ne_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnh dummy@PLT
; CHECK-NEXT: .LBB6_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
%cmp = icmp ne i32 %cc, 2
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 3).
; %branch (the call to @dummy) is reached only when the flag output is 3.
; Expect a single conditional jump (jgo) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_eq_3(ptr %a) {
; CHECK-LABEL: f1_0_eq_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgo dummy@PLT
; CHECK-NEXT: .LBB7_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
%cmp = icmp eq i32 %cc, 3
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc != 3).
; %branch (the call to @dummy) is reached for every flag output except 3.
; Expect a single conditional jump (jgno) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_ne_3(ptr %a) {
; CHECK-LABEL: f1_0_ne_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgno dummy@PLT
; CHECK-NEXT: .LBB8_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; With %cc in [0, 4), 'ult 3' is the same test as 'ne 3'.
%cmp = icmp ult i32 %cc, 3
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 0|1).
; %branch (the call to @dummy) is reached when the flag output is 0 or 1.
; Expect a single conditional jump (jgle) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_01(ptr %a) {
; CHECK-LABEL: f1_0_01:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgle dummy@PLT
; CHECK-NEXT: .LBB9_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; 'ult 2' selects the values 0 and 1.
%cmp = icmp ult i32 %cc, 2
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 0|2).
; %branch (the call to @dummy) is reached when the flag output is 0 or 2.
; Expect a single conditional jump (jghe) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_02(ptr %a) {
; CHECK-LABEL: f1_0_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jghe dummy@PLT
; CHECK-NEXT: .LBB10_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; The low bit of %cc is clear exactly for the even values 0 and 2.
%and = and i32 %cc, 1
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 0|3).
; %branch (the call to @dummy) is reached when the flag output is 0 or 3.
; Expect a single conditional jump (jgnlh) to dummy right after the inline
; asm, with no ipm/srl sequence.
define void @f1_0_03(ptr %a) {
; CHECK-LABEL: f1_0_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnlh dummy@PLT
; CHECK-NEXT: .LBB11_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; The condition is inverted: %cmp.inv is true for 1 and 2 and leads to %exit,
; so %branch is the false successor.
%cmp0 = icmp ne i32 %cc, 0
%cmp3 = icmp ne i32 %cc, 3
%cmp.inv = and i1 %cmp0, %cmp3
br i1 %cmp.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 1|2).
; %branch (the call to @dummy) is reached when the flag output is 1 or 2.
; Expect a single conditional jump (jglh) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_0_12(ptr %a) {
; CHECK-LABEL: f1_0_12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jglh dummy@PLT
; CHECK-NEXT: .LBB12_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; Two equality tests on the same %cc, joined by 'or'.
%cmpeq1 = icmp eq i32 %cc, 1
%cmpeq2 = icmp eq i32 %cc, 2
%cmp = or i1 %cmpeq1, %cmpeq2
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 1|3).
; %branch (the call to @dummy) is reached when the flag output is 1 or 3.
; Expect a single conditional jump (jgnhe) to dummy right after the inline
; asm, with no ipm/srl sequence.
define void @f1_0_13(ptr %a) {
; CHECK-LABEL: f1_0_13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnhe dummy@PLT
; CHECK-NEXT: .LBB13_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; Two equality tests on the same %cc, joined by 'or'.
%cmpeq1 = icmp eq i32 %cc, 1
%cmpeq3 = icmp eq i32 %cc, 3
%cmp = or i1 %cmpeq1, %cmpeq3
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check (cc == 2|3).
; %branch (the call to @dummy) is reached when the flag output is 2 or 3.
; Expect a single conditional jump (jgnle) to dummy right after the inline
; asm, with no ipm/srl sequence.
define void @f1_0_23(ptr %a) {
; CHECK-LABEL: f1_0_23:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnle dummy@PLT
; CHECK-NEXT: .LBB14_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; With %cc in [0, 4), 'ugt 1' selects the values 2 and 3.
%cmp = icmp ugt i32 %cc, 1
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Test-2(f1_1_*/f1_2_*/f1_3_*/f1_4_*).
; Test Mixed patterns involving Binary Ops.
; Check 'add' for (cc != 0).
; The cc test is expressed as a range check on (cc - 1). Expect it to fold to
; a single conditional jump (jgne) to dummy right after the inline asm, with
; no ipm/srl sequence.
define void @f1_1_1(ptr %a) {
; CHECK-LABEL: f1_1_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgne dummy@PLT
; CHECK-NEXT: .LBB15_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; cc - 1 is 0, 1 or 2 for cc in {1, 2, 3}; for cc == 0 it is -1, which is not
; unsigned-less-than 3.
%add = add nsw i32 %cc, -1
%cmp = icmp ult i32 %add, 3
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'add' for (cc == 1|2).
; The cc test is expressed as a range check on (cc - 1). Expect it to fold to
; a single conditional jump (jglh) to dummy right after the inline asm, with
; no ipm/srl sequence.
define void @f1_1_2(ptr %a) {
; CHECK-LABEL: f1_1_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jglh dummy@PLT
; CHECK-NEXT: .LBB16_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; cc - 1 is unsigned-less-than 2 only for cc in {1, 2}.
%add = add nsw i32 %cc, -1
%cmp = icmp ult i32 %add, 2
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'add' for (cc == 1|2).
; Same cc set as f1_1_2, but written as an inverted range check on (cc - 3).
; Expect a single conditional jump (jglh) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_1_3(ptr %a) {
; CHECK-LABEL: f1_1_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jglh dummy@PLT
; CHECK-NEXT: .LBB17_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; cc - 3 is -3, -2, -1, 0 for cc = 0..3. Unsigned-less-than -2 holds for -3
; and 0 (cc == 0 or 3), which go to %exit; cc in {1, 2} reaches %branch.
%add = add nsw i32 %cc, -3
%cmp.inv = icmp ult i32 %add, -2
br i1 %cmp.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'and' with one operand cc and other select_ccmask(cc !=1).
; %branch (the call to @dummy) is reached for every flag output except 1.
; Expect a single conditional jump (jgnl) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_2_1(ptr %a) {
; CHECK-LABEL: f1_2_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnl dummy@PLT
; CHECK-NEXT: .LBB18_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %cmpne0 is true for odd cc (1 or 3); combined with cc != 3 only cc == 1 is
; left. The branch is inverted, so cc == 1 goes to %exit.
%andcc = and i32 %cc, 1
%cmpne0 = icmp ne i32 %andcc, 0
%cmpne3 = icmp ne i32 %cc, 3
%cmp.inv = and i1 %cmpne3, %cmpne0
br i1 %cmp.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'and' with both operands select_ccmask(cc != 2).
; %branch (the call to @dummy) is reached for every flag output except 2.
; Expect a single conditional jump (jgnh) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_2_2(ptr %a) {
; CHECK-LABEL: f1_2_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnh dummy@PLT
; CHECK-NEXT: .LBB19_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; 'ugt 1' and 'ne 3' both hold only for cc == 2. The branch is inverted, so
; cc == 2 goes to %exit.
%ugt1 = icmp samesign ugt i32 %cc, 1
%cmpne3 = icmp ne i32 %cc, 3
%and.cond.inv = and i1 %ugt1, %cmpne3
br i1 %and.cond.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'and/tm' for (cc == 0|2).
; %branch (the call to @dummy) is reached when the flag output is 0 or 2.
; Expect a single conditional jump (jghe) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_2_3(ptr %a) {
; CHECK-LABEL: f1_2_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jghe dummy@PLT
; CHECK-NEXT: .LBB20_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; The low bit of %cc is clear exactly for the even values 0 and 2.
%and = and i32 %cc, 1
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'and/tm' for (cc == 1|3).
; %branch (the call to @dummy) is reached when the flag output is 1 or 3.
; Expect a single conditional jump (jgnhe) to dummy right after the inline
; asm, with no ipm/srl sequence.
define void @f1_2_4(ptr %a) {
; CHECK-LABEL: f1_2_4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnhe dummy@PLT
; CHECK-NEXT: .LBB21_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %cmp is true for even cc (0 or 2), which goes to %exit; the odd values
; 1 and 3 reach %branch.
%and = and i32 %cc, 1
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1).
; In the IR the two conditions are a 'trunc' of cc to i1 and an 'icmp ne',
; joined by an i1 'xor'. %branch (the call to @dummy) is reached for every
; flag output except 1. Expect a single conditional jump (jgnl) to dummy right
; after the inline asm, with no ipm/srl sequence.
define void @f1_2_5(ptr %a) {
; CHECK-LABEL: f1_2_5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnl dummy@PLT
; CHECK-NEXT: .LBB22_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %trunc is the low bit of cc (set for 1 and 3). %cmpne3 and %trunc are equal
; only for cc == 1 (both true), so the xor is true for every other value.
%trunc = trunc i32 %cc to i1
%cmpne3 = icmp ne i32 %cc, 3
%cmp = xor i1 %cmpne3, %trunc
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check nested 'xor' cc with select_ccmask(cc != 1).
; %branch (the call to @dummy) is reached for every flag output except 1.
; Expect a single conditional jump (jgnl) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_3_1(ptr %a) {
; CHECK-LABEL: f1_3_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnl dummy@PLT
; CHECK-NEXT: .LBB23_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %xor is true for cc in {0, 2}; %cmpne3 is true for cc in {0, 1, 2}. Their
; xor is true only for cc == 1, which the inverted branch sends to %exit.
%cmpeq0 = icmp eq i32 %cc, 0
%cmpeq2 = icmp eq i32 %cc, 2
%xor = xor i1 %cmpeq0, %cmpeq2
%cmpne3 = icmp ne i32 %cc, 3
%cmp.inv = xor i1 %cmpne3, %xor
br i1 %cmp.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check branching on 'tm' and 'xor' with one operand cc and the other
; select_ccmask(cc !=1).
; %branch (the call to @dummy) is reached for every flag output except 1.
; Expect a single conditional jump (jgnl) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_3_2(ptr %a) {
; CHECK-LABEL: f1_3_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnl dummy@PLT
; CHECK-NEXT: .LBB24_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %trunc is the low bit of cc (set for 1 and 3); %cmpeq3 is true only for 3.
; Their xor is true only for cc == 1, which the inverted branch sends to
; %exit.
%trunc = trunc i32 %cc to i1
%cmpeq3 = icmp eq i32 %cc, 3
%cmp.inv = xor i1 %cmpeq3, %trunc
br i1 %cmp.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check branching on 'tm' and 'xor' with one operand cc and the other
; select_ccmask(cc !=2).
; %branch (the call to @dummy) is reached for every flag output except 2.
; Expect a single conditional jump (jgnh) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_3_3(ptr %a) {
; CHECK-LABEL: f1_3_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgnh dummy@PLT
; CHECK-NEXT: .LBB25_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %trunc is the low bit of cc (set for 1 and 3); %cmpne0 is true for 1, 2
; and 3. Their xor is true only for cc == 2, which the inverted branch sends
; to %exit.
%trunc = trunc i32 %cc to i1
%cmpne0 = icmp ne i32 %cc, 0
%cmp.cond.inv = xor i1 %cmpne0, %trunc
br i1 %cmp.cond.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'or' where both operands are select_ccmask, one with TM and the other
; with ICMP (cc == 1).
; %branch (the call to @dummy) is reached only when the flag output is 1.
; Expect a single conditional jump (jgl) to dummy right after the inline asm,
; with no ipm/srl sequence.
define void @f1_4_1(ptr %a) {
; CHECK-LABEL: f1_4_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgl dummy@PLT
; CHECK-NEXT: .LBB26_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; %cmpeq0 is true for even cc (0 or 2) and %cmpeq3 for cc == 3, so the 'or'
; is true for {0, 2, 3}. The branch is inverted: those values go to %exit and
; only cc == 1 reaches %branch.
%andcc = and i32 %cc, 1
%cmpeq0 = icmp eq i32 %andcc, 0
%cmpeq3 = icmp eq i32 %cc, 3
%cmp.cond.inv = or i1 %cmpeq3, %cmpeq0
br i1 %cmp.cond.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'or' for (cc == 0|1).
; The cc test is expressed as an inverted compare on (cc | -4). Expect it to
; fold to a single conditional jump (jgle) to dummy right after the inline
; asm, with no ipm/srl sequence.
define void @f1_4_2(ptr %a) {
; CHECK-LABEL: f1_4_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgle dummy@PLT
; CHECK-NEXT: .LBB27_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; cc | -4 maps cc = 0..3 to -4..-1. Unsigned-greater-than -3 holds for -2 and
; -1 (cc == 2 or 3), which go to %exit; cc in {0, 1} reaches %branch.
%or = or disjoint i32 %cc, -4
%cmp.inv = icmp samesign ugt i32 %or, -3
br i1 %cmp.inv, label %exit, label %branch
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}
; Check 'or' for (cc == 0|1).
; Same cc set as f1_4_2, but with a non-inverted compare on (cc | -4). Expect
; a single conditional jump (jgle) to dummy right after the inline asm, with
; no ipm/srl sequence.
define void @f1_4_3(ptr %a) {
; CHECK-LABEL: f1_4_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: alsi 0(%r2), -1
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: jgle dummy@PLT
; CHECK-NEXT: .LBB28_1: # %exit
; CHECK-NEXT: br %r14
entry:
%cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
; %cc is asserted to lie in [0, 4).
%tmp = icmp ult i32 %cc, 4
tail call void @llvm.assume(i1 %tmp)
; cc | -4 maps cc = 0..3 to -4..-1. Unsigned-less-than -2 holds for -4 and -3,
; i.e. cc in {0, 1}.
%or = or disjoint i32 %cc, -4
%cmp = icmp samesign ult i32 %or, -2
br i1 %cmp, label %branch, label %exit
branch:
tail call void @dummy()
br label %exit
exit:
ret void
}

File diff suppressed because it is too large Load Diff