llvm-project/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Slava Zakharin c0c71463f6
[InstCombine] Optimize sub(sext(add(x,y)),sext(add(x,z))). (#144174)
This pattern often appears in Flang-generated LLVM IR,
for example, in the trip counts of the loops generated for array
expressions like `a(x:x+y)` or `a(x+z:x+z)` or their variations.

In order to compute the loop count, Flang needs to subtract
the lower bound of the array slice from the upper bound
of the array slice. To avoid sign wraps, it sign-extends
the original values (which may be of any user data type)
to `i64`.

This peephole is particularly helpful in CPU2017/548.exchange2,
which contains multiple statements like this:
```
block(row+1:row+2, 7:9, i7) = block(row+1:row+2, 7:9, i7) - 10
```

While this is just a 2x3-iteration loop nest, LLVM cannot
figure that out and ends up vectorizing the inner loop very
aggressively (with a vector epilogue and a scalar remainder). This, in turn,
causes problems for LSR, which ends up creating too many loop-carried
values in the loop containing the above statement, and those values
then cause too many spills/reloads.

Alive2: https://alive2.llvm.org/ce/z/gLgfYX

Related to #143219.
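
For reference, here is a minimal IR sketch (an illustration, not taken from
the patch's tests) of the shape this fold targets, assuming `i32` sources
and `nsw` adds:
```
; before: both bounds are widened to i64 before the subtraction
%ub = add nsw i32 %x, %y
%lb = add nsw i32 %x, %z
%ub.ext = sext i32 %ub to i64
%lb.ext = sext i32 %lb to i64
%len = sub i64 %ub.ext, %lb.ext

; after: the common term %x cancels out
%y.ext = sext i32 %y to i64
%z.ext = sext i32 %z to i64
%len = sub i64 %y.ext, %z.ext
```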

//===- InstCombineMulDivRem.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv,
// srem, urem, frem.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <cassert>
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm;
using namespace PatternMatch;
/// The specific integer value is used in a context where it is known to be
/// non-zero. If this allows us to simplify the computation, do so and return
/// the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombinerImpl &IC,
Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreachable
// code.
if (!V->hasOneUse()) return nullptr;
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
// Because V cannot be zero, we know that B is less than A.
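// For example, if ((1 << 5) >>u 2) is known non-zero, no set bit was shifted
// out, so the value must be 1 << (5 - 2).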
Value *A = nullptr, *B = nullptr, *One = nullptr;
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
match(One, m_One())) {
A = IC.Builder.CreateSub(A, B);
return IC.Builder.CreateShl(One, A);
}
// (PowerOfTwo >>u B) --> isExact, since shifting out the set bit would make
// the result zero, contradicting V being known non-zero. Similarly for <<.
BinaryOperator *I = dyn_cast<BinaryOperator>(V);
if (I && I->isLogicalShift() &&
IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, &CxtI)) {
// We know that this is an exact/nuw shift, and that its input is known
// non-zero in this context as well.
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
IC.replaceOperand(*I, 0, V2);
MadeChange = true;
}
if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
I->setIsExact();
MadeChange = true;
}
if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
I->setHasNoUnsignedWrap();
MadeChange = true;
}
}
// TODO: Lots more we could do here:
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
return MadeChange ? V : nullptr;
}
// TODO: This is a specific form of a much more general pattern.
// We could detect a select with any binop identity constant, or we
// could use SimplifyBinOp to see if either arm of the select reduces.
// But that needs to be done carefully and/or while removing potential
// reverse canonicalizations as in InstCombiner::foldSelectIntoOp().
static Value *foldMulSelectToNegate(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
Value *Cond, *OtherOp;
// mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp
// mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp
if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())),
m_Value(OtherOp)))) {
bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
Value *Neg = Builder.CreateNeg(OtherOp, "", HasAnyNoWrap);
return Builder.CreateSelect(Cond, OtherOp, Neg);
}
// mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp
// mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp
if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())),
m_Value(OtherOp)))) {
bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
Value *Neg = Builder.CreateNeg(OtherOp, "", HasAnyNoWrap);
return Builder.CreateSelect(Cond, Neg, OtherOp);
}
// fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp
// fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp
if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0),
m_SpecificFP(-1.0))),
m_Value(OtherOp))))
return Builder.CreateSelectFMF(Cond, OtherOp,
Builder.CreateFNegFMF(OtherOp, &I), &I);
// fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, OtherOp
// fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp
if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0),
m_SpecificFP(1.0))),
m_Value(OtherOp))))
return Builder.CreateSelectFMF(Cond, Builder.CreateFNegFMF(OtherOp, &I),
OtherOp, &I);
return nullptr;
}
/// Reduce integer multiplication patterns that contain a (+/-1 << Z) factor.
/// Callers are expected to call this twice to handle commuted patterns.
static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands,
InstCombiner::BuilderTy &Builder) {
Value *X = Mul.getOperand(0), *Y = Mul.getOperand(1);
if (CommuteOperands)
std::swap(X, Y);
const bool HasNSW = Mul.hasNoSignedWrap();
const bool HasNUW = Mul.hasNoUnsignedWrap();
// X * (1 << Z) --> X << Z
Value *Z;
if (match(Y, m_Shl(m_One(), m_Value(Z)))) {
bool PropagateNSW = HasNSW && cast<ShlOperator>(Y)->hasNoSignedWrap();
return Builder.CreateShl(X, Z, Mul.getName(), HasNUW, PropagateNSW);
}
// Similar to above, but an increment of the shifted value becomes an add:
// X * ((1 << Z) + 1) --> (X * (1 << Z)) + X --> (X << Z) + X
// This increases uses of X, so it may require a freeze, but that is still
// expected to be an improvement because it removes the multiply.
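// For example, with Z == 3: X * 9 == X * ((1 << 3) + 1) --> (X << 3) + X.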
BinaryOperator *Shift;
if (match(Y, m_OneUse(m_Add(m_BinOp(Shift), m_One()))) &&
match(Shift, m_OneUse(m_Shl(m_One(), m_Value(Z))))) {
bool PropagateNSW = HasNSW && Shift->hasNoSignedWrap();
Value *FrX = X;
if (!isGuaranteedNotToBeUndef(X))
FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
Value *Shl = Builder.CreateShl(FrX, Z, "mulshl", HasNUW, PropagateNSW);
return Builder.CreateAdd(Shl, FrX, Mul.getName(), HasNUW, PropagateNSW);
}
// Similar to above, but a decrement of the shifted value is disguised as
// 'not' and becomes a sub:
// X * (~(-1 << Z)) --> X * ((1 << Z) - 1) --> (X << Z) - X
// This increases uses of X, so it may require a freeze, but that is still
// expected to be an improvement because it removes the multiply.
if (match(Y, m_OneUse(m_Not(m_OneUse(m_Shl(m_AllOnes(), m_Value(Z))))))) {
Value *FrX = X;
if (!isGuaranteedNotToBeUndef(X))
FrX = Builder.CreateFreeze(X, X->getName() + ".fr");
Value *Shl = Builder.CreateShl(FrX, Z, "mulshl");
return Builder.CreateSub(Shl, FrX, Mul.getName());
}
return nullptr;
}
Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V =
simplifyMulInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (SimplifyAssociativeOrCommutative(I))
return &I;
if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
if (Value *V = foldUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
Type *Ty = I.getType();
const unsigned BitWidth = Ty->getScalarSizeInBits();
const bool HasNSW = I.hasNoSignedWrap();
const bool HasNUW = I.hasNoUnsignedWrap();
// X * -1 --> 0 - X
if (match(Op1, m_AllOnes())) {
return HasNSW ? BinaryOperator::CreateNSWNeg(Op0)
: BinaryOperator::CreateNeg(Op0);
}
// Also allow combining multiply instructions on vectors.
{
Value *NewOp;
Constant *C1, *C2;
const APInt *IVal;
if (match(&I, m_Mul(m_Shl(m_Value(NewOp), m_ImmConstant(C2)),
m_ImmConstant(C1))) &&
match(C1, m_APInt(IVal))) {
// ((X << C2)*C1) == (X * (C1 << C2))
Constant *Shl =
ConstantFoldBinaryOpOperands(Instruction::Shl, C1, C2, DL);
assert(Shl && "Constant folding of immediate constants failed");
BinaryOperator *Mul = cast<BinaryOperator>(I.getOperand(0));
BinaryOperator *BO = BinaryOperator::CreateMul(NewOp, Shl);
if (HasNUW && Mul->hasNoUnsignedWrap())
BO->setHasNoUnsignedWrap();
if (HasNSW && Mul->hasNoSignedWrap() && Shl->isNotMinSignedValue())
BO->setHasNoSignedWrap();
return BO;
}
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
// Replace X*(2^C) with X << C, where C is either a scalar or a vector.
if (Constant *NewCst = ConstantExpr::getExactLogBase2(C1)) {
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (HasNUW)
Shl->setHasNoUnsignedWrap();
if (HasNSW) {
const APInt *V;
if (match(NewCst, m_APInt(V)) && *V != V->getBitWidth() - 1)
Shl->setHasNoSignedWrap();
}
return Shl;
}
}
}
// mul (shr exact X, N), (2^N + 1) -> add (X, shr exact (X, N))
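// e.g., with N == 2 the multiplier is 5, and because the shift is exact
// (the low 2 bits of X are zero): (X >> 2) * 5 == X + (X >> 2).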
{
Value *NewOp;
const APInt *ShiftC;
const APInt *MulAP;
if (BitWidth > 2 &&
match(&I, m_Mul(m_Exact(m_Shr(m_Value(NewOp), m_APInt(ShiftC))),
m_APInt(MulAP))) &&
(*MulAP - 1).isPowerOf2() && *ShiftC == MulAP->logBase2()) {
Value *BinOp = Op0;
BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
// mul nuw (ashr exact X, N) -> add nuw (X, lshr exact (X, N))
if (HasNUW && OpBO->getOpcode() == Instruction::AShr && OpBO->hasOneUse())
BinOp = Builder.CreateLShr(NewOp, ConstantInt::get(Ty, *ShiftC), "",
/*isExact=*/true);
auto *NewAdd = BinaryOperator::CreateAdd(NewOp, BinOp);
if (HasNSW && (HasNUW || OpBO->getOpcode() == Instruction::LShr ||
ShiftC->getZExtValue() < BitWidth - 1))
NewAdd->setHasNoSignedWrap(true);
NewAdd->setHasNoUnsignedWrap(HasNUW);
return NewAdd;
}
}
if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
// Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation.
// The "* (1<<C)" thus becomes a potential shifting opportunity.
if (Value *NegOp0 =
Negator::Negate(/*IsNegation*/ true, HasNSW, Op0, *this)) {
auto *Op1C = cast<Constant>(Op1);
return replaceInstUsesWith(
I, Builder.CreateMul(NegOp0, ConstantExpr::getNeg(Op1C), "",
/*HasNUW=*/false,
HasNSW && Op1C->isNotMinSignedValue()));
}
// Try to convert multiply of extended operand to narrow negate and shift
// for better analysis.
// This is valid if the shift amount (trailing zeros in the multiplier
// constant) clears more high bits than the bitwidth difference between
// source and destination types:
// ({z/s}ext X) * (-1<<C) --> (zext (-X)) << C
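// e.g., (zext i8 X to i32) * (-1 << 24) --> (zext i8 (-X) to i32) << 24;
// only the low 8 bits survive the shift, so the kind of extend is moot.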
const APInt *NegPow2C;
Value *X;
if (match(Op0, m_ZExtOrSExt(m_Value(X))) &&
match(Op1, m_APIntAllowPoison(NegPow2C))) {
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
unsigned ShiftAmt = NegPow2C->countr_zero();
if (ShiftAmt >= BitWidth - SrcWidth) {
Value *N = Builder.CreateNeg(X, X->getName() + ".neg");
Value *Z = Builder.CreateZExt(N, Ty, N->getName() + ".z");
return BinaryOperator::CreateShl(Z, ConstantInt::get(Ty, ShiftAmt));
}
}
}
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
return FoldedMul;
if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
return replaceInstUsesWith(I, FoldedMul);
// Simplify mul instructions with a constant RHS.
Constant *MulC;
if (match(Op1, m_ImmConstant(MulC))) {
// Canonicalize (X+C1)*MulC -> X*MulC+C1*MulC.
// Canonicalize (X|C1)*MulC -> X*MulC+C1*MulC.
Value *X;
Constant *C1;
if (match(Op0, m_OneUse(m_AddLike(m_Value(X), m_ImmConstant(C1))))) {
// C1*MulC simplifies to a tidier constant.
Value *NewC = Builder.CreateMul(C1, MulC);
auto *BOp0 = cast<BinaryOperator>(Op0);
bool Op0NUW =
(BOp0->getOpcode() == Instruction::Or || BOp0->hasNoUnsignedWrap());
Value *NewMul = Builder.CreateMul(X, MulC);
auto *BO = BinaryOperator::CreateAdd(NewMul, NewC);
if (HasNUW && Op0NUW) {
// Even if NewMul folded to a constant, we can still mark BO as nuw.
if (auto *NewMulBO = dyn_cast<BinaryOperator>(NewMul))
NewMulBO->setHasNoUnsignedWrap();
BO->setHasNoUnsignedWrap();
}
return BO;
}
}
// abs(X) * abs(X) -> X * X
Value *X;
if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
return BinaryOperator::CreateMul(X, X);
{
Value *Y;
// abs(X) * abs(Y) -> abs(X * Y)
if (I.hasNoSignedWrap() &&
match(Op0,
m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One()))) &&
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(Y), m_One()))))
return replaceInstUsesWith(
I, Builder.CreateBinaryIntrinsic(Intrinsic::abs,
Builder.CreateNSWMul(X, Y),
Builder.getTrue()));
}
// -X * C --> X * -C
Value *Y;
Constant *Op1C;
if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Constant(Op1C)))
return BinaryOperator::CreateMul(X, ConstantExpr::getNeg(Op1C));
// -X * -Y --> X * Y
if (match(Op0, m_Neg(m_Value(X))) && match(Op1, m_Neg(m_Value(Y)))) {
auto *NewMul = BinaryOperator::CreateMul(X, Y);
if (HasNSW && cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap() &&
cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap())
NewMul->setHasNoSignedWrap();
return NewMul;
}
// -X * Y --> -(X * Y)
// X * -Y --> -(X * Y)
if (match(&I, m_c_Mul(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))
return BinaryOperator::CreateNeg(Builder.CreateMul(X, Y));
// (-X * Y) * -X --> (X * Y) * X
// (-X << Y) * -X --> (X << Y) * X
if (match(Op1, m_Neg(m_Value(X)))) {
if (Value *NegOp0 = Negator::Negate(false, /*IsNSW*/ false, Op0, *this))
return BinaryOperator::CreateMul(NegOp0, X);
}
if (Op0->hasOneUse()) {
// (mul (div exact X, C0), C1)
// -> (div exact X, C0 / C1)
// iff C0 % C1 == 0 and X / (C0 / C1) doesn't create UB.
const APInt *C1;
auto UDivCheck = [&C1](const APInt &C) { return C.urem(*C1).isZero(); };
auto SDivCheck = [&C1](const APInt &C) {
APInt Quot, Rem;
APInt::sdivrem(C, *C1, Quot, Rem);
return Rem.isZero() && !Quot.isAllOnes();
};
if (match(Op1, m_APInt(C1)) &&
(match(Op0, m_Exact(m_UDiv(m_Value(X), m_CheckedInt(UDivCheck)))) ||
match(Op0, m_Exact(m_SDiv(m_Value(X), m_CheckedInt(SDivCheck)))))) {
auto BOpc = cast<BinaryOperator>(Op0)->getOpcode();
return BinaryOperator::CreateExact(
BOpc, X,
Builder.CreateBinOp(BOpc, cast<BinaryOperator>(Op0)->getOperand(1),
Op1));
}
}
// (X / Y) * Y = X - (X % Y)
// (X / Y) * -Y = (X % Y) - X
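// e.g., (13 u/ 4) * 4 == 12 == 13 - (13 u% 4).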
{
Value *Y = Op1;
BinaryOperator *Div = dyn_cast<BinaryOperator>(Op0);
if (!Div || (Div->getOpcode() != Instruction::UDiv &&
Div->getOpcode() != Instruction::SDiv)) {
Y = Op0;
Div = dyn_cast<BinaryOperator>(Op1);
}
Value *Neg = dyn_castNegVal(Y);
if (Div && Div->hasOneUse() &&
(Div->getOperand(1) == Y || Div->getOperand(1) == Neg) &&
(Div->getOpcode() == Instruction::UDiv ||
Div->getOpcode() == Instruction::SDiv)) {
Value *X = Div->getOperand(0), *DivOp1 = Div->getOperand(1);
// If the division is exact, X % Y is zero, so we end up with X or -X.
if (Div->isExact()) {
if (DivOp1 == Y)
return replaceInstUsesWith(I, X);
return BinaryOperator::CreateNeg(X);
}
auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
: Instruction::SRem;
// X must be frozen because we are increasing its number of uses.
Value *XFreeze = X;
if (!isGuaranteedNotToBeUndef(X))
XFreeze = Builder.CreateFreeze(X, X->getName() + ".fr");
Value *Rem = Builder.CreateBinOp(RemOpc, XFreeze, DivOp1);
if (DivOp1 == Y)
return BinaryOperator::CreateSub(XFreeze, Rem);
return BinaryOperator::CreateSub(Rem, XFreeze);
}
}
// Fold the following two scenarios:
// 1) i1 mul -> i1 and.
// 2) X * Y --> X & Y, iff X, Y can be only {0,1}.
// Note: We could use known bits to generalize this and related patterns with
// shifts/truncs
if (Ty->isIntOrIntVectorTy(1) ||
(match(Op0, m_And(m_Value(), m_One())) &&
match(Op1, m_And(m_Value(), m_One()))))
return BinaryOperator::CreateAnd(Op0, Op1);
if (Value *R = foldMulShl1(I, /* CommuteOperands */ false, Builder))
return replaceInstUsesWith(I, R);
if (Value *R = foldMulShl1(I, /* CommuteOperands */ true, Builder))
return replaceInstUsesWith(I, R);
// (zext bool X) * (zext bool Y) --> zext (and X, Y)
// (sext bool X) * (sext bool Y) --> zext (and X, Y)
// Note: -1 * -1 == 1 * 1 == 1 (if the extends match, the result is the same)
if (((match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) ||
(match(Op0, m_SExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) &&
X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
(Op0->hasOneUse() || Op1->hasOneUse() || X == Y)) {
Value *And = Builder.CreateAnd(X, Y, "mulbool");
return CastInst::Create(Instruction::ZExt, And, Ty);
}
// (sext bool X) * (zext bool Y) --> sext (and X, Y)
// (zext bool X) * (sext bool Y) --> sext (and X, Y)
// Note: -1 * 1 == 1 * -1 == -1
if (((match(Op0, m_SExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) ||
(match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) &&
X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
(Op0->hasOneUse() || Op1->hasOneUse())) {
Value *And = Builder.CreateAnd(X, Y, "mulbool");
return CastInst::Create(Instruction::SExt, And, Ty);
}
// (zext bool X) * Y --> X ? Y : 0
// Y * (zext bool X) --> X ? Y : 0
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(X, Op1, ConstantInt::getNullValue(Ty));
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty));
// mul (sext X), Y -> select X, -Y, 0
// mul Y, (sext X) -> select X, -Y, 0
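// (sext i1 X is 0 or -1, so the product is either 0 or -Y.)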
if (match(&I, m_c_Mul(m_OneUse(m_SExt(m_Value(X))), m_Value(Y))) &&
X->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(X, Builder.CreateNeg(Y, "", I.hasNoSignedWrap()),
ConstantInt::getNullValue(Op0->getType()));
Constant *ImmC;
if (match(Op1, m_ImmConstant(ImmC))) {
// (sext bool X) * C --> X ? -C : 0
if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
Constant *NegC = ConstantExpr::getNeg(ImmC);
return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty));
}
// (ashr i32 X, 31) * C --> (X < 0) ? -C : 0
const APInt *C;
if (match(Op0, m_OneUse(m_AShr(m_Value(X), m_APInt(C)))) &&
*C == C->getBitWidth() - 1) {
Constant *NegC = ConstantExpr::getNeg(ImmC);
Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
return SelectInst::Create(IsNeg, NegC, ConstantInt::getNullValue(Ty));
}
}
// (lshr X, 31) * Y --> (X < 0) ? Y : 0
// TODO: We are not checking one-use because the elimination of the multiply
// is better for analysis?
const APInt *C;
if (match(&I, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) &&
*C == C->getBitWidth() - 1) {
Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
}
// (and X, 1) * Y --> (trunc X) ? Y : 0
if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
}
// ((ashr X, 31) | 1) * X --> abs(X)
// X * ((ashr X, 31) | 1) --> abs(X)
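// ((ashr X, 31) | 1) is -1 for negative X and +1 otherwise, so the product
// is |X|.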
if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),
m_SpecificIntAllowPoison(BitWidth - 1)),
m_One()),
m_Deferred(X)))) {
Value *Abs = Builder.CreateBinaryIntrinsic(
Intrinsic::abs, X, ConstantInt::getBool(I.getContext(), HasNSW));
Abs->takeName(&I);
return replaceInstUsesWith(I, Abs);
}
if (Instruction *Ext = narrowMathIfNoOverflow(I))
return Ext;
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
return Res;
// (mul Op0 Op1):
// if Log2(Op0) folds away ->
// (shl Op1, Log2(Op0))
// if Log2(Op1) folds away ->
// (shl Op0, Log2(Op1))
if (Value *Res = tryGetLog2(Op0, /*AssumeNonZero=*/false)) {
BinaryOperator *Shl = BinaryOperator::CreateShl(Op1, Res);
// We can only propagate the nuw flag.
Shl->setHasNoUnsignedWrap(HasNUW);
return Shl;
}
if (Value *Res = tryGetLog2(Op1, /*AssumeNonZero=*/false)) {
BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, Res);
// We can only propagate the nuw flag.
Shl->setHasNoUnsignedWrap(HasNUW);
return Shl;
}
bool Changed = false;
if (!HasNSW && willNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
if (!HasNUW && willNotOverflowUnsignedMul(Op0, Op1, I, I.hasNoSignedWrap())) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
return Changed ? &I : nullptr;
}
Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
BinaryOperator::BinaryOps Opcode = I.getOpcode();
assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
"Expected fmul or fdiv");
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Value *X, *Y;
// -X * -Y --> X * Y
// -X / -Y --> X / Y
if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
return BinaryOperator::CreateWithCopiedFlags(Opcode, X, Y, &I);
// fabs(X) * fabs(X) -> X * X
// fabs(X) / fabs(X) -> X / X
if (Op0 == Op1 && match(Op0, m_FAbs(m_Value(X))))
return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);
// fabs(X) * fabs(Y) --> fabs(X * Y)
// fabs(X) / fabs(Y) --> fabs(X / Y)
if (match(Op0, m_FAbs(m_Value(X))) && match(Op1, m_FAbs(m_Value(Y))) &&
(Op0->hasOneUse() || Op1->hasOneUse())) {
Value *XY = Builder.CreateBinOpFMF(Opcode, X, Y, &I);
Value *Fabs =
Builder.CreateUnaryIntrinsic(Intrinsic::fabs, XY, &I, I.getName());
return replaceInstUsesWith(I, Fabs);
}
return nullptr;
}
Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
auto createPowiExpr = [](BinaryOperator &I, InstCombinerImpl &IC, Value *X,
Value *Y, Value *Z) {
InstCombiner::BuilderTy &Builder = IC.Builder;
Value *YZ = Builder.CreateAdd(Y, Z);
Instruction *NewPow = Builder.CreateIntrinsic(
Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
return NewPow;
};
Value *X, *Y, *Z;
unsigned Opcode = I.getOpcode();
assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
"Unexpected opcode");
// powi(X, Y) * X --> powi(X, Y+1)
// X * powi(X, Y) --> powi(X, Y+1)
if (match(&I, m_c_FMul(m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
m_Value(X), m_Value(Y)))),
m_Deferred(X)))) {
Constant *One = ConstantInt::get(Y->getType(), 1);
if (willNotOverflowSignedAdd(Y, One, I)) {
Instruction *NewPow = createPowiExpr(I, *this, X, Y, One);
return replaceInstUsesWith(I, NewPow);
}
}
// powi(x, y) * powi(x, z) -> powi(x, y + z)
Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
if (Opcode == Instruction::FMul && I.isOnlyUserOfAnyOperand() &&
match(Op0, m_AllowReassoc(
m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y)))) &&
match(Op1, m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(m_Specific(X),
m_Value(Z)))) &&
Y->getType() == Z->getType()) {
Instruction *NewPow = createPowiExpr(I, *this, X, Y, Z);
return replaceInstUsesWith(I, NewPow);
}
if (Opcode == Instruction::FDiv && I.hasAllowReassoc() && I.hasNoNaNs()) {
// powi(X, Y) / X --> powi(X, Y-1)
// This is legal when (Y - 1) can't wrap around; the reassoc and
// nnan flags are required as well.
// TODO: Multi-use may be also better off creating Powi(x,y-1)
if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
m_Specific(Op1), m_Value(Y))))) &&
willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
Instruction *NewPow = createPowiExpr(I, *this, Op1, Y, NegOne);
return replaceInstUsesWith(I, NewPow);
}
// powi(X, Y) / (X * Z) --> powi(X, Y-1) / Z
// This is legal when (Y - 1) can't wrap around; the reassoc and
// nnan flags are required as well.
// TODO: Multi-use may be also better off creating Powi(x,y-1)
if (match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
m_Value(X), m_Value(Y))))) &&
match(Op1, m_AllowReassoc(m_c_FMul(m_Specific(X), m_Value(Z)))) &&
willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
auto *NewPow = createPowiExpr(I, *this, X, Y, NegOne);
return BinaryOperator::CreateFDivFMF(NewPow, Z, &I);
}
}
return nullptr;
}
// If we have the following pattern,
// X = 1.0/sqrt(a)
// R1 = X * X
// R2 = a/sqrt(a)
// then this method collects all the instructions that match R1 and R2.
static bool getFSqrtDivOptPattern(Instruction *Div,
SmallPtrSetImpl<Instruction *> &R1,
SmallPtrSetImpl<Instruction *> &R2) {
Value *A;
if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) ||
match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) {
for (User *U : Div->users()) {
Instruction *I = cast<Instruction>(U);
if (match(I, m_FMul(m_Specific(Div), m_Specific(Div))))
R1.insert(I);
}
CallInst *CI = cast<CallInst>(Div->getOperand(1));
for (User *U : CI->users()) {
Instruction *I = cast<Instruction>(U);
if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A)))))
R2.insert(I);
}
}
return !R1.empty() && !R2.empty();
}
// Check legality for transforming
// x = 1.0/sqrt(a)
// r1 = x * x;
// r2 = a/sqrt(a);
//
// TO
//
// r1 = 1/a
// r2 = sqrt(a)
// x = r1 * r2
// This transform works only when 'a' is known positive.
static bool isFSqrtDivToFMulLegal(Instruction *X,
SmallPtrSetImpl<Instruction *> &R1,
SmallPtrSetImpl<Instruction *> &R2) {
// Check if the required pattern for the transformation exists.
if (!getFSqrtDivOptPattern(X, R1, R2))
return false;
BasicBlock *BBx = X->getParent();
BasicBlock *BBr1 = (*R1.begin())->getParent();
BasicBlock *BBr2 = (*R2.begin())->getParent();
CallInst *FSqrt = cast<CallInst>(X->getOperand(1));
if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() ||
!FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs())
return false;
// We change x = 1/sqrt(a) into x = sqrt(a) * (1/a). This change is not
// covered by the arcp flag, as that is strictly meant to allow transforming
// ops of the form a/b into a * (1/b). So this must be considered an algebraic
// rewrite, and the reassoc flag has been used (rather, abused) in the past
// for algebraic rewrites.
if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs())
return false;
// Check the constraints on X, R1 and R2 combined.
// fdiv instruction and one of the multiplications must reside in the same
// block. If not, the optimized code may execute more ops than before and
// this may hamper the performance.
if (BBx != BBr1 && BBx != BBr2)
return false;
// Check the constraints on instructions in R1.
if (any_of(R1, [BBr1](Instruction *I) {
// When you have multiple instructions residing in R1 and R2
// respectively, it's difficult to generate combinations of (R1,R2) and
// then check if we have the required pattern. So, for now, just be
// conservative.
return (I->getParent() != BBr1 || !I->hasAllowReassoc());
}))
return false;
// Check the constraints on instructions in R2.
return all_of(R2, [BBr2](Instruction *I) {
// When you have multiple instructions residing in R1 and R2
// respectively, it's difficult to generate combinations of (R1,R2) and
// then check if we have the required pattern. So, for now, just be
// conservative.
return (I->getParent() == BBr2 && I->hasAllowReassoc());
});
}
Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
Value *X, *Y;
Constant *C;
BinaryOperator *Op0BinOp;
// Reassociate constant RHS with another constant to form constant
// expression.
if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP() &&
match(Op0, m_AllowReassoc(m_BinOp(Op0BinOp)))) {
// Everything in this scope folds I with Op0, intersecting their FMF.
FastMathFlags FMF = I.getFastMathFlags() & Op0BinOp->getFastMathFlags();
Constant *C1;
if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
// (C1 / X) * C --> (C * C1) / X
Constant *CC1 =
ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
if (CC1 && CC1->isNormalFP())
return BinaryOperator::CreateFDivFMF(CC1, X, FMF);
}
if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
// FIXME: This seems like it should also be checking for arcp
// (X / C1) * C --> X * (C / C1)
Constant *CDivC1 =
ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
if (CDivC1 && CDivC1->isNormalFP())
return BinaryOperator::CreateFMulFMF(X, CDivC1, FMF);
// If the constant was a denormal, try reassociating differently.
// (X / C1) * C --> X / (C1 / C)
Constant *C1DivC =
ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
return BinaryOperator::CreateFDivFMF(X, C1DivC, FMF);
}
// We do not need to match 'fadd C, X' and 'fsub X, C' because they are
// canonicalized to 'fadd X, C'. Distributing the multiply may allow
// further folds and (X * C) + C2 is 'fma'.
if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
// (X + C1) * C --> (X * C) + (C * C1)
if (Constant *CC1 =
ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
Value *XC = Builder.CreateFMulFMF(X, C, FMF);
return BinaryOperator::CreateFAddFMF(XC, CC1, FMF);
}
}
if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
// (C1 - X) * C --> (C * C1) - (X * C)
if (Constant *CC1 =
ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
Value *XC = Builder.CreateFMulFMF(X, C, FMF);
return BinaryOperator::CreateFSubFMF(CC1, XC, FMF);
}
}
}
Value *Z;
if (match(&I,
m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
m_Value(Z)))) {
BinaryOperator *DivOp = cast<BinaryOperator>(((Z == Op0) ? Op1 : Op0));
FastMathFlags FMF = I.getFastMathFlags() & DivOp->getFastMathFlags();
if (FMF.allowReassoc()) {
// Sink division: (X / Y) * Z --> (X * Z) / Y
auto *NewFMul = Builder.CreateFMulFMF(X, Z, FMF);
return BinaryOperator::CreateFDivFMF(NewFMul, Y, FMF);
}
}
// sqrt(X) * sqrt(Y) -> sqrt(X * Y)
// nnan disallows the possibility of returning a number if both operands are
// negative (in that case, we should return NaN).
if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) &&
match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) {
Value *XY = Builder.CreateFMulFMF(X, Y, &I);
Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
return replaceInstUsesWith(I, Sqrt);
}
// The following transforms are done irrespective of the number of uses
// for the expression "1.0/sqrt(X)".
// 1) 1.0/sqrt(X) * X -> X/sqrt(X)
// 2) X * 1.0/sqrt(X) -> X/sqrt(X)
// We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
// has the necessary (reassoc) fast-math-flags.
if (I.hasNoSignedZeros() &&
match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
match(Y, m_Sqrt(m_Value(X))) && Op1 == X)
return BinaryOperator::CreateFDivFMF(X, Y, &I);
if (I.hasNoSignedZeros() &&
match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
match(Y, m_Sqrt(m_Value(X))) && Op0 == X)
return BinaryOperator::CreateFDivFMF(X, Y, &I);
// Like the similar transform in instsimplify, this requires 'nsz' because
// sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 && Op0->hasNUses(2)) {
// Peek through fdiv to find squaring of square root:
// (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) {
Value *XX = Builder.CreateFMulFMF(X, X, &I);
return BinaryOperator::CreateFDivFMF(XX, Y, &I);
}
// (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) {
Value *XX = Builder.CreateFMulFMF(X, X, &I);
return BinaryOperator::CreateFDivFMF(Y, XX, &I);
}
}
// pow(X, Y) * X --> pow(X, Y+1)
// X * pow(X, Y) --> pow(X, Y+1)
if (match(&I, m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X),
m_Value(Y))),
m_Deferred(X)))) {
Value *Y1 = Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), 1.0), &I);
Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, Y1, &I);
return replaceInstUsesWith(I, Pow);
}
if (Instruction *FoldedPowi = foldPowiReassoc(I))
return FoldedPowi;
if (I.isOnlyUserOfAnyOperand()) {
// pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
match(Op1, m_Intrinsic<Intrinsic::pow>(m_Specific(X), m_Value(Z)))) {
auto *YZ = Builder.CreateFAddFMF(Y, Z, &I);
auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, YZ, &I);
return replaceInstUsesWith(I, NewPow);
}
// pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
if (match(Op0, m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))) &&
match(Op1, m_Intrinsic<Intrinsic::pow>(m_Value(Z), m_Specific(Y)))) {
auto *XZ = Builder.CreateFMulFMF(X, Z, &I);
auto *NewPow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, XZ, Y, &I);
return replaceInstUsesWith(I, NewPow);
}
// exp(X) * exp(Y) -> exp(X + Y)
if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
Value *XY = Builder.CreateFAddFMF(X, Y, &I);
Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
return replaceInstUsesWith(I, Exp);
}
// exp2(X) * exp2(Y) -> exp2(X + Y)
if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y)))) {
Value *XY = Builder.CreateFAddFMF(X, Y, &I);
Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
return replaceInstUsesWith(I, Exp2);
}
}
// (X*Y) * X => (X*X) * Y where Y != X
// The purpose is two-fold:
// 1) to form a power expression (of X).
// 2) potentially shorten the critical path: After transformation, the
// latency of the instruction Y is amortized by the expression of X*X,
// and therefore Y is in a "less critical" position compared to what it
// was before the transformation.
if (match(Op0, m_OneUse(m_c_FMul(m_Specific(Op1), m_Value(Y)))) && Op1 != Y) {
Value *XX = Builder.CreateFMulFMF(Op1, Op1, &I);
return BinaryOperator::CreateFMulFMF(XX, Y, &I);
}
if (match(Op1, m_OneUse(m_c_FMul(m_Specific(Op0), m_Value(Y)))) && Op0 != Y) {
Value *XX = Builder.CreateFMulFMF(Op0, Op0, &I);
return BinaryOperator::CreateFMulFMF(XX, Y, &I);
}
return nullptr;
}
Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (SimplifyAssociativeOrCommutative(I))
return &I;
if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
return FoldedMul;
if (Value *FoldedMul = foldMulSelectToNegate(I, Builder))
return replaceInstUsesWith(I, FoldedMul);
if (Instruction *R = foldFPSignBitOps(I))
return R;
if (Instruction *R = foldFBinOpOfIntCasts(I))
return R;
// X * -1.0 --> -X
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (match(Op1, m_SpecificFP(-1.0)))
return UnaryOperator::CreateFNegFMF(Op0, &I);
// With no-nans/no-infs:
// X * 0.0 --> copysign(0.0, X)
// X * -0.0 --> copysign(0.0, -X)
const APFloat *FPC;
if (match(Op1, m_APFloatAllowPoison(FPC)) && FPC->isZero() &&
((I.hasNoInfs() && isKnownNeverNaN(Op0, SQ.getWithInstruction(&I))) ||
isKnownNeverNaN(&I, SQ.getWithInstruction(&I)))) {
if (FPC->isNegative())
Op0 = Builder.CreateFNegFMF(Op0, &I);
CallInst *CopySign = Builder.CreateIntrinsic(Intrinsic::copysign,
{I.getType()}, {Op1, Op0}, &I);
return replaceInstUsesWith(I, CopySign);
}
// -X * C --> X * -C
Value *X, *Y;
Constant *C;
if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Constant(C)))
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
return BinaryOperator::CreateFMulFMF(X, NegC, &I);
if (I.hasNoNaNs() && I.hasNoSignedZeros()) {
// (uitofp bool X) * Y --> X ? Y : 0
// Y * (uitofp bool X) --> X ? Y : 0
// Note INF * 0 is NaN.
if (match(Op0, m_UIToFP(m_Value(X))) &&
X->getType()->isIntOrIntVectorTy(1)) {
auto *SI = SelectInst::Create(X, Op1, ConstantFP::get(I.getType(), 0.0));
SI->copyFastMathFlags(I.getFastMathFlags());
return SI;
}
if (match(Op1, m_UIToFP(m_Value(X))) &&
X->getType()->isIntOrIntVectorTy(1)) {
auto *SI = SelectInst::Create(X, Op0, ConstantFP::get(I.getType(), 0.0));
SI->copyFastMathFlags(I.getFastMathFlags());
return SI;
}
}
// (select A, B, C) * (select A, D, E) --> select A, (B*D), (C*E)
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
return replaceInstUsesWith(I, V);
if (I.hasAllowReassoc())
if (Instruction *FoldedMul = foldFMulReassoc(I))
return FoldedMul;
// log2(X * 0.5) * Y = log2(X) * Y - Y
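// (log2(X * 0.5) == log2(X) + log2(0.5) == log2(X) - 1, so multiplying by Y
// gives log2(X) * Y - Y.)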
if (I.isFast()) {
IntrinsicInst *Log2 = nullptr;
if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::log2>(
m_OneUse(m_FMul(m_Value(X), m_SpecificFP(0.5))))))) {
Log2 = cast<IntrinsicInst>(Op0);
Y = Op1;
}
if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::log2>(
m_OneUse(m_FMul(m_Value(X), m_SpecificFP(0.5))))))) {
Log2 = cast<IntrinsicInst>(Op1);
Y = Op0;
}
if (Log2) {
Value *Log2 = Builder.CreateUnaryIntrinsic(Intrinsic::log2, X, &I);
Value *LogXTimesY = Builder.CreateFMulFMF(Log2, Y, &I);
return BinaryOperator::CreateFSubFMF(LogXTimesY, Y, &I);
}
}
// Simplify FMUL recurrences starting with 0.0 to 0.0 if nnan and nsz are set.
// Given a phi node whose entry value is 0 and which is used in an fmul
// operation, we can safely replace the fmul with 0 and eliminate the loop
// operation.
PHINode *PN = nullptr;
Value *Start = nullptr, *Step = nullptr;
if (matchSimpleRecurrence(&I, PN, Start, Step) && I.hasNoNaNs() &&
I.hasNoSignedZeros() && match(Start, m_Zero()))
return replaceInstUsesWith(I, Start);
// minimum(X, Y) * maximum(X, Y) => X * Y.
if (match(&I,
m_c_FMul(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
m_Deferred(Y))))) {
BinaryOperator *Result = BinaryOperator::CreateFMulFMF(X, Y, &I);
// We cannot preserve ninf if the nnan flag is not set.
// If X is NaN and Y is Inf then the original program computed NaN * NaN,
// while the optimized version computes NaN * Inf, which is poison with the
// ninf flag.
if (!Result->hasNoNaNs())
Result->setHasNoInfs(false);
return Result;
}
// tan(X) * cos(X) -> sin(X)
if (I.hasAllowContract() &&
match(&I,
m_c_FMul(m_OneUse(m_Intrinsic<Intrinsic::tan>(m_Value(X))),
m_OneUse(m_Intrinsic<Intrinsic::cos>(m_Deferred(X)))))) {
auto *Sin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, &I);
if (auto *Metadata = I.getMetadata(LLVMContext::MD_fpmath)) {
Sin->setMetadata(LLVMContext::MD_fpmath, Metadata);
}
return replaceInstUsesWith(I, Sin);
}
return nullptr;
}
/// Fold a divide or remainder with a select instruction divisor when one of the
/// select operands is zero. In that case, we can use the other select operand
/// because div/rem by zero is undefined.
bool InstCombinerImpl::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) {
SelectInst *SI = dyn_cast<SelectInst>(I.getOperand(1));
if (!SI)
return false;
int NonNullOperand;
if (match(SI->getTrueValue(), m_Zero()))
// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
NonNullOperand = 2;
else if (match(SI->getFalseValue(), m_Zero()))
// div/rem X, (Cond ? Y : 0) -> div/rem X, Y
NonNullOperand = 1;
else
return false;
// Change the div/rem to use 'Y' instead of the select.
replaceOperand(I, 1, SI->getOperand(NonNullOperand));
// Okay, we know we can replace the operand of the div/rem with 'Y' with no
// problem. However, the select, or the condition of the select may have
// multiple uses. Based on our knowledge that the operand must be non-zero,
// propagate the known value for the select into other uses of it, and
// propagate a known value of the condition into its other users.
// If the select and the condition each have only a single use, don't bother
// with this; exit early.
Value *SelectCond = SI->getCondition();
if (SI->use_empty() && SelectCond->hasOneUse())
return true;
// Scan the current block backward, looking for other uses of SI.
BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin();
Type *CondTy = SelectCond->getType();
while (BBI != BBFront) {
--BBI;
// If we find an instruction that we can't assume will return, stop:
// information from below it cannot be propagated above it.
if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
break;
// Replace uses of the select or its condition with the known values.
for (Use &Op : BBI->operands()) {
if (Op == SI) {
replaceUse(Op, SI->getOperand(NonNullOperand));
Worklist.push(&*BBI);
} else if (Op == SelectCond) {
replaceUse(Op, NonNullOperand == 1 ? ConstantInt::getTrue(CondTy)
: ConstantInt::getFalse(CondTy));
Worklist.push(&*BBI);
}
}
// If we passed the instruction, quit looking for it.
if (&*BBI == SI)
SI = nullptr;
if (&*BBI == SelectCond)
SelectCond = nullptr;
// If we ran out of things to eliminate, break out of the loop.
if (!SelectCond && !SI)
break;
}
return true;
}
/// True if the multiply cannot be expressed in an integer of this size.
static bool multiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
bool IsSigned) {
bool Overflow;
Product = IsSigned ? C1.smul_ov(C2, Overflow) : C1.umul_ov(C2, Overflow);
return Overflow;
}
/// True if C1 is a multiple of C2. Quotient contains C1/C2.
static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
bool IsSigned) {
assert(C1.getBitWidth() == C2.getBitWidth() && "Constant widths not equal");
// Bail if we will divide by zero.
if (C2.isZero())
return false;
// Bail if we would divide INT_MIN by -1.
if (IsSigned && C1.isMinSignedValue() && C2.isAllOnes())
return false;
APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
if (IsSigned)
APInt::sdivrem(C1, C2, Quotient, Remainder);
else
APInt::udivrem(C1, C2, Quotient, Remainder);
return Remainder.isMinValue();
}
static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
assert((I.getOpcode() == Instruction::SDiv ||
I.getOpcode() == Instruction::UDiv) &&
"Expected integer divide");
bool IsSigned = I.getOpcode() == Instruction::SDiv;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
Value *X, *Y, *Z;
// With appropriate no-wrap constraints, remove a common factor in the
// dividend and divisor that is disguised as a left-shifted value.
if (match(Op1, m_Shl(m_Value(X), m_Value(Z))) &&
match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) {
// Both operands must have the matching no-wrap for this kind of division.
auto *Mul = cast<OverflowingBinaryOperator>(Op0);
auto *Shl = cast<OverflowingBinaryOperator>(Op1);
bool HasNUW = Mul->hasNoUnsignedWrap() && Shl->hasNoUnsignedWrap();
bool HasNSW = Mul->hasNoSignedWrap() && Shl->hasNoSignedWrap();
// (X * Y) u/ (X << Z) --> Y u>> Z
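// (X << Z == X * (1 << Z), so with nuw the common factor X cancels and the
// quotient is Y u/ (1 << Z), i.e. Y u>> Z.)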
if (!IsSigned && HasNUW)
return Builder.CreateLShr(Y, Z, "", I.isExact());
// (X * Y) s/ (X << Z) --> Y s/ (1 << Z)
if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) {
Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z);
return Builder.CreateSDiv(Y, Shl, "", I.isExact());
}
}
// With appropriate no-wrap constraints, remove a common factor in the
// dividend and divisor that is disguised as a left-shift amount.
if (match(Op0, m_Shl(m_Value(X), m_Value(Z))) &&
match(Op1, m_Shl(m_Value(Y), m_Specific(Z)))) {
auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);
// For unsigned div, we need 'nuw' on both shifts or
// 'nsw' on both shifts + 'nuw' on the dividend.
// (X << Z) / (Y << Z) --> X / Y
if (!IsSigned &&
((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) ||
(Shl0->hasNoUnsignedWrap() && Shl0->hasNoSignedWrap() &&
Shl1->hasNoSignedWrap())))
return Builder.CreateUDiv(X, Y, "", I.isExact());
// For signed div, we need 'nsw' on both shifts + 'nuw' on the divisor.
// (X << Z) / (Y << Z) --> X / Y
if (IsSigned && Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap() &&
Shl1->hasNoUnsignedWrap())
return Builder.CreateSDiv(X, Y, "", I.isExact());
}
// If X << Y and X << Z does not overflow, then:
// (X << Y) / (X << Z) -> (1 << Y) / (1 << Z) -> 1 << Y >> Z
if (match(Op0, m_Shl(m_Value(X), m_Value(Y))) &&
match(Op1, m_Shl(m_Specific(X), m_Value(Z)))) {
auto *Shl0 = cast<OverflowingBinaryOperator>(Op0);
auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);
if (IsSigned ? (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap())
: (Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap())) {
Constant *One = ConstantInt::get(X->getType(), 1);
// Only preserve the nsw flag if dividend has nsw
// or divisor has nsw and operator is sdiv.
Value *Dividend = Builder.CreateShl(
One, Y, "shl.dividend",
/*HasNUW=*/true,
/*HasNSW=*/
IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap())
: Shl0->hasNoSignedWrap());
return Builder.CreateLShr(Dividend, Z, "", I.isExact());
}
}
return nullptr;
}
/// Common integer divide/remainder transforms
Instruction *InstCombinerImpl::commonIDivRemTransforms(BinaryOperator &I) {
assert(I.isIntDivRem() && "Unexpected instruction");
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// If any element of a constant divisor fixed-width vector is zero or undef,
// the behavior is undefined and we can fold the whole op to poison.
auto *Op1C = dyn_cast<Constant>(Op1);
Type *Ty = I.getType();
auto *VTy = dyn_cast<FixedVectorType>(Ty);
if (Op1C && VTy) {
unsigned NumElts = VTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = Op1C->getAggregateElement(i);
if (Elt && (Elt->isNullValue() || isa<UndefValue>(Elt)))
return replaceInstUsesWith(I, PoisonValue::get(Ty));
}
}
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
// The RHS is known non-zero.
if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I))
return replaceOperand(I, 1, V);
// Handle cases involving: div/rem X, (select Cond, Y, Z)
if (simplifyDivRemOfSelectWithZeroOp(I))
return &I;
// If the divisor is a select-of-constants, try to constant fold all div ops:
// C div/rem (select Cond, TrueC, FalseC) --> select Cond, (C div/rem TrueC),
// (C div/rem FalseC)
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
if (match(Op0, m_ImmConstant()) &&
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
/*FoldWithMultiUse*/ true))
return R;
}
return nullptr;
}
/// This function implements the transforms common to both integer division
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
if (Instruction *Res = commonIDivRemTransforms(I))
return Res;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
bool IsSigned = I.getOpcode() == Instruction::SDiv;
Type *Ty = I.getType();
const APInt *C2;
if (match(Op1, m_APInt(C2))) {
Value *X;
const APInt *C1;
// (X / C1) / C2 -> X / (C1*C2)
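// e.g., (X u/ 3) u/ 5 --> X u/ 15 (when the product does not overflow).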
if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, Product));
}
APInt Quotient(C2->getBitWidth(), /*val=*/0ULL, IsSigned);
if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
// (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
auto *NewDiv = BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, Quotient));
NewDiv->setIsExact(I.isExact());
return NewDiv;
}
// (X * C1) / C2 -> X * (C1 / C2) if C1 is a multiple of C2.
if (isMultiple(*C1, *C2, Quotient, IsSigned)) {
auto *Mul = BinaryOperator::Create(Instruction::Mul, X,
ConstantInt::get(Ty, Quotient));
auto *OBO = cast<OverflowingBinaryOperator>(Op0);
Mul->setHasNoUnsignedWrap(!IsSigned && OBO->hasNoUnsignedWrap());
Mul->setHasNoSignedWrap(OBO->hasNoSignedWrap());
return Mul;
}
}
if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
C1->ult(C1->getBitWidth() - 1)) ||
(!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))) &&
C1->ult(C1->getBitWidth()))) {
APInt C1Shifted = APInt::getOneBitSet(
C1->getBitWidth(), static_cast<unsigned>(C1->getZExtValue()));
// (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of 1 << C1.
if (isMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
auto *BO = BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, Quotient));
BO->setIsExact(I.isExact());
return BO;
}
// (X << C1) / C2 -> X * ((1 << C1) / C2) if 1 << C1 is a multiple of C2.
if (isMultiple(C1Shifted, *C2, Quotient, IsSigned)) {
auto *Mul = BinaryOperator::Create(Instruction::Mul, X,
ConstantInt::get(Ty, Quotient));
auto *OBO = cast<OverflowingBinaryOperator>(Op0);
Mul->setHasNoUnsignedWrap(!IsSigned && OBO->hasNoUnsignedWrap());
Mul->setHasNoSignedWrap(OBO->hasNoSignedWrap());
return Mul;
}
}
// Distribute div over add to eliminate a matching div/mul pair:
// ((X * C2) + C1) / C2 --> X + C1/C2
// We need a multiple of the divisor for a signed add constant, but
// unsigned is fine with any constant pair.
if (IsSigned &&
match(Op0, m_NSWAddLike(m_NSWMul(m_Value(X), m_SpecificInt(*C2)),
m_APInt(C1))) &&
isMultiple(*C1, *C2, Quotient, IsSigned)) {
return BinaryOperator::CreateNSWAdd(X, ConstantInt::get(Ty, Quotient));
}
if (!IsSigned &&
match(Op0, m_NUWAddLike(m_NUWMul(m_Value(X), m_SpecificInt(*C2)),
m_APInt(C1)))) {
return BinaryOperator::CreateNUWAdd(X,
ConstantInt::get(Ty, C1->udiv(*C2)));
}
if (!C2->isZero()) // avoid X udiv 0
if (Instruction *FoldedDiv = foldBinOpIntoSelectOrPhi(I))
return FoldedDiv;
}
if (match(Op0, m_One())) {
assert(!Ty->isIntOrIntVectorTy(1) && "i1 divide not removed?");
if (IsSigned) {
// 1 / 0 --> undef ; 1 / 1 --> 1 ; 1 / -1 --> -1 ; 1 / anything else --> 0
// (Op1 + 1) u< 3 ? Op1 : 0
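// Adding 1 maps Op1 in {-1, 0, 1} to {0, 1, 2}, exactly the values that are
// u< 3; for those the select returns Op1 (1/1 == 1, 1/-1 == -1, 1/0 is
// undefined anyway), and any other divisor yields a quotient of 0.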
// Op1 must be frozen because we are increasing its number of uses.
Value *F1 = Op1;
if (!isGuaranteedNotToBeUndef(Op1))
F1 = Builder.CreateFreeze(Op1, Op1->getName() + ".fr");
Value *Inc = Builder.CreateAdd(F1, Op0);
Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(Ty, 3));
return SelectInst::Create(Cmp, F1, ConstantInt::get(Ty, 0));
} else {
// If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
// result is one, otherwise it's zero.
return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), Ty);
}
}
// See if we can fold away this div instruction.
if (SimplifyDemandedInstructionBits(I))
return &I;
// (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
Value *X, *Z;
if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) // (X - Z) / Y; Y = Op1
if ((IsSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
(!IsSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
return BinaryOperator::Create(I.getOpcode(), X, Op1);
// (X << Y) / X -> 1 << Y
Value *Y;
if (IsSigned && match(Op0, m_NSWShl(m_Specific(Op1), m_Value(Y))))
return BinaryOperator::CreateNSWShl(ConstantInt::get(Ty, 1), Y);
if (!IsSigned && match(Op0, m_NUWShl(m_Specific(Op1), m_Value(Y))))
return BinaryOperator::CreateNUWShl(ConstantInt::get(Ty, 1), Y);
// X / (X * Y) -> 1 / Y if the multiplication does not overflow.
if (match(Op1, m_c_Mul(m_Specific(Op0), m_Value(Y)))) {
bool HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
bool HasNUW = cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
if ((IsSigned && HasNSW) || (!IsSigned && HasNUW)) {
replaceOperand(I, 0, ConstantInt::get(Ty, 1));
replaceOperand(I, 1, Y);
return &I;
}
}
// (X << Z) / (X * Y) -> (1 << Z) / Y
// TODO: Handle sdiv.
if (!IsSigned && Op1->hasOneUse() &&
match(Op0, m_NUWShl(m_Value(X), m_Value(Z))) &&
match(Op1, m_c_Mul(m_Specific(X), m_Value(Y))))
if (cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap()) {
Instruction *NewDiv = BinaryOperator::CreateUDiv(
Builder.CreateShl(ConstantInt::get(Ty, 1), Z, "", /*NUW*/ true), Y);
NewDiv->setIsExact(I.isExact());
return NewDiv;
}
if (Value *R = foldIDivShl(I, Builder))
return replaceInstUsesWith(I, R);
// With the appropriate no-wrap constraint, remove a multiply by the divisor
// after peeking through another divide:
// ((Op1 * X) / Y) / Op1 --> X / Y
if (match(Op0, m_BinOp(I.getOpcode(), m_c_Mul(m_Specific(Op1), m_Value(X)),
m_Value(Y)))) {
auto *InnerDiv = cast<PossiblyExactOperator>(Op0);
auto *Mul = cast<OverflowingBinaryOperator>(InnerDiv->getOperand(0));
Instruction *NewDiv = nullptr;
if (!IsSigned && Mul->hasNoUnsignedWrap())
NewDiv = BinaryOperator::CreateUDiv(X, Y);
else if (IsSigned && Mul->hasNoSignedWrap())
NewDiv = BinaryOperator::CreateSDiv(X, Y);
// Exact propagates only if both of the original divides are exact.
if (NewDiv) {
NewDiv->setIsExact(I.isExact() && InnerDiv->isExact());
return NewDiv;
}
}
// (X * Y) / (X * Z) --> Y / Z (and commuted variants)
if (match(Op0, m_Mul(m_Value(X), m_Value(Y)))) {
auto OB0HasNSW = cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap();
auto OB0HasNUW = cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap();
auto CreateDivOrNull = [&](Value *A, Value *B) -> Instruction * {
auto OB1HasNSW = cast<OverflowingBinaryOperator>(Op1)->hasNoSignedWrap();
auto OB1HasNUW =
cast<OverflowingBinaryOperator>(Op1)->hasNoUnsignedWrap();
const APInt *C1, *C2;
if (IsSigned && OB0HasNSW) {
if (OB1HasNSW && match(B, m_APInt(C1)) && !C1->isAllOnes())
return BinaryOperator::CreateSDiv(A, B);
}
if (!IsSigned && OB0HasNUW) {
if (OB1HasNUW)
return BinaryOperator::CreateUDiv(A, B);
if (match(A, m_APInt(C1)) && match(B, m_APInt(C2)) && C2->ule(*C1))
return BinaryOperator::CreateUDiv(A, B);
}
return nullptr;
};
if (match(Op1, m_c_Mul(m_Specific(X), m_Value(Z)))) {
if (auto *Val = CreateDivOrNull(Y, Z))
return Val;
}
if (match(Op1, m_c_Mul(m_Specific(Y), m_Value(Z)))) {
if (auto *Val = CreateDivOrNull(X, Z))
return Val;
}
}
return nullptr;
}
Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
bool DoFold) {
auto IfFold = [DoFold](function_ref<Value *()> Fn) {
if (!DoFold)
return reinterpret_cast<Value *>(-1);
return Fn();
};
// FIXME: assert that Op1 isn't/doesn't contain undef.
// log2(2^C) -> C
if (match(Op, m_Power2()))
return IfFold([&]() {
Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op));
if (!C)
llvm_unreachable("Failed to constant fold udiv -> logbase2");
return C;
});
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ == MaxAnalysisRecursionDepth)
return nullptr;
// log2(zext X) -> zext log2(X)
// FIXME: Require one use?
Value *X, *Y;
if (match(Op, m_ZExt(m_Value(X))))
if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); });
// log2(trunc x) -> trunc log2(X)
// FIXME: Require one use?
if (match(Op, m_Trunc(m_Value(X)))) {
auto *TI = cast<TruncInst>(Op);
if (AssumeNonZero || TI->hasNoUnsignedWrap())
if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
return IfFold([&]() {
return Builder.CreateTrunc(LogX, Op->getType(), "",
/*IsNUW=*/TI->hasNoUnsignedWrap());
});
}
// log2(X << Y) -> log2(X) + Y
// FIXME: Require one use unless X is 1?
if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) {
auto *BO = cast<OverflowingBinaryOperator>(Op);
// nuw will be set if the `shl` is trivially non-zero.
if (AssumeNonZero || BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap())
if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
}
// log2(X >>u Y) -> log2(X) - Y
// FIXME: Require one use?
if (match(Op, m_LShr(m_Value(X), m_Value(Y)))) {
auto *PEO = cast<PossiblyExactOperator>(Op);
if (AssumeNonZero || PEO->isExact())
if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
return IfFold([&]() { return Builder.CreateSub(LogX, Y); });
}
// log2(X & Y) -> either log2(X) or log2(Y)
// This requires `AssumeNonZero` as `X & Y` may be zero when X != Y.
if (AssumeNonZero && match(Op, m_And(m_Value(X), m_Value(Y)))) {
if (Value *LogX = takeLog2(X, Depth, AssumeNonZero, DoFold))
return IfFold([&]() { return LogX; });
if (Value *LogY = takeLog2(Y, Depth, AssumeNonZero, DoFold))
return IfFold([&]() { return LogY; });
}
// log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y)
// FIXME: Require one use?
if (SelectInst *SI = dyn_cast<SelectInst>(Op))
if (Value *LogX = takeLog2(SI->getOperand(1), Depth, AssumeNonZero, DoFold))
if (Value *LogY =
takeLog2(SI->getOperand(2), Depth, AssumeNonZero, DoFold))
return IfFold([&]() {
return Builder.CreateSelect(SI->getOperand(0), LogX, LogY);
});
// log2(umin(X, Y)) -> umin(log2(X), log2(Y))
// log2(umax(X, Y)) -> umax(log2(X), log2(Y))
auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op);
if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned()) {
    // Pass AssumeNonZero as false here. Otherwise we can hit a case where
    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) because of overflow.
if (Value *LogX = takeLog2(MinMax->getLHS(), Depth,
/*AssumeNonZero*/ false, DoFold))
if (Value *LogY = takeLog2(MinMax->getRHS(), Depth,
/*AssumeNonZero*/ false, DoFold))
return IfFold([&]() {
return Builder.CreateBinaryIntrinsic(MinMax->getIntrinsicID(), LogX,
LogY);
});
}
return nullptr;
}
/// If we have zero-extended operands of an unsigned div or rem, we may be able
/// to narrow the operation (sink the zext below the math).
static Instruction *narrowUDivURem(BinaryOperator &I,
InstCombinerImpl &IC) {
Instruction::BinaryOps Opcode = I.getOpcode();
Value *N = I.getOperand(0);
Value *D = I.getOperand(1);
Type *Ty = I.getType();
Value *X, *Y;
if (match(N, m_ZExt(m_Value(X))) && match(D, m_ZExt(m_Value(Y))) &&
X->getType() == Y->getType() && (N->hasOneUse() || D->hasOneUse())) {
// udiv (zext X), (zext Y) --> zext (udiv X, Y)
// urem (zext X), (zext Y) --> zext (urem X, Y)
Value *NarrowOp = IC.Builder.CreateBinOp(Opcode, X, Y);
return new ZExtInst(NarrowOp, Ty);
}
Constant *C;
if (isa<Instruction>(N) && match(N, m_OneUse(m_ZExt(m_Value(X)))) &&
match(D, m_Constant(C))) {
// If the constant is the same in the smaller type, use the narrow version.
Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
if (!TruncC)
return nullptr;
// udiv (zext X), C --> zext (udiv X, C')
// urem (zext X), C --> zext (urem X, C')
return new ZExtInst(IC.Builder.CreateBinOp(Opcode, X, TruncC), Ty);
}
if (isa<Instruction>(D) && match(D, m_OneUse(m_ZExt(m_Value(X)))) &&
match(N, m_Constant(C))) {
// If the constant is the same in the smaller type, use the narrow version.
Constant *TruncC = IC.getLosslessUnsignedTrunc(C, X->getType());
if (!TruncC)
return nullptr;
// udiv C, (zext X) --> zext (udiv C', X)
// urem C, (zext X) --> zext (urem C', X)
return new ZExtInst(IC.Builder.CreateBinOp(Opcode, TruncC, X), Ty);
}
return nullptr;
}
Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
if (Value *V = simplifyUDivInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Value *X;
const APInt *C1, *C2;
if (match(Op0, m_LShr(m_Value(X), m_APInt(C1))) && match(Op1, m_APInt(C2))) {
// (X lshr C1) udiv C2 --> X udiv (C2 << C1)
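    // e.g. (X lshr 2) udiv 3 --> X udiv 12, since (X u/ 4) u/ 3 == X u/ 12
    // as long as 3 << 2 does not overflow.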
bool Overflow;
APInt C2ShlC1 = C2->ushl_ov(*C1, Overflow);
if (!Overflow) {
bool IsExact = I.isExact() && match(Op0, m_Exact(m_Value()));
BinaryOperator *BO = BinaryOperator::CreateUDiv(
X, ConstantInt::get(X->getType(), C2ShlC1));
if (IsExact)
BO->setIsExact();
return BO;
}
}
// Op0 / C where C is large (negative) --> zext (Op0 >= C)
// TODO: Could use isKnownNegative() to handle non-constant values.
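  // The quotient can only be 0 or 1 when the divisor has its sign bit set,
  // e.g. udiv i8 %x, -56 (i.e. 200) --> zext (icmp uge i8 %x, -56).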
Type *Ty = I.getType();
if (match(Op1, m_Negative())) {
Value *Cmp = Builder.CreateICmpUGE(Op0, Op1);
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
}
// Op0 / (sext i1 X) --> zext (Op0 == -1) (if X is 0, the div is undefined)
if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::getAllOnesValue(Ty));
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
}
if (Instruction *NarrowDiv = narrowUDivURem(I, *this))
return NarrowDiv;
Value *A, *B;
// Look through a right-shift to find the common factor:
// ((Op1 *nuw A) >> B) / Op1 --> A >> B
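  // This is valid because with a non-wrapping multiply,
  // ((Op1 * A) >> B) / Op1 == (Op1 * A) / (Op1 * (1 << B)) == A >> B.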
if (match(Op0, m_LShr(m_NUWMul(m_Specific(Op1), m_Value(A)), m_Value(B))) ||
match(Op0, m_LShr(m_NUWMul(m_Value(A), m_Specific(Op1)), m_Value(B)))) {
Instruction *Lshr = BinaryOperator::CreateLShr(A, B);
if (I.isExact() && cast<PossiblyExactOperator>(Op0)->isExact())
Lshr->setIsExact();
return Lshr;
}
auto GetShiftableDenom = [&](Value *Denom) -> Value * {
// Op0 udiv Op1 -> Op0 lshr log2(Op1), if log2() folds away.
    if (Value *Log2 = tryGetLog2(Denom, /*AssumeNonZero=*/true))
return Log2;
// Op0 udiv Op1 -> Op0 lshr cttz(Op1), if Op1 is a power of 2.
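    // This covers values that are only known to be powers of 2 via value
    // tracking, e.g. from a dominating condition, where no log2 pattern can
    // be matched syntactically.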
if (isKnownToBeAPowerOfTwo(Denom, /*OrZero=*/true, &I))
// This will increase instruction count but it's okay
// since bitwise operations are substantially faster than
// division.
return Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Denom,
Builder.getTrue());
return nullptr;
};
if (auto *Res = GetShiftableDenom(Op1))
return replaceInstUsesWith(
I, Builder.CreateLShr(Op0, Res, I.getName(), I.isExact()));
return nullptr;
}
Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
if (Value *V = simplifySDivInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
Value *X;
// sdiv Op0, -1 --> -Op0
// sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined)
if (match(Op1, m_AllOnes()) ||
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return BinaryOperator::CreateNSWNeg(Op0);
  // X / INT_MIN --> zext (X == INT_MIN)
if (match(Op1, m_SignMask()))
return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), Ty);
if (I.isExact()) {
// sdiv exact X, 1<<C --> ashr exact X, C iff 1<<C is non-negative
if (match(Op1, m_Power2()) && match(Op1, m_NonNegative())) {
Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op1));
return BinaryOperator::CreateExactAShr(Op0, C);
}
// sdiv exact X, (1<<ShAmt) --> ashr exact X, ShAmt (if shl is non-negative)
Value *ShAmt;
if (match(Op1, m_NSWShl(m_One(), m_Value(ShAmt))))
return BinaryOperator::CreateExactAShr(Op0, ShAmt);
// sdiv exact X, -1<<C --> -(ashr exact X, C)
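    // e.g. sdiv exact i32 %x, -8 --> sub nsw i32 0, (ashr exact i32 %x, 3)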
if (match(Op1, m_NegatedPower2())) {
Constant *NegPow2C = ConstantExpr::getNeg(cast<Constant>(Op1));
Constant *C = ConstantExpr::getExactLogBase2(NegPow2C);
Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true);
return BinaryOperator::CreateNSWNeg(Ashr);
}
}
const APInt *Op1C;
if (match(Op1, m_APInt(Op1C))) {
// If the dividend is sign-extended and the constant divisor is small enough
// to fit in the source type, shrink the division to the narrower type:
// (sext X) sdiv C --> sext (X sdiv C)
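    // e.g. sdiv (sext i8 %x to i32), 3 --> sext (sdiv i8 %x, 3) to i32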
Value *Op0Src;
if (match(Op0, m_OneUse(m_SExt(m_Value(Op0Src)))) &&
Op0Src->getType()->getScalarSizeInBits() >=
Op1C->getSignificantBits()) {
// In the general case, we need to make sure that the dividend is not the
// minimum signed value because dividing that by -1 is UB. But here, we
// know that the -1 divisor case is already handled above.
Constant *NarrowDivisor =
ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType());
Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
return new SExtInst(NarrowOp, Ty);
}
// -X / C --> X / -C (if the negation doesn't overflow).
// TODO: This could be enhanced to handle arbitrary vector constants by
// checking if all elements are not the min-signed-val.
if (!Op1C->isMinSignedValue() && match(Op0, m_NSWNeg(m_Value(X)))) {
Constant *NegC = ConstantInt::get(Ty, -(*Op1C));
Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
BO->setIsExact(I.isExact());
return BO;
}
}
// -X / Y --> -(X / Y)
Value *Y;
if (match(&I, m_SDiv(m_OneUse(m_NSWNeg(m_Value(X))), m_Value(Y))))
return BinaryOperator::CreateNSWNeg(
Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));
// abs(X) / X --> X > -1 ? 1 : -1
// X / abs(X) --> X > -1 ? 1 : -1
if (match(&I, m_c_BinOp(
m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())),
m_Deferred(X)))) {
Value *Cond = Builder.CreateIsNotNeg(X);
return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
ConstantInt::getAllOnesValue(Ty));
}
KnownBits KnownDividend = computeKnownBits(Op0, &I);
if (!I.isExact() &&
(match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
KnownDividend.countMinTrailingZeros() >= Op1C->countr_zero()) {
I.setIsExact();
return &I;
}
if (KnownDividend.isNonNegative()) {
// If both operands are unsigned, turn this into a udiv.
if (isKnownNonNegative(Op1, SQ.getWithInstruction(&I))) {
auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
BO->setIsExact(I.isExact());
return BO;
}
if (match(Op1, m_NegatedPower2())) {
// X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) ->
// -> -(X udiv (1 << C)) -> -(X u>> C)
Constant *CNegLog2 = ConstantExpr::getExactLogBase2(
ConstantExpr::getNeg(cast<Constant>(Op1)));
Value *Shr = Builder.CreateLShr(Op0, CNegLog2, I.getName(), I.isExact());
return BinaryOperator::CreateNeg(Shr);
}
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, &I)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
// the sign bit set.
auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
BO->setIsExact(I.isExact());
return BO;
}
}
// -X / X --> X == INT_MIN ? 1 : -1
if (isKnownNegation(Op0, Op1)) {
APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
Value *Cond = Builder.CreateICmpEQ(Op0, ConstantInt::get(Ty, MinVal));
return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
ConstantInt::getAllOnesValue(Ty));
}
return nullptr;
}
/// Remove negation and try to convert division into multiplication.
Instruction *InstCombinerImpl::foldFDivConstantDivisor(BinaryOperator &I) {
Constant *C;
if (!match(I.getOperand(1), m_Constant(C)))
return nullptr;
// -X / C --> X / -C
Value *X;
const DataLayout &DL = I.getDataLayout();
if (match(I.getOperand(0), m_FNeg(m_Value(X))))
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
return BinaryOperator::CreateFDivFMF(X, NegC, &I);
// nnan X / +0.0 -> copysign(inf, X)
// nnan nsz X / -0.0 -> copysign(inf, X)
if (I.hasNoNaNs() &&
(match(I.getOperand(1), m_PosZeroFP()) ||
(I.hasNoSignedZeros() && match(I.getOperand(1), m_AnyZeroFP())))) {
IRBuilder<> B(&I);
CallInst *CopySign = B.CreateIntrinsic(
Intrinsic::copysign, {C->getType()},
{ConstantFP::getInfinity(I.getType()), I.getOperand(0)}, &I);
CopySign->takeName(&I);
return replaceInstUsesWith(I, CopySign);
}
// If the constant divisor has an exact inverse, this is always safe. If not,
// then we can still create a reciprocal if fast-math-flags allow it and the
// constant is a regular number (not zero, infinite, or denormal).
if (!(C->hasExactInverseFP() || (I.hasAllowReciprocal() && C->isNormalFP())))
return nullptr;
// Disallow denormal constants because we don't know what would happen
// on all targets.
// TODO: Use Intrinsic::canonicalize or let function attributes tell us that
// denorms are flushed?
auto *RecipC = ConstantFoldBinaryOpOperands(
Instruction::FDiv, ConstantFP::get(I.getType(), 1.0), C, DL);
if (!RecipC || !RecipC->isNormalFP())
return nullptr;
// X / C --> X * (1 / C)
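  // e.g. X / 4.0 --> X * 0.25 is always safe because 4.0 has an exact
  // inverse, while X / 3.0 --> X * (1.0 / 3.0) additionally requires 'arcp'.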
return BinaryOperator::CreateFMulFMF(I.getOperand(0), RecipC, &I);
}
/// Remove negation and try to reassociate constant math.
static Instruction *foldFDivConstantDividend(BinaryOperator &I) {
Constant *C;
if (!match(I.getOperand(0), m_Constant(C)))
return nullptr;
// C / -X --> -C / X
Value *X;
const DataLayout &DL = I.getDataLayout();
if (match(I.getOperand(1), m_FNeg(m_Value(X))))
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
return BinaryOperator::CreateFDivFMF(NegC, X, &I);
if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
return nullptr;
// Try to reassociate C / X expressions where X includes another constant.
Constant *C2, *NewC = nullptr;
if (match(I.getOperand(1), m_FMul(m_Value(X), m_Constant(C2)))) {
// C / (X * C2) --> (C / C2) / X
NewC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C2, DL);
} else if (match(I.getOperand(1), m_FDiv(m_Value(X), m_Constant(C2)))) {
// C / (X / C2) --> (C * C2) / X
NewC = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C2, DL);
}
// Disallow denormal constants because we don't know what would happen
// on all targets.
// TODO: Use Intrinsic::canonicalize or let function attributes tell us that
// denorms are flushed?
if (!NewC || !NewC->isNormalFP())
return nullptr;
return BinaryOperator::CreateFDivFMF(NewC, X, &I);
}
/// Negate the exponent of pow/exp to fold division-by-pow() into multiply.
static Instruction *foldFDivPowDivisor(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
auto *II = dyn_cast<IntrinsicInst>(Op1);
if (!II || !II->hasOneUse() || !I.hasAllowReassoc() ||
!I.hasAllowReciprocal())
return nullptr;
// Z / pow(X, Y) --> Z * pow(X, -Y)
// Z / exp{2}(Y) --> Z * exp{2}(-Y)
// In the general case, this creates an extra instruction, but fmul allows
// for better canonicalization and optimization than fdiv.
Intrinsic::ID IID = II->getIntrinsicID();
SmallVector<Value *> Args;
switch (IID) {
case Intrinsic::pow:
Args.push_back(II->getArgOperand(0));
Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(1), &I));
break;
case Intrinsic::powi: {
// Require 'ninf' assuming that makes powi(X, -INT_MIN) acceptable.
// That is, X ** (huge negative number) is 0.0, ~1.0, or INF and so
// dividing by that is INF, ~1.0, or 0.0. Code that uses powi allows
// non-standard results, so this corner case should be acceptable if the
// code rules out INF values.
if (!I.hasNoInfs())
return nullptr;
Args.push_back(II->getArgOperand(0));
Args.push_back(Builder.CreateNeg(II->getArgOperand(1)));
Type *Tys[] = {I.getType(), II->getArgOperand(1)->getType()};
Value *Pow = Builder.CreateIntrinsic(IID, Tys, Args, &I);
return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
}
case Intrinsic::exp:
case Intrinsic::exp2:
Args.push_back(Builder.CreateFNegFMF(II->getArgOperand(0), &I));
break;
default:
return nullptr;
}
Value *Pow = Builder.CreateIntrinsic(IID, I.getType(), Args, &I);
return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
}
/// Convert the division into a multiplication when the divisor is a sqrt call
/// whose operand is itself an fdiv instruction.
static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
// X / sqrt(Y / Z) --> X * sqrt(Z / Y)
if (!I.hasAllowReassoc() || !I.hasAllowReciprocal())
return nullptr;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
auto *II = dyn_cast<IntrinsicInst>(Op1);
if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() ||
!II->hasAllowReassoc() || !II->hasAllowReciprocal())
return nullptr;
Value *Y, *Z;
auto *DivOp = dyn_cast<Instruction>(II->getOperand(0));
if (!DivOp)
return nullptr;
if (!match(DivOp, m_FDiv(m_Value(Y), m_Value(Z))))
return nullptr;
if (!DivOp->hasAllowReassoc() || !I.hasAllowReciprocal() ||
!DivOp->hasOneUse())
return nullptr;
Value *SwapDiv = Builder.CreateFDivFMF(Z, Y, DivOp);
Value *NewSqrt =
Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), SwapDiv, II);
return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
}
// Change
// X = 1/sqrt(a)
// R1 = X * X
// R2 = a * X
//
// TO
//
// FDiv = 1/a
// FSqrt = sqrt(a)
// FMul = FDiv * FSqrt
// Replace uses of R1 with FDiv
// Replace uses of R2 with FSqrt
// Replace uses of X with FMul
static Instruction *
convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
const SmallPtrSetImpl<Instruction *> &R1,
const SmallPtrSetImpl<Instruction *> &R2,
InstCombiner::BuilderTy &B, InstCombinerImpl *IC) {
B.SetInsertPoint(X);
  // Build one fdiv that is representative of all of the instructions in R1,
  // giving it the most generic fpmath metadata and the intersection of their
  // fast-math flags.
Value *SqrtOp = CI->getArgOperand(0);
auto *FDiv = cast<Instruction>(
B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), SqrtOp));
auto *R1FPMathMDNode = (*R1.begin())->getMetadata(LLVMContext::MD_fpmath);
FastMathFlags R1FMF = (*R1.begin())->getFastMathFlags(); // Common FMF
for (Instruction *I : R1) {
R1FPMathMDNode = MDNode::getMostGenericFPMath(
R1FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
R1FMF &= I->getFastMathFlags();
IC->replaceInstUsesWith(*I, FDiv);
IC->eraseInstFromFunction(*I);
}
FDiv->setMetadata(LLVMContext::MD_fpmath, R1FPMathMDNode);
FDiv->copyFastMathFlags(R1FMF);
  // Build a single sqrt call that is representative of all of the
  // instructions in R2, giving it the most generic fpmath metadata and the
  // intersection of their fast-math flags.
auto *FSqrt = cast<CallInst>(CI->clone());
FSqrt->insertBefore(CI->getIterator());
auto *R2FPMathMDNode = (*R2.begin())->getMetadata(LLVMContext::MD_fpmath);
FastMathFlags R2FMF = (*R2.begin())->getFastMathFlags(); // Common FMF
for (Instruction *I : R2) {
R2FPMathMDNode = MDNode::getMostGenericFPMath(
R2FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath));
R2FMF &= I->getFastMathFlags();
IC->replaceInstUsesWith(*I, FSqrt);
IC->eraseInstFromFunction(*I);
}
FSqrt->setMetadata(LLVMContext::MD_fpmath, R2FPMathMDNode);
FSqrt->copyFastMathFlags(R2FMF);
Instruction *FMul;
  // If X = -1/sqrt(a) initially, then FMul = -(FDiv * FSqrt).
if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) {
Value *Mul = B.CreateFMul(FDiv, FSqrt);
FMul = cast<Instruction>(B.CreateFNeg(Mul));
} else
FMul = cast<Instruction>(B.CreateFMul(FDiv, FSqrt));
FMul->copyMetadata(*X);
FMul->copyFastMathFlags(FastMathFlags::intersectRewrite(R1FMF, R2FMF) |
FastMathFlags::unionValue(R1FMF, R2FMF));
return IC->replaceInstUsesWith(*X, FMul);
}
Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
Module *M = I.getModule();
if (Value *V = simplifyFDivInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
if (Instruction *R = foldFDivConstantDivisor(I))
return R;
if (Instruction *R = foldFDivConstantDividend(I))
return R;
if (Instruction *R = foldFPSignBitOps(I))
return R;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Convert
// x = 1.0/sqrt(a)
// r1 = x * x;
// r2 = a/sqrt(a);
//
// TO
//
// r1 = 1/a
// r2 = sqrt(a)
// x = r1 * r2
SmallPtrSet<Instruction *, 2> R1, R2;
if (isFSqrtDivToFMulLegal(&I, R1, R2)) {
CallInst *CI = cast<CallInst>(I.getOperand(1));
if (Instruction *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, Builder, this))
return D;
}
if (isa<Constant>(Op0))
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (isa<Constant>(Op1))
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (I.hasAllowReassoc() && I.hasAllowReciprocal()) {
Value *X, *Y;
if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
(!isa<Constant>(Y) || !isa<Constant>(Op1))) {
// (X / Y) / Z => X / (Y * Z)
Value *YZ = Builder.CreateFMulFMF(Y, Op1, &I);
return BinaryOperator::CreateFDivFMF(X, YZ, &I);
}
if (match(Op1, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
(!isa<Constant>(Y) || !isa<Constant>(Op0))) {
// Z / (X / Y) => (Y * Z) / X
Value *YZ = Builder.CreateFMulFMF(Y, Op0, &I);
return BinaryOperator::CreateFDivFMF(YZ, X, &I);
}
// Z / (1.0 / Y) => (Y * Z)
//
// This is a special case of Z / (X / Y) => (Y * Z) / X, with X = 1.0. The
    // m_OneUse check is avoided because even in the case of multiple uses
    // of 1.0/Y, the number of instructions remains the same and a division
    // is replaced by a multiplication.
if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
}
if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
// sin(X) / cos(X) -> tan(X)
// cos(X) / sin(X) -> 1/tan(X) (cotangent)
Value *X;
bool IsTan = match(Op0, m_Intrinsic<Intrinsic::sin>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::cos>(m_Specific(X)));
bool IsCot =
!IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X)));
if ((IsTan || IsCot) && hasFloatFn(M, &TLI, I.getType(), LibFunc_tan,
LibFunc_tanf, LibFunc_tanl)) {
IRBuilder<> B(&I);
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
AttributeList Attrs =
cast<CallBase>(Op0)->getCalledFunction()->getAttributes();
Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
LibFunc_tanl, B, Attrs);
if (IsCot)
Res = B.CreateFDiv(ConstantFP::get(I.getType(), 1.0), Res);
return replaceInstUsesWith(I, Res);
}
}
// X / (X * Y) --> 1.0 / Y
// Reassociate to (X / X -> 1.0) is legal when NaNs are not allowed.
// We can ignore the possibility that X is infinity because INF/INF is NaN.
Value *X, *Y;
if (I.hasNoNaNs() && I.hasAllowReassoc() &&
match(Op1, m_c_FMul(m_Specific(Op0), m_Value(Y)))) {
replaceOperand(I, 0, ConstantFP::get(I.getType(), 1.0));
replaceOperand(I, 1, Y);
return &I;
}
// X / fabs(X) -> copysign(1.0, X)
// fabs(X) / X -> copysign(1.0, X)
if (I.hasNoNaNs() && I.hasNoInfs() &&
(match(&I, m_FDiv(m_Value(X), m_FAbs(m_Deferred(X)))) ||
match(&I, m_FDiv(m_FAbs(m_Value(X)), m_Deferred(X))))) {
Value *V = Builder.CreateBinaryIntrinsic(
Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I);
return replaceInstUsesWith(I, V);
}
if (Instruction *Mul = foldFDivPowDivisor(I, Builder))
return Mul;
if (Instruction *Mul = foldFDivSqrtDivisor(I, Builder))
return Mul;
// pow(X, Y) / X --> pow(X, Y-1)
if (I.hasAllowReassoc() &&
match(Op0, m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Specific(Op1),
m_Value(Y))))) {
Value *Y1 =
Builder.CreateFAddFMF(Y, ConstantFP::get(I.getType(), -1.0), &I);
Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, Op1, Y1, &I);
return replaceInstUsesWith(I, Pow);
}
if (Instruction *FoldedPowi = foldPowiReassoc(I))
return FoldedPowi;
return nullptr;
}
// A variety of transforms for:
// (urem/srem (mul X, Y), (mul X, Z))
// (urem/srem (shl X, Y), (shl X, Z))
// (urem/srem (shl Y, X), (shl Z, X))
// NB: The shift cases are really just extensions of the mul case. We treat
// shift as Val * (1 << Amt).
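// e.g. urem (mul nuw i32 %x, 12), (mul nuw i32 %x, 8)
//        --> mul nuw nsw i32 %x, 4, since 12 u>= 8 and 12 urem 8 == 4.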
static Instruction *simplifyIRemMulShl(BinaryOperator &I,
InstCombinerImpl &IC) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X = nullptr;
APInt Y, Z;
bool ShiftByX = false;
// If V is not nullptr, it will be matched using m_Specific.
auto MatchShiftOrMulXC = [](Value *Op, Value *&V, APInt &C,
bool &PreserveNSW) -> bool {
const APInt *Tmp = nullptr;
if ((!V && match(Op, m_Mul(m_Value(V), m_APInt(Tmp)))) ||
(V && match(Op, m_Mul(m_Specific(V), m_APInt(Tmp)))))
C = *Tmp;
else if ((!V && match(Op, m_Shl(m_Value(V), m_APInt(Tmp)))) ||
(V && match(Op, m_Shl(m_Specific(V), m_APInt(Tmp))))) {
C = APInt(Tmp->getBitWidth(), 1) << *Tmp;
// We cannot preserve NSW when shifting by BW - 1.
PreserveNSW = Tmp->ult(Tmp->getBitWidth() - 1);
}
if (Tmp != nullptr)
return true;
    // Reset `V` so we don't start with a specific value on the next match
    // attempt.
V = nullptr;
return false;
};
auto MatchShiftCX = [](Value *Op, APInt &C, Value *&V) -> bool {
const APInt *Tmp = nullptr;
if ((!V && match(Op, m_Shl(m_APInt(Tmp), m_Value(V)))) ||
(V && match(Op, m_Shl(m_APInt(Tmp), m_Specific(V))))) {
C = *Tmp;
return true;
}
    // Reset `V` so we don't start with a specific value on the next match
    // attempt.
V = nullptr;
return false;
};
bool Op0PreserveNSW = true, Op1PreserveNSW = true;
if (MatchShiftOrMulXC(Op0, X, Y, Op0PreserveNSW) &&
MatchShiftOrMulXC(Op1, X, Z, Op1PreserveNSW)) {
// pass
} else if (MatchShiftCX(Op0, Y, X) && MatchShiftCX(Op1, Z, X)) {
ShiftByX = true;
} else {
return nullptr;
}
bool IsSRem = I.getOpcode() == Instruction::SRem;
OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0);
// TODO: We may be able to deduce more about nsw/nuw of BO0/BO1 based on Y >=
// Z or Z >= Y.
bool BO0HasNSW = Op0PreserveNSW && BO0->hasNoSignedWrap();
bool BO0HasNUW = BO0->hasNoUnsignedWrap();
bool BO0NoWrap = IsSRem ? BO0HasNSW : BO0HasNUW;
APInt RemYZ = IsSRem ? Y.srem(Z) : Y.urem(Z);
// (rem (mul nuw/nsw X, Y), (mul X, Z))
// if (rem Y, Z) == 0
// -> 0
if (RemYZ.isZero() && BO0NoWrap)
return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType()));
// Helper function to emit either (RemSimplificationC << X) or
// (RemSimplificationC * X) depending on whether we matched Op0/Op1 as
// (shl V, X) or (mul V, X) respectively.
auto CreateMulOrShift =
[&](const APInt &RemSimplificationC) -> BinaryOperator * {
Value *RemSimplification =
ConstantInt::get(I.getType(), RemSimplificationC);
return ShiftByX ? BinaryOperator::CreateShl(RemSimplification, X)
: BinaryOperator::CreateMul(X, RemSimplification);
};
OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1);
bool BO1HasNSW = Op1PreserveNSW && BO1->hasNoSignedWrap();
bool BO1HasNUW = BO1->hasNoUnsignedWrap();
bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW;
// (rem (mul X, Y), (mul nuw/nsw X, Z))
// if (rem Y, Z) == Y
// -> (mul nuw/nsw X, Y)
if (RemYZ == Y && BO1NoWrap) {
BinaryOperator *BO = CreateMulOrShift(Y);
// Copy any overflow flags from Op0.
BO->setHasNoSignedWrap(IsSRem || BO0HasNSW);
BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW);
return BO;
}
// (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z))
// if Y >= Z
// -> (mul {nuw} nsw X, (rem Y, Z))
if (Y.uge(Z) && (IsSRem ? (BO0HasNSW && BO1HasNSW) : BO0HasNUW)) {
BinaryOperator *BO = CreateMulOrShift(RemYZ);
BO->setHasNoSignedWrap();
BO->setHasNoUnsignedWrap(BO0HasNUW);
return BO;
}
return nullptr;
}
/// This function implements the transforms common to both integer remainder
/// instructions (urem and srem). It is called by the visitors to those integer
/// remainder instructions.
Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
if (Instruction *Res = commonIDivRemTransforms(I))
return Res;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (isa<Constant>(Op1)) {
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
} else if (auto *PN = dyn_cast<PHINode>(Op0I)) {
const APInt *Op1Int;
if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() &&
(I.getOpcode() == Instruction::URem ||
!Op1Int->isMinSignedValue())) {
// foldOpIntoPhi will speculate instructions to the end of the PHI's
// predecessor blocks, so do this only if we know the srem or urem
// will not fault.
if (Instruction *NV = foldOpIntoPhi(I, PN))
return NV;
}
}
// See if we can fold away this rem instruction.
if (SimplifyDemandedInstructionBits(I))
return &I;
}
}
if (Instruction *R = simplifyIRemMulShl(I, *this))
return R;
return nullptr;
}
Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
if (Value *V = simplifyURemInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *common = commonIRemTransforms(I))
return common;
if (Instruction *NarrowRem = narrowUDivURem(I, *this))
return NarrowRem;
  // X urem Y -> X & (Y - 1), where Y is a power of 2.
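  // e.g. urem i32 %x, (shl nuw i32 1, %n)
  //        --> and i32 %x, (add i32 (shl nuw i32 1, %n), -1)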
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, &I)) {
    // This may increase instruction count; we don't enforce that Y is a
    // constant.
Constant *N1 = Constant::getAllOnesValue(Ty);
Value *Add = Builder.CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
}
// 1 urem X -> zext(X != 1)
if (match(Op0, m_One())) {
Value *Cmp = Builder.CreateICmpNE(Op1, ConstantInt::get(Ty, 1));
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
}
  // Op0 urem C --> (Op0 u< C) ? Op0 : Op0 - C, where C >= signbit.
  // Because C has its sign bit set, Op0 u< 2*C always holds, so at most one
  // subtraction of C is needed to compute the remainder.
  // Op0 must be frozen because we are increasing its number of uses.
if (match(Op1, m_Negative())) {
Value *F0 = Op0;
if (!isGuaranteedNotToBeUndef(Op0))
F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr");
Value *Cmp = Builder.CreateICmpULT(F0, Op1);
Value *Sub = Builder.CreateSub(F0, Op1);
return SelectInst::Create(Cmp, F0, Sub);
}
  // If the divisor is a sext of a boolean, then the divisor must be the max
  // unsigned value (-1). Therefore, the remainder is Op0 unless Op0 is also
  // the max unsigned value. In that case, the remainder is 0:
// urem Op0, (sext i1 X) --> (Op0 == -1) ? 0 : Op0
Value *X;
if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
Value *FrozenOp0 = Op0;
if (!isGuaranteedNotToBeUndef(Op0))
FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
Value *Cmp =
Builder.CreateICmpEQ(FrozenOp0, ConstantInt::getAllOnesValue(Ty));
return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
}
// For "(X + 1) % Op1" and if (X u< Op1) => (X + 1) == Op1 ? 0 : X + 1 .
if (match(Op0, m_Add(m_Value(X), m_One()))) {
Value *Val =
simplifyICmpInst(ICmpInst::ICMP_ULT, X, Op1, SQ.getWithInstruction(&I));
if (Val && match(Val, m_One())) {
Value *FrozenOp0 = Op0;
if (!isGuaranteedNotToBeUndef(Op0))
FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
Value *Cmp = Builder.CreateICmpEQ(FrozenOp0, Op1);
return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
}
}
return nullptr;
}
Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) {
if (Value *V = simplifySRemInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
return X;
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
{
const APInt *Y;
// X % -Y -> X % Y
if (match(Op1, m_Negative(Y)) && !Y->isMinSignedValue())
return replaceOperand(I, 1, ConstantInt::get(I.getType(), -*Y));
}
// -X srem Y --> -(X srem Y)
Value *X, *Y;
if (match(&I, m_SRem(m_OneUse(m_NSWNeg(m_Value(X))), m_Value(Y))))
return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
if (MaskedValueIsZero(Op1, Mask, &I) && MaskedValueIsZero(Op0, Mask, &I)) {
// X srem Y -> X urem Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateURem(Op0, Op1, I.getName());
}
// If it's a constant vector, flip any negative values positive.
if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
Constant *C = cast<Constant>(Op1);
unsigned VWidth = cast<FixedVectorType>(C->getType())->getNumElements();
bool hasNegative = false;
bool hasMissing = false;
for (unsigned i = 0; i != VWidth; ++i) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt) {
hasMissing = true;
break;
}
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
if (RHS->isNegative())
hasNegative = true;
}
if (hasNegative && !hasMissing) {
SmallVector<Constant *, 16> Elts(VWidth);
for (unsigned i = 0; i != VWidth; ++i) {
Elts[i] = C->getAggregateElement(i); // Handle undef, etc.
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
if (RHS->isNegative())
Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
}
}
Constant *NewRHSV = ConstantVector::get(Elts);
if (NewRHSV != C) // Don't loop on -MININT
return replaceOperand(I, 1, NewRHSV);
}
}
return nullptr;
}
Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) {
if (Value *V = simplifyFRemInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
if (Instruction *X = foldVectorBinop(I))
return X;
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
return nullptr;
}