Revert "[SeparateConstOffsetFromGEP] Decompose constant xor operand if possible" (#179339)
A miscompile was found (see #175724), and it's complicated to fix. We're going to revert for now, and look at reimplementing a fixed version later.
This commit is contained in:
parent
139e2fb602
commit
a2c7c6032f
@ -295,10 +295,6 @@ private:
|
||||
bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
|
||||
bool NonNegative);
|
||||
|
||||
/// Analyze XOR instruction to extract disjoint constant bits that behave
|
||||
/// like addition operations for improved address mode folding.
|
||||
APInt extractDisjointBitsFromXor(BinaryOperator *XorInst);
|
||||
|
||||
/// The path from the constant offset to the old GEP index. e.g., if the GEP
|
||||
/// index is "a * b + (c + 5)". After running function find, UserChain[0] will
|
||||
/// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
|
||||
@ -601,9 +597,6 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
|
||||
// Trace into subexpressions for more hoisting opportunities.
|
||||
if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
|
||||
ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
|
||||
// Handle XOR with disjoint bits that can be treated as addition.
|
||||
else if (BO->getOpcode() == Instruction::Xor)
|
||||
ConstantOffset = extractDisjointBitsFromXor(BO);
|
||||
} else if (isa<TruncInst>(V)) {
|
||||
ConstantOffset =
|
||||
find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
|
||||
@ -723,20 +716,11 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
|
||||
Value *NextInChain = removeConstOffset(ChainIndex - 1);
|
||||
Value *TheOther = BO->getOperand(1 - OpNo);
|
||||
|
||||
// If NextInChain is 0 and not the LHS of a sub, we can simplify the
|
||||
// sub-expression to be just TheOther.
|
||||
if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
|
||||
if (CI->isZero()) {
|
||||
// Custom XOR handling for disjoint bits - preserves original XOR
|
||||
// with non-disjoint constant bits.
|
||||
// TODO: The design should be updated to support partial constant
|
||||
// extraction.
|
||||
if (BO->getOpcode() == Instruction::Xor)
|
||||
return BO;
|
||||
|
||||
// If NextInChain is 0 and not the LHS of a sub, we can simplify the
|
||||
// sub-expression to be just TheOther.
|
||||
if (!(BO->getOpcode() == Instruction::Sub && OpNo == 0))
|
||||
return TheOther;
|
||||
}
|
||||
if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
|
||||
return TheOther;
|
||||
}
|
||||
|
||||
BinaryOperator::BinaryOps NewOp = BO->getOpcode();
|
||||
@ -767,67 +751,6 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
|
||||
return NewBO;
|
||||
}
|
||||
|
||||
/// Analyze XOR instruction to extract disjoint constant bits for address
|
||||
/// folding
|
||||
///
|
||||
/// This function identifies bits in an XOR constant operand that are disjoint
|
||||
/// from the base operand's known set bits. For these disjoint bits, XOR behaves
|
||||
/// identically to addition, allowing us to extract them as constant offsets
|
||||
/// that can be folded into addressing modes.
|
||||
///
|
||||
/// Transformation: `Base ^ Const` becomes `(Base ^ NonDisjointBits) +
|
||||
/// DisjointBits` where DisjointBits = Const & KnownZeros(Base)
|
||||
///
|
||||
/// Example with ptr having known-zero low bit:
|
||||
/// Original: `xor %ptr, 3` ; 3 = 0b11
|
||||
/// Analysis: DisjointBits = 3 & KnownZeros(%ptr) = 0b11 & 0b01 = 0b01
|
||||
/// Result: `(xor %ptr, 2) + 1` where 1 can be folded into address mode
|
||||
///
|
||||
/// \param XorInst The XOR binary operator to analyze
|
||||
/// \return APInt containing the disjoint bits that can be extracted as offset,
|
||||
/// or zero if no disjoint bits exist
|
||||
APInt ConstantOffsetExtractor::extractDisjointBitsFromXor(
|
||||
BinaryOperator *XorInst) {
|
||||
assert(XorInst && XorInst->getOpcode() == Instruction::Xor &&
|
||||
"Expected XOR instruction");
|
||||
|
||||
const unsigned BitWidth = XorInst->getType()->getScalarSizeInBits();
|
||||
Value *BaseOperand;
|
||||
ConstantInt *XorConstant;
|
||||
|
||||
// Match pattern: xor BaseOperand, Constant.
|
||||
if (!match(XorInst, m_Xor(m_Value(BaseOperand), m_ConstantInt(XorConstant))))
|
||||
return APInt::getZero(BitWidth);
|
||||
|
||||
// Compute known bits for the base operand.
|
||||
const SimplifyQuery SQ(DL);
|
||||
const KnownBits BaseKnownBits = computeKnownBits(BaseOperand, SQ);
|
||||
const APInt &ConstantValue = XorConstant->getValue();
|
||||
|
||||
// Identify disjoint bits: constant bits that are known zero in base.
|
||||
const APInt DisjointBits = ConstantValue & BaseKnownBits.Zero;
|
||||
|
||||
// Early exit if no disjoint bits found.
|
||||
if (DisjointBits.isZero())
|
||||
return APInt::getZero(BitWidth);
|
||||
|
||||
// Compute the remaining non-disjoint bits that stay in the XOR.
|
||||
const APInt NonDisjointBits = ConstantValue & ~DisjointBits;
|
||||
|
||||
// FIXME: Enhance XOR constant extraction to handle nested binary operations.
|
||||
// Currently we only extract disjoint bits from the immediate XOR constant,
|
||||
// but we could recursively process cases like:
|
||||
// xor (add %base, C1), C2 -> add %base, (C1 ^ disjoint_bits(C2))
|
||||
// This requires careful analysis to ensure the transformation preserves
|
||||
// semantics, particularly around sign extension and overflow behavior.
|
||||
|
||||
// Add the non-disjoint constant to the user chain for later transformation
|
||||
// This will replace the original constant in the XOR with the new
|
||||
// constant.
|
||||
UserChain.push_back(ConstantInt::get(XorInst->getType(), NonDisjointBits));
|
||||
return DisjointBits;
|
||||
}
|
||||
|
||||
/// A helper function to check if reassociating through an entry in the user
|
||||
/// chain would invalidate the GEP's nuw flag.
|
||||
static bool allowsPreservingNUW(const User *U) {
|
||||
|
||||
@ -1,435 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; Test that an xor with a constant operand is decomposed into the gep.
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=separate-const-offset-from-gep \
|
||||
; RUN: -S < %s | FileCheck %s
|
||||
; Test that the gvn pass eliminates the redundant xor instructions produced by the decomposition.
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=separate-const-offset-from-gep,gvn \
|
||||
; RUN: -S < %s | FileCheck --check-prefix=GVN %s
|
||||
|
||||
; Check that disjoint constants are properly extracted and folded into GEP
|
||||
; addressing modes, and that GVN eliminates the redundant computations.
|
||||
define amdgpu_kernel void @test1(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test1(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 8192
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP9]], i32 16384
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP12]], i32 24576
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP10]], align 16
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP13]], align 16
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fadd <8 x half> [[TMP14]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fadd <8 x half> [[TMP16]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = fadd <8 x half> [[TMP18]], [[TMP19]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP20]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test1(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 8192
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 16384
|
||||
; GVN-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 24576
|
||||
; GVN-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; GVN-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; GVN-NEXT: [[TMP10:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; GVN-NEXT: [[TMP11:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; GVN-NEXT: [[TMP12:%.*]] = fadd <8 x half> [[TMP8]], [[TMP9]]
|
||||
; GVN-NEXT: [[TMP13:%.*]] = fadd <8 x half> [[TMP10]], [[TMP11]]
|
||||
; GVN-NEXT: [[TMP14:%.*]] = fadd <8 x half> [[TMP12]], [[TMP13]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP14]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = xor i32 %2, 4128
|
||||
%5 = xor i32 %2, 8224
|
||||
%6 = xor i32 %2, 12320
|
||||
%7 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%8 = getelementptr half, ptr addrspace(3) %1, i32 %4
|
||||
%9 = getelementptr half, ptr addrspace(3) %1, i32 %5
|
||||
%10 = getelementptr half, ptr addrspace(3) %1, i32 %6
|
||||
%11 = load <8 x half>, ptr addrspace(3) %7, align 16
|
||||
%12 = load <8 x half>, ptr addrspace(3) %8, align 16
|
||||
%13 = load <8 x half>, ptr addrspace(3) %9, align 16
|
||||
%14 = load <8 x half>, ptr addrspace(3) %10, align 16
|
||||
%15 = fadd <8 x half> %11, %12
|
||||
%16 = fadd <8 x half> %13, %14
|
||||
%17 = fadd <8 x half> %15, %16
|
||||
store <8 x half> %17, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that disjoint constants are properly extracted and folded into GEP
|
||||
; addressing modes, and that GVN eliminates the redundant computations.
|
||||
define amdgpu_kernel void @test2(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test2(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP5]], i32 24576
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP8]], i32 16384
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP11]], i32 8192
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP9]], align 16
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP12]], align 16
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP13]], align 16
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fadd <8 x half> [[TMP14]], [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fadd <8 x half> [[TMP16]], [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = fadd <8 x half> [[TMP18]], [[TMP19]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP20]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test2(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 24576
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 16384
|
||||
; GVN-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 8192
|
||||
; GVN-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; GVN-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; GVN-NEXT: [[TMP10:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; GVN-NEXT: [[TMP11:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; GVN-NEXT: [[TMP12:%.*]] = fadd <8 x half> [[TMP8]], [[TMP9]]
|
||||
; GVN-NEXT: [[TMP13:%.*]] = fadd <8 x half> [[TMP10]], [[TMP11]]
|
||||
; GVN-NEXT: [[TMP14:%.*]] = fadd <8 x half> [[TMP12]], [[TMP13]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP14]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 12320
|
||||
%4 = xor i32 %2, 8224
|
||||
%5 = xor i32 %2, 4128
|
||||
%6 = xor i32 %2, 32
|
||||
%7 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%8 = getelementptr half, ptr addrspace(3) %1, i32 %4
|
||||
%9 = getelementptr half, ptr addrspace(3) %1, i32 %5
|
||||
%10 = getelementptr half, ptr addrspace(3) %1, i32 %6
|
||||
%11 = load <8 x half>, ptr addrspace(3) %7, align 16
|
||||
%12 = load <8 x half>, ptr addrspace(3) %8, align 16
|
||||
%13 = load <8 x half>, ptr addrspace(3) %9, align 16
|
||||
%14 = load <8 x half>, ptr addrspace(3) %10, align 16
|
||||
%15 = fadd <8 x half> %11, %12
|
||||
%16 = fadd <8 x half> %13, %14
|
||||
%17 = fadd <8 x half> %15, %16
|
||||
store <8 x half> %17, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Verify that xor instructions with different non-disjoint constants are optimized
|
||||
define amdgpu_kernel void @test3(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test3(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], 288
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 4096
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP9]], i32 8192
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP10]], align 16
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x half> [[TMP11]], [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = fadd <8 x half> [[TMP13]], [[TMP14]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP15]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test3(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], 288
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP5]]
|
||||
; GVN-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 4096
|
||||
; GVN-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP4]], i32 8192
|
||||
; GVN-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; GVN-NEXT: [[TMP10:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; GVN-NEXT: [[TMP11:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP8]], align 16
|
||||
; GVN-NEXT: [[TMP12:%.*]] = fadd <8 x half> [[TMP9]], [[TMP10]]
|
||||
; GVN-NEXT: [[TMP13:%.*]] = fadd <8 x half> [[TMP11]], [[TMP12]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP13]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = xor i32 %2, 2336
|
||||
%5 = xor i32 %2, 4128
|
||||
%6 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%7 = getelementptr half, ptr addrspace(3) %1, i32 %4
|
||||
%8 = getelementptr half, ptr addrspace(3) %1, i32 %5
|
||||
%9 = load <8 x half>, ptr addrspace(3) %6, align 16
|
||||
%10 = load <8 x half>, ptr addrspace(3) %7, align 16
|
||||
%11 = load <8 x half>, ptr addrspace(3) %8, align 16
|
||||
%12 = fadd <8 x half> %9, %10
|
||||
%13 = fadd <8 x half> %11, %12
|
||||
store <8 x half> %13, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Verify that no optimization occurs when disjoint constants are absent
|
||||
define amdgpu_kernel void @test4(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test4(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], 288
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x half> [[TMP7]], [[TMP8]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP9]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test4(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], 288
|
||||
; GVN-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP4]]
|
||||
; GVN-NEXT: [[TMP7:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; GVN-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; GVN-NEXT: [[TMP9:%.*]] = fadd <8 x half> [[TMP7]], [[TMP8]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP9]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = xor i32 %2, 288
|
||||
%5 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%6 = getelementptr half, ptr addrspace(3) %1, i32 %4
|
||||
%7 = load <8 x half>, ptr addrspace(3) %5, align 16
|
||||
%8 = load <8 x half>, ptr addrspace(3) %6, align 16
|
||||
%9 = fadd <8 x half> %7, %8
|
||||
store <8 x half> %9, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Verify that XOR-BinOp-GEP usage chains are properly optimized
|
||||
define amdgpu_kernel void @test5(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test5(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 256
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP7]], i32 8192
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP8]], align 16
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fadd <8 x half> [[TMP9]], [[TMP10]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP11]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test5(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], 256
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP5]]
|
||||
; GVN-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP6]], i32 8192
|
||||
; GVN-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP4]], align 16
|
||||
; GVN-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; GVN-NEXT: [[TMP10:%.*]] = fadd <8 x half> [[TMP8]], [[TMP9]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP10]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = xor i32 %2, 4128
|
||||
%5 = add i32 %4, 256
|
||||
%6 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%7 = getelementptr half, ptr addrspace(3) %1, i32 %5
|
||||
%8 = load <8 x half>, ptr addrspace(3) %6, align 16
|
||||
%9 = load <8 x half>, ptr addrspace(3) %7, align 16
|
||||
%10 = fadd <8 x half> %8, %9
|
||||
store <8 x half> %10, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Verify that BinOp-XOR-GEP usage chains are properly optimized.
|
||||
; In the below test, make sure we stop processing the chain at xor
|
||||
; and do not fold the constant from the add instruction into the gep. The
|
||||
; constant from the add can be folded; future work will cover
|
||||
; these cases.
|
||||
define amdgpu_kernel void @test6(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test6(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 256
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = xor i32 [[TMP4]], 32
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP7]], i32 8192
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP8]], align 16
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fadd <8 x half> [[TMP9]], [[TMP10]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP11]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test6(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 256
|
||||
; GVN-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP6:%.*]] = xor i32 [[TMP4]], 32
|
||||
; GVN-NEXT: [[TMP7:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP6]]
|
||||
; GVN-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP7]], i32 8192
|
||||
; GVN-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; GVN-NEXT: [[TMP10:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP8]], align 16
|
||||
; GVN-NEXT: [[TMP11:%.*]] = fadd <8 x half> [[TMP9]], [[TMP10]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP11]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = add i32 %2, 256
|
||||
%5 = xor i32 %4, 4128
|
||||
%6 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%7 = getelementptr half, ptr addrspace(3) %1, i32 %5
|
||||
%8 = load <8 x half>, ptr addrspace(3) %6, align 16
|
||||
%9 = load <8 x half>, ptr addrspace(3) %7, align 16
|
||||
%10 = fadd <8 x half> %8, %9
|
||||
store <8 x half> %10, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Verify that BinOp-XOR-GEP usage chains with a non-disjoint xor work as
|
||||
; intended.
|
||||
define amdgpu_kernel void @test6a(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test6a(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 256
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 288
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x half> [[TMP8]], [[TMP9]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP10]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test6a(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 256
|
||||
; GVN-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], 288
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP7:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP5]]
|
||||
; GVN-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; GVN-NEXT: [[TMP9:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP7]], align 16
|
||||
; GVN-NEXT: [[TMP10:%.*]] = fadd <8 x half> [[TMP8]], [[TMP9]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP10]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = add i32 %2, 256
|
||||
%5 = xor i32 %4, 288
|
||||
%6 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%7 = getelementptr half, ptr addrspace(3) %1, i32 %5
|
||||
%8 = load <8 x half>, ptr addrspace(3) %6, align 16
|
||||
%9 = load <8 x half>, ptr addrspace(3) %7, align 16
|
||||
%10 = fadd <8 x half> %8, %9
|
||||
store <8 x half> %10, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ensure disjoint constants exceeding addressing mode limits (e.g., 32768) are
|
||||
; not extracted
|
||||
define amdgpu_kernel void @test7(i1 %0, ptr addrspace(3) %1) {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test7(
|
||||
; CHECK-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], 32800
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x half> [[TMP7]], [[TMP8]]
|
||||
; CHECK-NEXT: store <8 x half> [[TMP9]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; GVN-LABEL: define amdgpu_kernel void @test7(
|
||||
; GVN-SAME: i1 [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) {
|
||||
; GVN-NEXT: [[ENTRY:.*:]]
|
||||
; GVN-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i32 0, i32 288
|
||||
; GVN-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 32
|
||||
; GVN-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], 32800
|
||||
; GVN-NEXT: [[TMP5:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP3]]
|
||||
; GVN-NEXT: [[TMP6:%.*]] = getelementptr half, ptr addrspace(3) [[TMP1]], i32 [[TMP4]]
|
||||
; GVN-NEXT: [[TMP7:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP5]], align 16
|
||||
; GVN-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr addrspace(3) [[TMP6]], align 16
|
||||
; GVN-NEXT: [[TMP9:%.*]] = fadd <8 x half> [[TMP7]], [[TMP8]]
|
||||
; GVN-NEXT: store <8 x half> [[TMP9]], ptr addrspace(3) [[TMP1]], align 16
|
||||
; GVN-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%2 = select i1 %0, i32 0, i32 288
|
||||
%3 = xor i32 %2, 32
|
||||
%4 = xor i32 %2, 32800
|
||||
%5 = getelementptr half, ptr addrspace(3) %1, i32 %3
|
||||
%6 = getelementptr half, ptr addrspace(3) %1, i32 %4
|
||||
%7 = load <8 x half>, ptr addrspace(3) %5, align 16
|
||||
%8 = load <8 x half>, ptr addrspace(3) %6, align 16
|
||||
%9 = fadd <8 x half> %7, %8
|
||||
store <8 x half> %9, ptr addrspace(3) %1, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -32,9 +32,8 @@ define ptr @test_overflow(ptr %p, i32 %a) {
|
||||
define ptr @test_xor_overflow(ptr %p, i32 range(i32 0, -2147483648) %a) {
|
||||
; CHECK-LABEL: define ptr @test_xor_overflow(
|
||||
; CHECK-SAME: ptr [[P:%.*]], i32 range(i32 0, -2147483648) [[A:%.*]]) {
|
||||
; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[A]], 2147483647
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[XOR1]], 2
|
||||
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], -1
|
||||
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 [[XOR]]
|
||||
; CHECK-NEXT: ret ptr [[UGLYGEP]]
|
||||
;
|
||||
%xor = xor i32 %a, -1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user