[GISel] Enforce G_PTR_ADD RHS type matching index size for addr space (#84352)
This commit is contained in:
parent
a84e66a92d
commit
fd3eaf76ba
@ -4004,7 +4004,14 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
|
||||
|
||||
Index = clampVectorIndex(MIRBuilder, Index, VecTy);
|
||||
|
||||
LLT IdxTy = MRI.getType(Index);
|
||||
// Convert index to the correct size for the address space.
|
||||
const DataLayout &DL = MIRBuilder.getDataLayout();
|
||||
unsigned AS = MRI.getType(VecPtr).getAddressSpace();
|
||||
unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
|
||||
LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
|
||||
if (IdxTy != MRI.getType(Index))
|
||||
Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
|
||||
|
||||
auto Mul = MIRBuilder.buildMul(IdxTy, Index,
|
||||
MIRBuilder.buildConstant(IdxTy, EltSize));
|
||||
|
||||
|
@ -1301,6 +1301,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
||||
if (OffsetTy.isPointerOrPointerVector())
|
||||
report("gep offset operand must not be a pointer", MI);
|
||||
|
||||
if (PtrTy.isPointerOrPointerVector()) {
|
||||
const DataLayout &DL = MF->getDataLayout();
|
||||
unsigned AS = PtrTy.getAddressSpace();
|
||||
unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
|
||||
if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits) {
|
||||
report("gep offset operand must match index size for address space",
|
||||
MI);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Is the offset allowed to be a scalar with a vector?
|
||||
break;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ body: |
|
||||
; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
|
||||
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[C]](p64)
|
||||
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
|
||||
%1:_(s32) = G_CONSTANT i32 42
|
||||
%1:_(s64) = G_CONSTANT i64 42
|
||||
%2:_(s32) = G_CONSTANT i32 2
|
||||
%3:_(p64) = G_INTTOPTR %2
|
||||
%4:_(p64) = G_PTR_ADD %3, %1
|
||||
@ -26,7 +26,7 @@ body: |
|
||||
; CHECK-LABEL: name: agc.test_combine_ptradd_constants_ptrres
|
||||
; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44
|
||||
; CHECK: $x0 = COPY [[C]](p64)
|
||||
%1:_(s32) = G_CONSTANT i32 42
|
||||
%1:_(s64) = G_CONSTANT i64 42
|
||||
%2:_(s32) = G_CONSTANT i32 2
|
||||
%3:_(p64) = G_INTTOPTR %2
|
||||
%4:_(p64) = G_PTR_ADD %3, %1
|
||||
@ -39,12 +39,12 @@ body: |
|
||||
liveins: $x0, $x1
|
||||
; Ensure non-constant G_PTR_ADDs are not folded.
|
||||
; CHECK-LABEL: name: agc.test_not_combine_variable_ptradd
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(p64) = COPY $x1
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s32)
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p64)
|
||||
; CHECK: $x0 = COPY [[PTRTOINT]](s64)
|
||||
%1:_(s32) = G_CONSTANT i32 42
|
||||
%1:_(s64) = G_CONSTANT i64 42
|
||||
%2:_(p64) = COPY $x1
|
||||
%3:_(p64) = G_PTR_ADD %2, %1
|
||||
%4:_(s64) = G_PTRTOINT %3
|
||||
|
@ -1,23 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s
|
||||
---
|
||||
name: test_ptr_add_small
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; CHECK-LABEL: name: test_ptr_add_small
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
|
||||
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 8
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
|
||||
; CHECK: $x0 = COPY [[PTR_ADD]](p0)
|
||||
%0:_(p0) = COPY $x0
|
||||
%1:_(s64) = COPY $x1
|
||||
%2:_(s8) = G_TRUNC %1(s64)
|
||||
%3:_(p0) = G_PTR_ADD %0, %2(s8)
|
||||
$x0 = COPY %3(p0)
|
||||
|
||||
...
|
||||
---
|
||||
name: test_ptr_add_vec_p0
|
||||
body: |
|
||||
bb.0.entry:
|
||||
|
@ -38,18 +38,18 @@ body: |
|
||||
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x1
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
|
||||
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
|
||||
@ -104,18 +104,18 @@ body: |
|
||||
; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x1
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
|
||||
%elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
%elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
@ -162,18 +162,18 @@ body: |
|
||||
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x1
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
|
||||
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
|
||||
@ -414,35 +414,35 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; LITTLE: %ptr:_(p0) = COPY $x0
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
|
||||
; LITTLE: $w1 = COPY %full_load(s32)
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; BIG: %ptr:_(p0) = COPY $x0
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
|
||||
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_4:_(s32) = G_CONSTANT i32 4
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
%cst_4:_(s64) = G_CONSTANT i64 4
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x0
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
%ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)
|
||||
|
||||
%elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
|
||||
%elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
|
||||
@ -476,35 +476,35 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; LITTLE: %ptr:_(p0) = COPY $x0
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
|
||||
; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
|
||||
; LITTLE: $w1 = COPY %full_load(s32)
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; BIG: %ptr:_(p0) = COPY $x0
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_4:_(s32) = G_CONSTANT i32 4
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
%cst_4:_(s64) = G_CONSTANT i64 4
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x0
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
%ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64)
|
||||
|
||||
%elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
%elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
|
||||
@ -538,33 +538,33 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
|
||||
; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
|
||||
; LITTLE: %ptr:_(p0) = COPY $x0
|
||||
; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
|
||||
; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
|
||||
; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
|
||||
; LITTLE: $w1 = COPY %full_load(s32)
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
|
||||
; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
|
||||
; BIG: %ptr:_(p0) = COPY $x0
|
||||
; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
|
||||
; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
|
||||
; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
|
||||
; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_neg_1:_(s32) = G_CONSTANT i32 -1
|
||||
%cst_neg_2:_(s32) = G_CONSTANT i32 -2
|
||||
%cst_neg_3:_(s32) = G_CONSTANT i32 -3
|
||||
%cst_neg_1:_(s64) = G_CONSTANT i64 -1
|
||||
%cst_neg_2:_(s64) = G_CONSTANT i64 -2
|
||||
%cst_neg_3:_(s64) = G_CONSTANT i64 -3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x0
|
||||
%ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
|
||||
%ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
|
||||
%ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
|
||||
%ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
|
||||
%ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
|
||||
%ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)
|
||||
|
||||
%elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
|
||||
%elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
|
||||
@ -598,33 +598,33 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
|
||||
; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
|
||||
; LITTLE: %ptr:_(p0) = COPY $x0
|
||||
; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
|
||||
; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
|
||||
; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
|
||||
; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
|
||||
; LITTLE: $w1 = COPY %full_load(s32)
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
|
||||
; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3
|
||||
; BIG: %ptr:_(p0) = COPY $x0
|
||||
; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
|
||||
; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
|
||||
; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_neg_1:_(s32) = G_CONSTANT i32 -1
|
||||
%cst_neg_2:_(s32) = G_CONSTANT i32 -2
|
||||
%cst_neg_3:_(s32) = G_CONSTANT i32 -3
|
||||
%cst_neg_1:_(s64) = G_CONSTANT i64 -1
|
||||
%cst_neg_2:_(s64) = G_CONSTANT i64 -2
|
||||
%cst_neg_3:_(s64) = G_CONSTANT i64 -3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x0
|
||||
%ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
|
||||
%ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
|
||||
%ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
|
||||
%ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64)
|
||||
%ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64)
|
||||
%ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64)
|
||||
|
||||
%elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
|
||||
%elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
|
||||
@ -977,15 +977,15 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: dont_combine_duplicate_idx
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; LITTLE: %reused_idx:_(s32) = G_CONSTANT i32 2
|
||||
; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; LITTLE: %reused_idx:_(s64) = G_CONSTANT i64 2
|
||||
; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
|
||||
; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
|
||||
; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
|
||||
; LITTLE: %ptr:_(p0) = COPY $x1
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
|
||||
; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
|
||||
; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
|
||||
; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
|
||||
@ -1000,15 +1000,15 @@ body: |
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: dont_combine_duplicate_idx
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; BIG: %reused_idx:_(s32) = G_CONSTANT i32 2
|
||||
; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; BIG: %reused_idx:_(s64) = G_CONSTANT i64 2
|
||||
; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
|
||||
; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
|
||||
; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
|
||||
; BIG: %ptr:_(p0) = COPY $x1
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
|
||||
; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
|
||||
; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
|
||||
; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
|
||||
@ -1021,17 +1021,17 @@ body: |
|
||||
; BIG: %full_load:_(s32) = G_OR %or1, %or2
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%reused_idx:_(s32) = G_CONSTANT i32 2
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%reused_idx:_(s64) = G_CONSTANT i64 2
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x1
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
|
||||
%also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
|
||||
%also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64)
|
||||
|
||||
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
|
||||
@ -1064,15 +1064,15 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: dont_combine_duplicate_offset
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
|
||||
; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
|
||||
; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2
|
||||
; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
|
||||
; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
|
||||
; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
|
||||
; LITTLE: %ptr:_(p0) = COPY $x1
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
|
||||
@ -1087,15 +1087,15 @@ body: |
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: dont_combine_duplicate_offset
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
|
||||
; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
|
||||
; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; BIG: %cst_2:_(s64) = G_CONSTANT i64 2
|
||||
; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
|
||||
; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
|
||||
; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
|
||||
; BIG: %ptr:_(p0) = COPY $x1
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
|
||||
@ -1108,17 +1108,17 @@ body: |
|
||||
; BIG: %full_load:_(s32) = G_OR %or1, %or2
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
|
||||
|
||||
%ptr:_(p0) = COPY $x1
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
|
||||
%byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
|
||||
@ -1153,16 +1153,16 @@ body: |
|
||||
|
||||
; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset
|
||||
; LITTLE: liveins: $x0, $x1
|
||||
; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
|
||||
; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
|
||||
; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2
|
||||
; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3
|
||||
; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
|
||||
; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
|
||||
; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
|
||||
; LITTLE: %ptr:_(p0) = COPY $x1
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
|
||||
@ -1177,16 +1177,16 @@ body: |
|
||||
; LITTLE: RET_ReallyLR implicit $w1
|
||||
; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset
|
||||
; BIG: liveins: $x0, $x1
|
||||
; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
|
||||
; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
|
||||
; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
|
||||
; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
|
||||
; BIG: %cst_2:_(s64) = G_CONSTANT i64 2
|
||||
; BIG: %cst_3:_(s64) = G_CONSTANT i64 3
|
||||
; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
|
||||
; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
|
||||
; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
|
||||
; BIG: %ptr:_(p0) = COPY $x1
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
|
||||
; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
|
||||
@ -1199,18 +1199,18 @@ body: |
|
||||
; BIG: %full_load:_(s32) = G_OR %or1, %or2
|
||||
; BIG: $w1 = COPY %full_load(s32)
|
||||
; BIG: RET_ReallyLR implicit $w1
|
||||
%cst_1:_(s32) = G_CONSTANT i32 1
|
||||
%cst_2:_(s32) = G_CONSTANT i32 2
|
||||
%cst_3:_(s32) = G_CONSTANT i32 3
|
||||
%cst_1:_(s64) = G_CONSTANT i64 1
|
||||
%cst_2:_(s64) = G_CONSTANT i64 2
|
||||
%cst_3:_(s64) = G_CONSTANT i64 3
|
||||
|
||||
%cst_8:_(s32) = G_CONSTANT i32 8
|
||||
%cst_16:_(s32) = G_CONSTANT i32 16
|
||||
%cst_24:_(s32) = G_CONSTANT i32 24
|
||||
|
||||
%ptr:_(p0) = COPY $x1
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
|
||||
%ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
|
||||
%ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
|
||||
%ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
|
||||
|
||||
; This load is index 0
|
||||
%lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
|
||||
|
@ -8,8 +8,9 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_ptradd_crash__offset_smaller
|
||||
; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
|
||||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
|
||||
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64)
|
||||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%1:_(p1) = G_CONSTANT i64 0
|
||||
@ -27,8 +28,12 @@ tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: test_ptradd_crash__offset_wider
|
||||
; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12
|
||||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 3
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[C]](s128)
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
|
||||
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[TRUNC]], [[C1]](s64)
|
||||
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[SHL]](s64)
|
||||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%1:_(p1) = G_CONSTANT i64 0
|
||||
|
@ -11,9 +11,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v2, 63, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX9-NEXT: global_load_dword v0, v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -28,10 +27,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
|
||||
; GFX12-NEXT: v_and_b32_e32 v2, 63, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 2, v2
|
||||
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX12-NEXT: global_load_b32 v0, v[0:1], off
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -46,9 +43,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -63,10 +59,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) {
|
||||
; GFX12-NEXT: v_and_b32_e32 v2, 0x7f, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX12-NEXT: global_load_u16 v0, v[0:1], off
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -81,9 +75,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v2, 31, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -98,10 +91,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) {
|
||||
; GFX12-NEXT: v_and_b32_e32 v2, 31, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v2
|
||||
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX12-NEXT: global_load_b64 v[0:1], v[0:1], off
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: s_setpc_b64 s[30:31]
|
||||
|
@ -6,37 +6,44 @@
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
|
||||
|
||||
define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) {
|
||||
; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_and_b32 s0, s4, 3
|
||||
; GCN-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GCN-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GCN-NEXT: s_add_u32 s0, s2, s0
|
||||
; GCN-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX10-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX10-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
||||
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX11-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX11-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: ; return to shader part epilog
|
||||
%vector = load <4 x i128>, ptr addrspace(4) %ptr
|
||||
@ -48,8 +55,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
|
||||
; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX9-NEXT: s_mov_b32 s1, 0
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
@ -65,8 +72,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
|
||||
; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX8-NEXT: s_mov_b32 s1, 0
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
@ -82,10 +89,10 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
|
||||
; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX7-NEXT: s_mov_b32 s1, 0
|
||||
; GFX7-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s1
|
||||
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
|
||||
@ -97,8 +104,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
|
||||
; GFX10-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX10-NEXT: s_mov_b32 s1, 0
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
@ -114,9 +121,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr
|
||||
; GFX11-LABEL: extractelement_vgpr_v4i128_sgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_mov_b32 s1, 0
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
|
||||
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
@ -140,9 +146,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -152,9 +157,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2
|
||||
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -164,9 +168,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 4, v2
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
@ -179,9 +182,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2
|
||||
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -192,10 +194,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx
|
||||
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v2
|
||||
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -208,13 +208,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
|
||||
; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s1, v1
|
||||
@ -227,10 +222,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
|
||||
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
|
||||
@ -242,10 +236,10 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
|
||||
; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64
|
||||
@ -259,13 +253,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
|
||||
; GFX10-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
|
||||
@ -276,14 +265,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre
|
||||
; GFX11-LABEL: extractelement_sgpr_v4i128_vgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
|
||||
; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
|
||||
|
@ -10,11 +10,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX9-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX9-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: global_load_ushort v0, v0, s[0:1]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
@ -23,9 +20,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX8-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX8-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX8-NEXT: s_addc_u32 s1, s3, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
@ -38,11 +34,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_and_b32 s2, s4, 3
|
||||
; GFX7-NEXT: s_lshl_b32 s4, s2, 1
|
||||
; GFX7-NEXT: s_ashr_i32 s5, s4, 31
|
||||
; GFX7-NEXT: s_mov_b32 s5, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -52,12 +48,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX10-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX10-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX10-NEXT: global_load_ushort v0, v0, s[0:1]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
@ -65,13 +58,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_and_b32 s0, s4, 3
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX11-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX11-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX11-NEXT: global_load_u16 v0, v0, s[0:1]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX11-NEXT: ; return to shader part epilog
|
||||
@ -84,8 +74,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX9-NEXT: s_mov_b32 s1, 0
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
@ -98,8 +88,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX8-NEXT: s_mov_b32 s1, 0
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
@ -112,10 +102,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX7-NEXT: s_mov_b32 s1, 0
|
||||
; GFX7-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s1
|
||||
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
|
||||
@ -124,8 +114,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX10-NEXT: s_mov_b32 s1, 0
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
@ -138,9 +128,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_and_b32 s0, s2, 3
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_mov_b32 s1, 0
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
@ -161,9 +150,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -173,9 +161,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -185,9 +172,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
@ -200,9 +186,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -213,10 +198,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX11-NEXT: v_and_b32_e32 v2, 3, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -229,13 +212,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX9-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
@ -245,10 +223,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX8-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
|
||||
@ -257,10 +234,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX7-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
|
||||
@ -271,13 +248,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
@ -285,14 +257,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX11-NEXT: ; return to shader part epilog
|
||||
@ -686,11 +653,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s4, 7
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX9-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX9-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: global_load_ushort v0, v0, s[0:1]
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
@ -699,9 +663,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s4, 7
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX8-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX8-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX8-NEXT: s_addc_u32 s1, s3, 0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
@ -714,11 +677,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_and_b32 s2, s4, 7
|
||||
; GFX7-NEXT: s_lshl_b32 s4, s2, 1
|
||||
; GFX7-NEXT: s_ashr_i32 s5, s4, 31
|
||||
; GFX7-NEXT: s_mov_b32 s5, 0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
@ -728,12 +691,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_and_b32 s0, s4, 7
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX10-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX10-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX10-NEXT: global_load_ushort v0, v0, s[0:1]
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
@ -741,13 +701,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_and_b32 s0, s4, 7
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX11-NEXT: s_add_u32 s0, s2, s0
|
||||
; GFX11-NEXT: s_addc_u32 s1, s3, s1
|
||||
; GFX11-NEXT: global_load_u16 v0, v0, s[0:1]
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX11-NEXT: ; return to shader part epilog
|
||||
@ -760,8 +717,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s2, 7
|
||||
; GFX9-NEXT: s_mov_b32 s1, 0
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX9-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
@ -774,8 +731,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s2, 7
|
||||
; GFX8-NEXT: s_mov_b32 s1, 0
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX8-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
@ -788,10 +745,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_and_b32 s0, s2, 7
|
||||
; GFX7-NEXT: s_mov_b32 s1, 0
|
||||
; GFX7-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX7-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s1
|
||||
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
|
||||
@ -800,8 +757,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_and_b32 s0, s2, 7
|
||||
; GFX10-NEXT: s_mov_b32 s1, 0
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
@ -814,9 +771,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr,
|
||||
; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_and_b32 s0, s2, 7
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_mov_b32 s1, 0
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
|
||||
; GFX11-NEXT: s_ashr_i32 s1, s0, 31
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
@ -837,9 +793,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -849,9 +804,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -861,9 +815,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: v_and_b32_e32 v2, 7, v2
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX7-NEXT: s_mov_b32 s6, 0
|
||||
; GFX7-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
@ -876,9 +829,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -889,10 +841,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx)
|
||||
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2
|
||||
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -905,13 +855,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX9-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: v_and_b32_e32 v0, 7, v0
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
|
||||
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX9-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX9-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
@ -921,10 +866,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX8-NEXT: v_and_b32_e32 v0, 7, v0
|
||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
|
||||
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
|
||||
@ -933,10 +877,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX7-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_and_b32_e32 v0, 7, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
|
||||
@ -947,13 +891,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_b32_e32 v0, 7, v0
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX10-NEXT: global_load_ushort v0, v[0:1], off
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX10-NEXT: global_load_ushort v0, v0, s[2:3]
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
@ -961,14 +900,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg
|
||||
; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_and_b32_e32 v0, 7, v0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
|
||||
; GFX11-NEXT: global_load_u16 v0, v[0:1], off
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
|
||||
; GFX11-NEXT: global_load_u16 v0, v0, s[2:3]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
|
||||
; GFX11-NEXT: ; return to shader part epilog
|
||||
|
@ -205,210 +205,3 @@ body: |
|
||||
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_global_s16_idx
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: test_gep_global_s16_idx
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(s16) = G_TRUNC %1
|
||||
%3:_(p1) = G_PTR_ADD %0, %2
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_global_s32_idx
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: test_gep_global_s32_idx
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT]](s64)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(p1) = G_PTR_ADD %0, %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_global_s96_idx
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
|
||||
|
||||
; CHECK-LABEL: name: test_gep_global_s96_idx
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[TRUNC]](s64)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s96) = COPY $vgpr2_vgpr3_vgpr4
|
||||
%2:_(p1) = G_PTR_ADD %0, %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_local_i16_idx
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; CHECK-LABEL: name: test_gep_local_i16_idx
|
||||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s32)
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s16) = G_TRUNC %1
|
||||
%3:_(p3) = G_PTR_ADD %0, %2
|
||||
$vgpr0 = COPY %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_local_i64_idx
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1_vgpr2
|
||||
|
||||
; CHECK-LABEL: name: test_gep_local_i64_idx
|
||||
; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[TRUNC]](s32)
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3)
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s64) = COPY $vgpr1_vgpr2
|
||||
%2:_(p3) = G_PTR_ADD %0, %1
|
||||
$vgpr0 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_v2p1_v2i32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
|
||||
; CHECK-LABEL: name: test_gep_v2p1_v2i32
|
||||
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
|
||||
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[SEXT]](s64)
|
||||
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
|
||||
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[SEXT1]](s64)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
|
||||
%0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
%2:_(<2 x p1>) = G_PTR_ADD %0, %1
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_v2p1_v2i96
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
|
||||
|
||||
; CHECK-LABEL: name: test_gep_v2p1_v2i96
|
||||
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY $vgpr7_vgpr8_vgpr9
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[TRUNC]](s64)
|
||||
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[COPY2]](s96)
|
||||
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[TRUNC1]](s64)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
|
||||
%0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(s96) = COPY $vgpr4_vgpr5_vgpr6
|
||||
%2:_(s96) = COPY $vgpr7_vgpr8_vgpr9
|
||||
%3:_(<2 x s96>) = G_BUILD_VECTOR %1, %2
|
||||
%4:_(<2 x p1>) = G_PTR_ADD %0, %3
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_v2p3_v2s16
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2
|
||||
|
||||
; CHECK-LABEL: name: test_gep_v2p3_v2s16
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
|
||||
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[SEXT_INREG]](s32)
|
||||
; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
|
||||
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[SEXT_INREG1]](s32)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
|
||||
%0:_(<2 x p3>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: test_gep_v2p3_v2s64
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
|
||||
; CHECK-LABEL: name: test_gep_v2p3_v2s64
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
|
||||
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[TRUNC]](s32)
|
||||
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64)
|
||||
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[TRUNC1]](s32)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3)
|
||||
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
|
||||
%0:_(<2 x p3>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<2 x p3>) = G_PTR_ADD %0, %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
...
|
||||
|
@ -9,7 +9,6 @@
|
||||
define void @test_load_store_64_novfp() #1 { ret void }
|
||||
|
||||
define void @test_gep_s32() { ret void }
|
||||
define void @test_gep_s16() { ret void }
|
||||
|
||||
attributes #0 = { "target-features"="+vfp2" }
|
||||
attributes #1 = { "target-features"="-vfp2sp" }
|
||||
@ -211,30 +210,3 @@ body: |
|
||||
$r0 = COPY %2(p0)
|
||||
BX_RET 14, $noreg, implicit $r0
|
||||
...
|
||||
---
|
||||
name: test_gep_s16
|
||||
# CHECK-LABEL: name: test_gep_s16
|
||||
legalized: false
|
||||
# CHECK: legalized: true
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $r0
|
||||
|
||||
%0(p0) = COPY $r0
|
||||
%1(s16) = G_LOAD %0(p0) :: (load (s16))
|
||||
|
||||
; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
|
||||
; CHECK: {{%[0-9]+}}:_(p0) = G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s32)
|
||||
; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16)
|
||||
%2(p0) = G_PTR_ADD %0, %1(s16)
|
||||
|
||||
$r0 = COPY %2(p0)
|
||||
BX_RET 14, $noreg, implicit $r0
|
||||
...
|
||||
|
55
llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
Normal file
55
llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir
Normal file
@ -0,0 +1,55 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK
|
||||
|
||||
--- |
|
||||
define void @test_gep_i32c(ptr %addr) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i32 5
|
||||
ret void
|
||||
}
|
||||
define void @test_gep_i32(ptr %addr, i32 %ofs) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i32 %ofs
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: test_gep_i32c
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i32c
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s32) = G_CONSTANT i32 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s32)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i32
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i32
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s32) = IMPLICIT_DEF
|
||||
%2(p0) = G_PTR_ADD %0, %1(s32)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
55
llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
Normal file
55
llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir
Normal file
@ -0,0 +1,55 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=X64
|
||||
|
||||
--- |
|
||||
define void @test_gep_i64c(ptr %addr) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i64 5
|
||||
ret void
|
||||
}
|
||||
define void @test_gep_i64(ptr %addr, i64 %ofs) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i64 %ofs
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: test_gep_i64c
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; X64-LABEL: name: test_gep_i64c
|
||||
; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
|
||||
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
|
||||
; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; X64-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s64) = G_CONSTANT i64 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s64)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i64
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; X64-LABEL: name: test_gep_i64
|
||||
; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
|
||||
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
|
||||
; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; X64-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s64) = IMPLICIT_DEF
|
||||
%2(p0) = G_PTR_ADD %0, %1(s64)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
@ -1,224 +0,0 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X64
|
||||
# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X86
|
||||
|
||||
--- |
|
||||
define void @test_gep_i8c(ptr %addr) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i8 5
|
||||
ret void
|
||||
}
|
||||
define void @test_gep_i8(ptr %addr, i8 %ofs) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i8 %ofs
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_gep_i16c(ptr %addr) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i16 5
|
||||
ret void
|
||||
}
|
||||
define void @test_gep_i16(ptr %addr, i16 %ofs) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i16 %ofs
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_gep_i32c(ptr %addr) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i32 5
|
||||
ret void
|
||||
}
|
||||
define void @test_gep_i32(ptr %addr, i32 %ofs) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i32 %ofs
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_gep_i64c(ptr %addr) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i64 5
|
||||
ret void
|
||||
}
|
||||
define void @test_gep_i64(ptr %addr, i64 %ofs) {
|
||||
%arrayidx = getelementptr i32, ptr undef, i64 %ofs
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: test_gep_i8c
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i8c
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s8) = G_CONSTANT i8 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s8)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i8
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i8
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s8)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s8) = IMPLICIT_DEF
|
||||
%2(p0) = G_PTR_ADD %0, %1(s8)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i16c
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i16c
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s16) = G_CONSTANT i16 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s16)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i16
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i16
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s16)
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s16) = IMPLICIT_DEF
|
||||
%2(p0) = G_PTR_ADD %0, %1(s16)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i32c
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i32c
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s32) = G_CONSTANT i32 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s32)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i32
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; CHECK-LABEL: name: test_gep_i32
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32)
|
||||
; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; CHECK-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s32) = IMPLICIT_DEF
|
||||
%2(p0) = G_PTR_ADD %0, %1(s32)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i64c
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; X64-LABEL: name: test_gep_i64c
|
||||
; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
|
||||
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
|
||||
; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; X64-NEXT: RET 0
|
||||
; X86-LABEL: name: test_gep_i64c
|
||||
; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; X86-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s64) = G_CONSTANT i64 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s64)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
||||
---
|
||||
name: test_gep_i64
|
||||
legalized: false
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
; X64-LABEL: name: test_gep_i64
|
||||
; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
|
||||
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64)
|
||||
; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; X64-NEXT: RET 0
|
||||
; X86-LABEL: name: test_gep_i64
|
||||
; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF
|
||||
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
|
||||
; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64)
|
||||
; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[TRUNC]](s32)
|
||||
; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr)
|
||||
; X86-NEXT: RET 0
|
||||
%0(p0) = IMPLICIT_DEF
|
||||
%1(s64) = IMPLICIT_DEF
|
||||
%2(p0) = G_PTR_ADD %0, %1(s64)
|
||||
G_STORE %2, %0 :: (store (p0) into %ir.addr)
|
||||
RET 0
|
||||
...
|
@ -1380,23 +1380,18 @@ body: |
|
||||
bb.0 (%ir-block.0):
|
||||
; FAST-LABEL: name: test_gep
|
||||
; FAST: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
|
||||
; FAST: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
|
||||
; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; FAST: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
|
||||
; FAST: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
|
||||
; FAST: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
|
||||
; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
|
||||
; FAST: RET 0
|
||||
;
|
||||
; GREEDY-LABEL: name: test_gep
|
||||
; GREEDY: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF
|
||||
; GREEDY: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20
|
||||
; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32)
|
||||
; GREEDY: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
|
||||
; GREEDY: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64)
|
||||
; GREEDY: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20
|
||||
; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64)
|
||||
; GREEDY: RET 0
|
||||
%0(p0) = G_IMPLICIT_DEF
|
||||
%1(s32) = G_CONSTANT i32 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s32)
|
||||
%3(s64) = G_CONSTANT i64 20
|
||||
%4(p0) = G_PTR_ADD %0, %3(s64)
|
||||
%1(s64) = G_CONSTANT i64 20
|
||||
%2(p0) = G_PTR_ADD %0, %1(s64)
|
||||
RET 0
|
||||
|
||||
...
|
||||
|
@ -1,4 +1,4 @@
|
||||
#RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
|
||||
# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
|
||||
# REQUIRES: aarch64-registered-target
|
||||
|
||||
---
|
||||
@ -29,4 +29,8 @@ body: |
|
||||
; CHECK: Bad machine code: gep first operand must be a pointer
|
||||
%6:_(s64) = G_PTR_ADD %1, %1
|
||||
|
||||
%7:_(s32) = G_IMPLICIT_DEF
|
||||
|
||||
; CHECK: Bad machine code: gep offset operand must match index size for address space
|
||||
%8:_(p0) = G_PTR_ADD %0, %7
|
||||
...
|
||||
|
Loading…
x
Reference in New Issue
Block a user