This patch implements the diamond pattern where we are vectorizing toward the top of the diamond from both edges, but the second edge may use elements from a different vector or just scalar values. This requires some additional packing code (see lit test).
272 lines
11 KiB
LLVM
272 lines
11 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
|
|
|
|
; Simplest case: two adjacent scalar float loads feed two adjacent scalar
; float stores. The expected output replaces them with a single <2 x float>
; load and a single <2 x float> store through the lane-0 pointer.
define void @store_load(ptr %ptr) {
; CHECK-LABEL: define void @store_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}
|
|
|
|
|
|
; Cast in the middle of the chain: adjacent float loads are each extended to
; double and stored to adjacent double slots. The expected output is one
; <2 x float> load, one vector fpext to <2 x double>, and one vector store.
define void @store_fpext_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fpext_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTRD0:%.*]] = getelementptr double, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VCAST:%.*]] = fpext <2 x float> [[VECL]] to <2 x double>
; CHECK-NEXT: store <2 x double> [[VCAST]], ptr [[PTRD0]], align 8
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ptrd0 = getelementptr double, ptr %ptr, i32 0
  %ptrd1 = getelementptr double, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %fpext0 = fpext float %ld0 to double
  %fpext1 = fpext float %ld1 to double
  store double %fpext0, ptr %ptrd0
  store double %fpext1, ptr %ptrd1
  ret void
}
|
|
|
|
; Compare plus cast: two groups of float loads (A and B) read the same two
; adjacent addresses, are compared lane-wise with fcmp ogt, and the i1 results
; are zero-extended to i32 and stored to adjacent slots. The expected output
; has two <2 x float> loads, a vector fcmp, a vector zext and a vector store.
define void @store_fcmp_zext_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fcmp_zext_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTRB0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VCMP:%.*]] = fcmp ogt <2 x float> [[VECL]], [[VECL1]]
; CHECK-NEXT: [[VCAST:%.*]] = zext <2 x i1> [[VCMP]] to <2 x i32>
; CHECK-NEXT: store <2 x i32> [[VCAST]], ptr [[PTRB0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ptrb0 = getelementptr i32, ptr %ptr, i32 0
  %ptrb1 = getelementptr i32, ptr %ptr, i32 1
  %ldB0 = load float, ptr %ptr0
  %ldB1 = load float, ptr %ptr1
  %ldA0 = load float, ptr %ptr0
  %ldA1 = load float, ptr %ptr1
  %fcmp0 = fcmp ogt float %ldA0, %ldB0
  %fcmp1 = fcmp ogt float %ldA1, %ldB1
  %zext0 = zext i1 %fcmp0 to i32
  %zext1 = zext i1 %fcmp1 to i32
  store i32 %zext0, ptr %ptrb0
  store i32 %zext1, ptr %ptrb1
  ret void
}
|
|
|
|
; Binary operator: two groups of float loads (A and B) read the same two
; adjacent addresses and are added lane-wise before being stored back. The
; expected output has two <2 x float> loads, one vector fadd and one vector
; store.
define void @store_fadd_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fadd_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ldA0 = load float, ptr %ptr0
  %ldA1 = load float, ptr %ptr1
  %ldB0 = load float, ptr %ptr0
  %ldB1 = load float, ptr %ptr1
  %fadd0 = fadd float %ldA0, %ldB0
  %fadd1 = fadd float %ldA1, %ldB1
  store float %fadd0, ptr %ptr0
  store float %fadd1, ptr %ptr1
  ret void
}
|
|
|
|
; Unary operator: adjacent float loads are negated and stored back to the same
; adjacent addresses. The expected output is one <2 x float> load, one vector
; fneg and one vector store.
define void @store_fneg_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fneg_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VEC:%.*]] = fneg <2 x float> [[VECL]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %fneg0 = fneg float %ld0
  %fneg1 = fneg float %ld1
  store float %fneg0, ptr %ptr0
  store float %fneg1, ptr %ptr1
  ret void
}
|
|
|
|
; Scalars that are still used outside the vectorized chain: %ld0 is returned
; and %ld1 feeds %user, in addition to both feeding the adjacent stores. The
; expected output contains the <2 x float> load/store pair and also keeps both
; scalar loads, the scalar fneg and the scalar return value.
define float @scalars_with_external_uses_not_dead(ptr %ptr) {
; CHECK-LABEL: define float @scalars_with_external_uses_not_dead(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
; CHECK-NEXT: [[USER:%.*]] = fneg float [[LD1]]
; CHECK-NEXT: ret float [[LD0]]
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  %user = fneg float %ld1
  ret float %ld0
}
|
|
|
|
; Packing of scalars: the stores are adjacent, but %ld1 is loaded from the
; separate pointer argument %ptr2 rather than from %ptr1. The expected output
; keeps both scalar loads, builds a <2 x float> with two insertelement
; instructions, and emits a single vector store.
define void @pack_scalars(ptr %ptr, ptr %ptr2) {
; CHECK-LABEL: define void @pack_scalars(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LD0]], i32 0
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LD1]], i32 1
; CHECK-NEXT: store <2 x float> [[PACK1]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr2
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}
|
|
|
|
; External function with no attributes; called between the two stores below.
declare void @foo()

; Negative test: a call to @foo sits between the two stores, and the expected
; output is the input left unchanged (scalar loads, scalar stores, the call).
; NOTE(review): both stores write %ld1 to %ptr1 and %ld0 is never used. If the
; intent was a pair of adjacent stores (%ld0 -> %ptr0, %ld1 -> %ptr1), the input
; and the expected output should be regenerated; confirm before changing.
define void @cant_vectorize_seeds(ptr %ptr) {
; CHECK-LABEL: define void @cant_vectorize_seeds(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld1, ptr %ptr1
  call void @foo() ; This call blocks scheduling of the store seeds.
  store float %ld1, ptr %ptr1
  ret void
}
|
|
|
|
; Packing a vector together with a scalar: a <2 x float> value is stored at
; float elements 0-1 and a float loaded from the separate pointer %ptr2 is
; stored at float element 2. The expected output unpacks the <2 x float> with
; extractelement, rebuilds a <3 x float> with insertelement (the scalar goes in
; lane 2), and emits a single <3 x float> store.
define void @pack_vectors(ptr %ptr, ptr %ptr2) {
; CHECK-LABEL: define void @pack_vectors(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr <2 x float>, ptr [[PTR]], i32 0
; CHECK-NEXT: [[LD0:%.*]] = load <2 x float>, ptr [[PTR0]], align 8
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
; CHECK-NEXT: [[VPACK:%.*]] = extractelement <2 x float> [[LD0]], i32 0
; CHECK-NEXT: [[VPACK1:%.*]] = insertelement <3 x float> poison, float [[VPACK]], i32 0
; CHECK-NEXT: [[VPACK2:%.*]] = extractelement <2 x float> [[LD0]], i32 1
; CHECK-NEXT: [[VPACK3:%.*]] = insertelement <3 x float> [[VPACK1]], float [[VPACK2]], i32 1
; CHECK-NEXT: [[PACK:%.*]] = insertelement <3 x float> [[VPACK3]], float [[LD1]], i32 2
; CHECK-NEXT: store <3 x float> [[PACK]], ptr [[PTR0]], align 8
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr <2 x float>, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 2
  %ld0 = load <2 x float>, ptr %ptr0
  %ld1 = load float, ptr %ptr2
  store <2 x float> %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}
|
|
|
|
; Diamond: each fsub uses the same load for both of its operands, so both
; operand edges of the fsub pair lead to the same pair of loads. The expected
; output has a single <2 x float> load that is used as both operands of the
; vector fsub.
define void @diamond(ptr %ptr) {
; CHECK-LABEL: define void @diamond(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VECL]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %sub0 = fsub float %ld0, %ld0
  %sub1 = fsub float %ld1, %ld1
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}
|
|
|
|
; Diamond with swapped lanes: the first operands are (%ld0, %ld1) while the
; second operands are (%ld1, %ld0). The expected output reuses the single
; <2 x float> load for the first operand and feeds the second operand from a
; shufflevector of that load with mask <1, 0>.
define void @diamondWithShuffle(ptr %ptr) {
; CHECK-LABEL: define void @diamondWithShuffle(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VSHUF:%.*]] = shufflevector <2 x float> [[VECL]], <2 x float> [[VECL]], <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VSHUF]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %sub0 = fsub float %ld0, %ld1
  %sub1 = fsub float %ld1, %ld0
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}
|
|
|
|
; Diamond whose second operand mixes sources: lane 0 is the scalar %ldX loaded
; from the separate pointer %ptrX, lane 1 is %ld0, which is also lane 0 of the
; first operand. The expected output builds the second operand by inserting
; the scalar into lane 0 and an extractelement of lane 0 of the vector load
; into lane 1, then emits one vector fsub and one vector store.
define void @diamondMultiInput(ptr %ptr, ptr %ptrX) {
; CHECK-LABEL: define void @diamondMultiInput(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; CHECK-NEXT: [[VINS:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0
; CHECK-NEXT: [[VEXT:%.*]] = extractelement <2 x float> [[VECL]], i32 0
; CHECK-NEXT: [[VINS1:%.*]] = insertelement <2 x float> [[VINS]], float [[VEXT]], i32 1
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VINS1]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1

  %ldX = load float, ptr %ptrX

  %sub0 = fsub float %ld0, %ldX
  %sub1 = fsub float %ld1, %ld0
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}
|