The compiler should not generate subvectors with the same extractelement instructions; doing so may cause a crash and lead to inefficient vectorization. Fixes #174773
83 lines
6.1 KiB
LLVM
83 lines
6.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

; SLP-vectorizer regression input: builds a <16 x double> one lane at a time
; from scalar fadds that all use %a as their first operand.
;   lanes 0-5   : %a + x[0] .. x[5]
;   lanes 6-9   : %a + x[8], x[9], x[9], x[10]   (%gep7 and %gep8 both address x[9])
;   lanes 10-15 : %a + %v
; The autogenerated assertions expect a single <16 x double> fadd of a splat of
; %a with a vector assembled from one <6 x double> load at x, two overlapping
; <2 x double> loads (at x+8 and x+9), and splats of %v.
; NOTE(review): the repeated index 9 looks deliberate, since the expected output
; loads from both x+8 and x+9 -- confirm against the linked issue before
; "fixing" it.
define <16 x double> @test(ptr %x, double %v, double %a) {
; CHECK-LABEL: define <16 x double> @test(
; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) {
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9
; CHECK-NEXT: [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <6 x double> [[TMP1]], <6 x double> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x double> [[TMP12]], <16 x double> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 poison, i32 poison, i32 10, i32 11, i32 12, i32 13, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x double> [[TMP14]], <16 x double> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 10, i32 11, i32 12, i32 13, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x double> [[TMP16]], <16 x double> [[TMP20]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]]
; CHECK-NEXT: ret <16 x double> [[TMP18]]
;
  ; Element addresses relative to %x (indices are in units of double).
  ; Indices 6 and 7 are skipped; %gep7 and %gep8 both use index 9.
  %gep1 = getelementptr inbounds double, ptr %x, i64 1
  %gep2 = getelementptr inbounds double, ptr %x, i64 2
  %gep3 = getelementptr inbounds double, ptr %x, i64 3
  %gep4 = getelementptr inbounds double, ptr %x, i64 4
  %gep5 = getelementptr inbounds double, ptr %x, i64 5
  %gep6 = getelementptr inbounds double, ptr %x, i64 8
  %gep7 = getelementptr inbounds double, ptr %x, i64 9
  %gep8 = getelementptr inbounds double, ptr %x, i64 9
  %gep9 = getelementptr inbounds double, ptr %x, i64 10
  ; Ten scalar loads; %x7 and %x8 read the same address.
  %x0 = load double, ptr %x, align 4
  %x1 = load double, ptr %gep1, align 4
  %x2 = load double, ptr %gep2, align 4
  %x3 = load double, ptr %gep3, align 4
  %x4 = load double, ptr %gep4, align 4
  %x5 = load double, ptr %gep5, align 4
  %x6 = load double, ptr %gep6, align 4
  %x7 = load double, ptr %gep7, align 4
  %x8 = load double, ptr %gep8, align 4
  %x9 = load double, ptr %gep9, align 4
  ; %a plus each loaded element (feeds lanes 0-9).
  %add1 = fadd double %a, %x0
  %add2 = fadd double %a, %x1
  %add3 = fadd double %a, %x2
  %add4 = fadd double %a, %x3
  %add5 = fadd double %a, %x4
  %add6 = fadd double %a, %x5
  %add7 = fadd double %a, %x6
  %add8 = fadd double %a, %x7
  %add9 = fadd double %a, %x8
  %add10 = fadd double %a, %x9
  ; Six identical sums %a + %v (feeds lanes 10-15).
  %add11 = fadd double %a, %v
  %add12 = fadd double %a, %v
  %add13 = fadd double %a, %v
  %add14 = fadd double %a, %v
  %add15 = fadd double %a, %v
  %add16 = fadd double %a, %v
  ; Insert the sixteen sums into lanes 0..15 in order; the final
  ; insertelement chain value is the function result.
  %i0 = insertelement <16 x double> poison, double %add1, i32 0
  %i1 = insertelement <16 x double> %i0, double %add2, i32 1
  %i2 = insertelement <16 x double> %i1, double %add3, i32 2
  %i3 = insertelement <16 x double> %i2, double %add4, i32 3
  %i4 = insertelement <16 x double> %i3, double %add5, i32 4
  %i5 = insertelement <16 x double> %i4, double %add6, i32 5
  %i6 = insertelement <16 x double> %i5, double %add7, i32 6
  %i7 = insertelement <16 x double> %i6, double %add8, i32 7
  %i8 = insertelement <16 x double> %i7, double %add9, i32 8
  %i9 = insertelement <16 x double> %i8, double %add10, i32 9
  %i10 = insertelement <16 x double> %i9, double %add11, i32 10
  %i11 = insertelement <16 x double> %i10, double %add12, i32 11
  %i12 = insertelement <16 x double> %i11, double %add13, i32 12
  %i13 = insertelement <16 x double> %i12, double %add14, i32 13
  %i14 = insertelement <16 x double> %i13, double %add15, i32 14
  %i15 = insertelement <16 x double> %i14, double %add16, i32 15
  ret <16 x double> %i15
}