
The patch adds patterns to select the EXT_ZZI_CONSTRUCTIVE pseudo instead of the EXT_ZZI destructive instruction for vector_splice. This only works when the two inputs to vector_splice are identical. Given that registers aren't tied anymore, this gives the register allocator more freedom and a lot of MOVs get replaced with MOVPRFX. In some cases however, we could have just chosen the same input and output register, but regalloc preferred not to. This means we end up with some test cases now having more instructions: there is now a MOVPRFX while no MOV was previously needed.
94 lines
3.7 KiB
LLVM
94 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s | FileCheck %s
|
|
|
|
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
; Currently there is no custom lowering for vector shuffles operating on types
|
|
; bigger than NEON. However, having no support opens us up to a code generator
|
|
; hang when expanding BUILD_VECTOR. Here we just validate the problematic case
|
|
; successfully exits code generation.
|
|
; Test function: broadcasts element 0 of %b into an <8 x i32> vector, permutes
; that vector with the interleaving mask <0, 4, 1, 5, 2, 6, 3, 7>, and stores
; the result to %a. The function is compiled with vscale_range(2,2) and
; attribute group #0.
; The expected assembly below is a splat (mov), a movprfx + ext pair that
; fills z1 from z0, and a single st2 of v0/v1 to [x0].
define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) vscale_range(2,2) #0 {
|
|
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: mov z0.s, s0
|
|
; CHECK-NEXT: movprfx z1, z0
|
|
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
|
|
; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0]
|
|
; CHECK-NEXT: ret
|
|
; An all-zero shuffle mask selects element 0 of %b for every result lane.
%splat = shufflevector <2 x i32> %b, <2 x i32> poison, <8 x i32> zeroinitializer
|
|
; Interleave the low four lanes (0..3) with the high four lanes (4..7).
%interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
|
|
store <8 x i32> %interleaved.vec, ptr %a, align 4
|
|
ret void
|
|
}
|
|
|
|
; Ensure we don't crash when trying to lower a shuffle via an extract
|
|
; Test function: when %cond is true, branches straight to %exit; otherwise
; runs %vector.body, which loads a <32 x i32> from %dst, passes it through a
; select controlled by a <32 x i1> mask built from zeroinitializer operands,
; and stores the result back to %dst. The function is compiled with
; vscale_range(2,2) and attribute group #0.
; The expected assembly below includes a movprfx + ext pair and ends in four
; predicated st1w stores of a zeroed z0 at [x0] offsets 0..3 (mul vl).
define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_range(2,2) #0 {
|
|
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
|
|
; CHECK-NEXT: // %bb.1: // %vector.body
|
|
; CHECK-NEXT: movi v0.2d, #0000000000000000
|
|
; CHECK-NEXT: movi v1.2d, #0000000000000000
|
|
; CHECK-NEXT: ptrue p0.s
|
|
; CHECK-NEXT: umov w8, v0.b[8]
|
|
; CHECK-NEXT: mov v1.b[1], v0.b[1]
|
|
; CHECK-NEXT: movprfx z3, z0
|
|
; CHECK-NEXT: ext z3.b, z3.b, z0.b, #16
|
|
; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8
|
|
; CHECK-NEXT: fmov s2, w8
|
|
; CHECK-NEXT: mov v1.b[2], v0.b[2]
|
|
; CHECK-NEXT: mov v2.b[1], v0.b[9]
|
|
; CHECK-NEXT: mov v1.b[3], v0.b[3]
|
|
; CHECK-NEXT: mov v2.b[2], v0.b[10]
|
|
; CHECK-NEXT: mov v1.b[4], v0.b[4]
|
|
; CHECK-NEXT: mov v2.b[3], v0.b[11]
|
|
; CHECK-NEXT: mov v1.b[5], v0.b[5]
|
|
; CHECK-NEXT: mov v2.b[4], v0.b[12]
|
|
; CHECK-NEXT: mov v1.b[6], v0.b[6]
|
|
; CHECK-NEXT: mov v2.b[5], v0.b[13]
|
|
; CHECK-NEXT: mov v1.b[7], v0.b[7]
|
|
; CHECK-NEXT: mov v2.b[6], v0.b[14]
|
|
; CHECK-NEXT: uunpklo z1.h, z1.b
|
|
; CHECK-NEXT: mov v2.b[7], v0.b[15]
|
|
; CHECK-NEXT: uunpklo z0.h, z3.b
|
|
; CHECK-NEXT: uunpklo z3.h, z4.b
|
|
; CHECK-NEXT: uunpklo z1.s, z1.h
|
|
; CHECK-NEXT: uunpklo z2.h, z2.b
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z3.s, z3.h
|
|
; CHECK-NEXT: lsl z1.s, z1.s, #31
|
|
; CHECK-NEXT: uunpklo z2.s, z2.h
|
|
; CHECK-NEXT: lsl z0.s, z0.s, #31
|
|
; CHECK-NEXT: lsl z3.s, z3.s, #31
|
|
; CHECK-NEXT: asr z1.s, z1.s, #31
|
|
; CHECK-NEXT: asr z0.s, z0.s, #31
|
|
; CHECK-NEXT: asr z3.s, z3.s, #31
|
|
; CHECK-NEXT: lsl z2.s, z2.s, #31
|
|
; CHECK-NEXT: cmpne p3.s, p0/z, z1.s, #0
|
|
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
|
|
; CHECK-NEXT: movi v0.2d, #0000000000000000
|
|
; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0
|
|
; CHECK-NEXT: asr z2.s, z2.s, #31
|
|
; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
|
|
; CHECK-NEXT: st1w { z0.s }, p1, [x0, #2, mul vl]
|
|
; CHECK-NEXT: st1w { z0.s }, p2, [x0, #3, mul vl]
|
|
; CHECK-NEXT: st1w { z0.s }, p3, [x0]
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x0, #1, mul vl]
|
|
; CHECK-NEXT: .LBB1_2: // %exit
|
|
; CHECK-NEXT: ret
|
|
; The <32 x i1> select mask: a shuffle of two zeroinitializer vectors with an
; all-zero shuffle mask. It is defined in the entry block and used in
; %vector.body.
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
|
|
br i1 %cond, label %exit, label %vector.body
|
|
|
|
vector.body:
|
|
%1 = load <32 x i32>, ptr %dst, align 16
|
|
; Lanes where the mask is true take zero; the remaining lanes keep the loaded
; value.
%predphi = select <32 x i1> %broadcast.splat, <32 x i32> zeroinitializer, <32 x i32> %1
|
|
store <32 x i32> %predphi, ptr %dst, align 16
|
|
br label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "target-features"="+sve" }
|