
If we have a store of a loaded value with no other uses in between, the store is considered dead and is removed. So sometimes when legalizing a fixed-length vector store of an insert, we end up producing better code through scalarization than without. An example is the following: %a = load <4 x i64>, ptr %x %b = insertelement <4 x i64> %a, i64 %y, i32 2 store <4 x i64> %b, ptr %x If this is scalarized, then DAGCombine successfully removes 3 of the 4 stores, which are considered dead, and on RISC-V we get: sd a1, 16(a0) However, if we make the vector type legal (-mattr=+v), then we lose the optimisation because we don't scalarize it. This patch attempts to recover the optimisation for vectors by identifying patterns where we store a load with a single insert in between, replacing it with a scalar store of the inserted element. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D152276
34 lines
1.0 KiB
LLVM
34 lines
1.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; Compile this file with llc, write the assembly to stdout, and verify it with
; FileCheck against the assertions embedded in this same file.
; RUN: llc %s -o - | FileCheck %s
|
|
|
|
; The RUN line passes no target option, so the triple below is what selects
; the target for this module.
target triple = "arm64-apple-ios13.4.0"
|
|
|
|
; Make sure we do not get stuck in a cycle in DAGCombiner.
|
|
|
|
; @test(i1 %c, ptr %ptr) -> void
;
; Shape of the input:
;   * entry branches on %c to bb1 or straight to bb2.
;   * bb1 loads a <1 x double> from %ptr.
;   * bb2 merges that value with zeroinitializer through a phi, extracts its
;     only element, and inserts it into lane 2 of a <4 x double> that is loaded
;     from and then stored back to the same address (%arrayidx21).
;
; The expected assembly below contains a single d-register store for the whole
; load / insertelement / store sequence.
;
; NOTE(review): the address is computed from `ptr undef`, and the expected
; output stores through x8, which the checked instruction sequence never
; writes. Presumably intentional for a reduced reproducer; confirm before
; changing the pointer operand, since that would change the expected output.
define void @test(i1 %c, ptr %ptr) {
|
|
; CHECK-LABEL: test:
|
|
; CHECK: ; %bb.0: ; %entry
|
|
; CHECK-NEXT: movi d0, #0000000000000000
|
|
; CHECK-NEXT: tbz w0, #0, LBB0_2
|
|
; CHECK-NEXT: ; %bb.1: ; %bb1
|
|
; CHECK-NEXT: ldr d0, [x1]
|
|
; CHECK-NEXT: LBB0_2: ; %bb2
|
|
; CHECK-NEXT: str d0, [x8]
|
|
; CHECK-NEXT: ret
|
|
entry:
|
|
; Skip the load in bb1 when %c is false.
br i1 %c, label %bb1, label %bb2
|
|
|
|
bb1:
|
|
; Single-element vector load from the pointer argument.
%lv1 = load <1 x double>, ptr %ptr, align 16
|
|
br label %bb2
|
|
|
|
bb2:
|
|
; Either the value loaded in bb1 or an all-zero vector when arriving from entry.
%p = phi <1 x double> [ %lv1, %bb1 ], [ zeroinitializer, %entry ]
|
|
; Pull the scalar double out of the one-element vector.
%vecext19 = extractelement <1 x double> %p, i32 0
|
|
; Address of array element 3 of a [4 x <4 x double>] based at an undef pointer.
%arrayidx21 = getelementptr inbounds [4 x <4 x double>], ptr undef, i64 0, i64 3
|
|
; Load the full vector, replace lane 2 with the scalar, and store the result
; back to the same address it was loaded from.
%lv2 = load <4 x double>, ptr %arrayidx21, align 16
|
|
%vecins22 = insertelement <4 x double> %lv2, double %vecext19, i32 2
|
|
store <4 x double> %vecins22, ptr %arrayidx21, align 16
|
|
ret void
|
|
}
|