
An LSR formula may require the addition of multiple base or scale registers; performing this sum reduction requires a temporary register. Since the formulae are independent, we only need one temporary, regardless of the number of unique formulae: each formula can reuse the same temporary. A later CSE pass may come along and combine sub-expressions - but then the register pressure would be that pass's problem to consider. This change fixes up the costing in a RISCV-specific way, but this is really a generic LSR problem. I just didn't feel like fighting with LSR and dealing with all the various targets swinging slightly in hard-to-reason-about ways. This problem is more pronounced on RISCV than on any other target due to our lack of addressing modes. This change is not hugely important on its own, but I have an upcoming change to add support for shNadd in LSR which biases us fairly strongly towards adding more "base adds". Without this change, we see a net regression due to the increase in register pressure, which is not accounted for.
96 lines
2.8 KiB
LLVM
96 lines
2.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32
|
|
; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64
|
|
|
|
; Test case:
|
|
; - `A[row]` is loop invariant and should be hoisted up to preheader
|
|
; FIXME: RV32 is working as expected, but RV64 is not
|
|
|
|
; The following LLVM IR simulates:
|
|
; int A[16][16];
|
|
; void test(int row, int N) {
|
|
; for (int i=0; i<N; ++i) {
|
|
; A[row][i+1] = 4;
|
|
; A[row][i+2] = 5;
|
|
; }
|
|
; }
|
|
|
|
; After LSR:
|
|
; int A[16][16];
|
|
; void test(int row, int N) {
|
|
; for (int *ptr = &A[row][2]; N>0; N--) {
|
|
; *(ptr-1) = 4;
|
|
; *(ptr) = 5;
|
|
; ++ptr;
|
|
; }
|
|
; }
|
|
|
|
; 16 x 16 array of i32, zero-initialized and internal to this module; @test
; indexes it as A[row][column] through two getelementptr instructions.
@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <ptr> [#uses=2]
|
|
|
|
; @test(row, N.in): when N.in > 0, loops with %indvar starting at 0 and stores
; 4 to A[row][indvar+1] and 5 to A[row][indvar+2] on every iteration, leaving
; the loop once indvar+1 equals N.in. The comment lines that follow the
; signature are the autogenerated FileCheck assertions for the llc output.
define void @test(i32 signext %row, i32 signext %N.in) nounwind {
|
|
; RV32-LABEL: test:
|
|
; RV32: # %bb.0: # %entry
|
|
; RV32-NEXT: blez a1, .LBB0_3
|
|
; RV32-NEXT: # %bb.1: # %cond_true.preheader
|
|
; RV32-NEXT: slli a0, a0, 6
|
|
; RV32-NEXT: lui a2, %hi(A)
|
|
; RV32-NEXT: addi a2, a2, %lo(A)
|
|
; RV32-NEXT: add a0, a0, a2
|
|
; RV32-NEXT: addi a0, a0, 8
|
|
; RV32-NEXT: li a2, 4
|
|
; RV32-NEXT: li a3, 5
|
|
; RV32-NEXT: .LBB0_2: # %cond_true
|
|
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV32-NEXT: sw a2, -4(a0)
|
|
; RV32-NEXT: sw a3, 0(a0)
|
|
; RV32-NEXT: addi a1, a1, -1
|
|
; RV32-NEXT: addi a0, a0, 4
|
|
; RV32-NEXT: bnez a1, .LBB0_2
|
|
; RV32-NEXT: .LBB0_3: # %return
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: test:
|
|
; RV64: # %bb.0: # %entry
|
|
; RV64-NEXT: blez a1, .LBB0_3
|
|
; RV64-NEXT: # %bb.1: # %cond_true.preheader
|
|
; RV64-NEXT: slli a0, a0, 6
|
|
; RV64-NEXT: lui a2, %hi(A)
|
|
; RV64-NEXT: addi a2, a2, %lo(A)
|
|
; RV64-NEXT: add a0, a0, a2
|
|
; RV64-NEXT: addi a2, a0, 4
|
|
; RV64-NEXT: addiw a1, a1, 2
|
|
; RV64-NEXT: li a3, 2
|
|
; RV64-NEXT: li a4, 4
|
|
; RV64-NEXT: li a5, 5
|
|
; RV64-NEXT: .LBB0_2: # %cond_true
|
|
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV64-NEXT: sw a4, 0(a2)
|
|
; RV64-NEXT: slli a6, a3, 2
|
|
; RV64-NEXT: add a6, a0, a6
|
|
; RV64-NEXT: sw a5, 0(a6)
|
|
; RV64-NEXT: addiw a3, a3, 1
|
|
; RV64-NEXT: addi a2, a2, 4
|
|
; RV64-NEXT: bne a3, a1, .LBB0_2
|
|
; RV64-NEXT: .LBB0_3: # %return
|
|
; RV64-NEXT: ret
|
|
entry:
|
|
; Same-type bitcast: %N is simply another name for %N.in.
%N = bitcast i32 %N.in to i32
|
|
; Guard: the loop is entered only when N.in > 0 (signed compare).
%tmp5 = icmp sgt i32 %N.in, 0
|
|
br i1 %tmp5, label %cond_true, label %return
|
|
|
|
cond_true:
|
|
; Induction variable: 0 on entry, %indvar.next on the back edge.
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]
|
|
%tmp2 = add i32 %indvar, 1
|
|
; Address of A[row][indvar + 1].
%tmp = getelementptr [16 x [16 x i32]], ptr @A, i32 0, i32 %row, i32 %tmp2
|
|
store i32 4, ptr %tmp
|
|
%tmp5.upgrd.1 = add i32 %indvar, 2
|
|
; Address of A[row][indvar + 2].
%tmp7 = getelementptr [16 x [16 x i32]], ptr @A, i32 0, i32 %row, i32 %tmp5.upgrd.1
|
|
store i32 5, ptr %tmp7
|
|
%indvar.next = add i32 %indvar, 1
|
|
; Exit once the incremented induction variable equals the trip count %N.
%exitcond = icmp eq i32 %indvar.next, %N
|
|
br i1 %exitcond, label %return, label %cond_true
|
|
|
|
return:
|
|
ret void
|
|
}
|