
This is an alternative to #84935 that fixes the miscompile, although the generated code is not optimal. The immediate for cm.push/pop must be a multiple of 16. For RVE, the stack size might not be. It's not easy to increase the stack size without messing up the CFA directives and possibly other things. This patch rounds the stack size down to a multiple of 16 before clamping it to 48. This causes an extra addi to be emitted to handle the remainder. Once this is committed, I can commit #84989 to add verification that these instructions are generated with valid offsets.
52 lines
1.9 KiB
LLVM
52 lines
1.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+zcmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32

; Zcmp push/pop under the ilp32e ABI when the frame size is not a multiple
; of 16. The stack adjustment encoded in cm.push/cm.popret must be a multiple
; of 16, but this function needs a 24-byte frame: the 16 bytes allocated by
; cm.push (holding ra, s0 and s1) plus 8 bytes for two spilled arguments.
; The expected code therefore uses 16 as the push/pop adjustment and a
; separate addi on sp for the remaining 8 bytes, in both the prologue and
; the epilogue.

define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
; RV32-LABEL: func:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    cm.push {ra, s0-s1}, -16
; RV32-NEXT:    addi sp, sp, -8
; RV32-NEXT:    .cfi_def_cfa_offset 24
; RV32-NEXT:    .cfi_offset ra, -12
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -4
; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw a2, 0(sp) # 4-byte Folded Spill
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    mv s1, a0
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    andi a3, a3, 1
; RV32-NEXT:  .LBB0_1: # %while.body
; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    bnez a3, .LBB0_1
; RV32-NEXT:  # %bb.2: # %while.end
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a1, a0, 257
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    sw a0, 0(zero)
; RV32-NEXT:    andi s0, s0, 1
; RV32-NEXT:    lw a0, 0(sp) # 4-byte Folded Reload
; RV32-NEXT:    add s0, s0, a0
; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    sb a0, 0(s0)
; RV32-NEXT:    mv a0, s1
; RV32-NEXT:    addi sp, sp, 8
; RV32-NEXT:    cm.popret {ra, s0-s1}, 16
entry:
  br label %while.body

; The loop condition %0 is a function argument and never changes inside the
; loop; the phi yields 1 on the first iteration and 0 on any later one.
while.body: ; preds = %while.body, %entry
  %n.addr.042 = phi i32 [ 1, %entry ], [ 0, %while.body ]
  br i1 %0, label %while.body, label %while.end

while.end: ; preds = %while.body
  ; The expected output implements this multiply as a call to __mulsi3 (the
  ; RUN line does not enable the M extension).
  %or5 = mul i32 %_c, 16843009
  store i32 %or5, ptr null, align 4
  %1 = and i32 %n.addr.042, 1
  ; %incdec.ptr and %conv14 are used only after the call; in the expected
  ; output they are the two values spilled in the prologue and reloaded here.
  %scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
  store i8 %conv14, ptr %scevgep, align 1
  ret ptr %s
}