llvm-project/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll
Iris Shi dae5c4e1e7
[RISCV] Expand constant multiplication for targets without M extension (#137195)
Closes #137023

On RISC-V machines without a native multiply instruction (e.g., `rv32i`
base), multiplying a variable by a constant integer often compiles to a
call to a library routine like `__mul{s,d}i3`.

```assembly
	# Software multiply by shift-and-add.
	# On entry a0 and a1 hold the two operands; the sum is built up in a0.
	# a2 and a3 are used as scratch registers.
	.globl __mulxi3
	.type  __mulxi3, @function
__mulxi3:
	# a2 = running shifted copy of the first operand, a0 = accumulator (0).
	mv     a2, a0
	mv     a0, zero
.L1:
	# Test the lowest remaining bit of a1; add the shifted operand only
	# when that bit is set.
	andi   a3, a1, 1
	beqz   a3, .L2
	add    a0, a0, a2
.L2:
	# Move on to the next bit: a1 is shifted right, a2 is shifted left.
	srli   a1, a1, 1
	slli   a2, a2, 1
	# Repeat until no set bits remain in a1.
	bnez   a1, .L1
	ret
```

This library function implements multiplication in software using a loop
of shifts and adds, processing the constant bit by bit. On rv32i, it
requires a minimum of 8 instructions (for multiply by `0`) and up to
about 200 instructions (by `0xffffffff`), and it involves heavy branching
and function call overhead.

When not optimizing for size, we can expand the constant
multiplication into a sequence of shift and add/sub instructions. For
now we use the non-adjacent form (NAF) for the shift and add/sub sequence,
which can save between 1/2 and 2/3 of the instructions compared to a
shl+add-only sequence.
2025-05-16 09:40:11 +08:00

57 lines
2.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqccmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; External 32-bit multiply routine taking two i32 operands and returning i32.
; It is declared here because @func calls it directly by name.
declare i32 @__mulsi3(i32, i32)
; Exercises a frame that needs more stack than the qc.cm.push in the expected
; prologue allocates. The expected assembly below:
;   - pushes {ra, s0-s1} with a 16-byte adjustment, then lowers sp by a further
;     8 bytes (CFA offset goes 16 -> 24);
;   - spills a4 and a2 into those 8 bytes and reloads them after the call to
;     __mulsi3, where they feed the final byte store;
;   - raises sp by 8 again before qc.cm.popret releases the 16-byte frame.
; The function returns its first argument, %s, unchanged.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm whether a definition is expected.
define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
; RV32-LABEL: func:
; RV32: # %bb.0: # %entry
; RV32-NEXT: qc.cm.push {ra, s0-s1}, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: addi sp, sp, -8
; RV32-NEXT: .cfi_def_cfa_offset 24
; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw a2, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv a2, a1
; RV32-NEXT: mv s1, a0
; RV32-NEXT: li a0, 1
; RV32-NEXT: andi a3, a3, 1
; RV32-NEXT: .LBB0_1: # %while.body
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: mv s0, a0
; RV32-NEXT: li a0, 0
; RV32-NEXT: bnez a3, .LBB0_1
; RV32-NEXT: # %bb.2: # %while.end
; RV32-NEXT: lui a0, 4112
; RV32-NEXT: addi a1, a0, 257
; RV32-NEXT: mv a0, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: sw a0, 0(zero)
; RV32-NEXT: andi s0, s0, 1
; RV32-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: add s0, s0, a0
; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: sb a0, 0(s0)
; RV32-NEXT: mv a0, s1
; RV32-NEXT: addi sp, sp, 8
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: qc.cm.popret {ra, s0-s1}, 16
entry:
br label %while.body
; Self-loop controlled by the i1 argument %0. The phi is 1 on the first
; iteration and 0 on every later one, and its value is used after the loop.
while.body: ; preds = %while.body, %entry
%n.addr.042 = phi i32 [ 1, %entry ], [ 0, %while.body ]
br i1 %0, label %while.body, label %while.end
while.end: ; preds = %while.body
; Direct call to the multiply routine with the constant 16843009 (0x01010101),
; which the expected assembly builds with lui 4112 + addi 257.
; NOTE(review): presumably written as an explicit call so the test keeps a real
; call now that constant multiplies are expanded inline - confirm against the
; commit history.
%mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009)
; The product is stored to the null address (sw a0, 0(zero) in the expected
; output).
store i32 %mul_result, ptr null, align 4
; Use the low bit of the loop phi as a byte offset from %incdec.ptr and store
; %conv14 there; both operands are values that must survive the call above.
%1 = and i32 %n.addr.042, 1
%scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
store i8 %conv14, ptr %scevgep, align 1
ret ptr %s
}