
Closes #137023

On RISC-V machines without a native multiply instruction (e.g., the `rv32i` base ISA), multiplying a variable by a constant integer often compiles to a call to a library routine such as `__mul{s,d}i3`.

```assembly
	.globl __mulxi3
	.type __mulxi3, @function
__mulxi3:
	mv a2, a0
	mv a0, zero
.L1:
	andi a3, a1, 1
	beqz a3, .L2
	add a0, a0, a2
.L2:
	srli a1, a1, 1
	slli a2, a2, 1
	bnez a1, .L1
	ret
```

This library function implements multiplication in software using a loop of shifts and adds, processing the constant bit by bit. On rv32i, it requires a minimum of 8 instructions (for a multiply by `0`) and up to about 200 instructions (for a multiply by `0xffffffff`), and it involves heavy branching and function-call overhead.

When not optimizing for size, we can instead expand the constant multiplication into a sequence of shift and add/sub instructions. For now we use the non-adjacent form (NAF) of the constant to derive the shift and add/sub sequence, which can save 1/2 to 2/3 of the instructions compared to a shl+add-only sequence.
57 lines
2.0 KiB
LLVM
57 lines
2.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
|
; Test driver: runs llc for a riscv32 target with the `+experimental-xqccmp`
; and `+e` attributes and the ilp32e ABI, with machine-instruction
; verification enabled, and pipes the result to FileCheck using the `RV32`
; check prefix.
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqccmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
|
|
|
|
; External helper taking two i32 values and returning an i32. @func calls it
; explicitly in its %while.end block, so the expected output contains a real
; `call __mulsi3`.
declare i32 @__mulsi3(i32, i32)
|
|
|
|
; @func(%s, %_c, %incdec.ptr, %0, %conv14) -> ptr
;
; Spins in %while.body for as long as the i1 argument %0 is true, then:
;   * stores __mulsi3(%_c, 16843009) to the null address,
;   * stores the byte %conv14 at %incdec.ptr + (%n.addr.042 & 1),
;   * returns %s unchanged.
;
; The expected output below shows the frame being set up in two steps: a
; `qc.cm.push {ra, s0-s1}, -16` followed by a separate `addi sp, sp, -8`
; that makes room for the two spill slots (a4 at 4(sp), a2 at 0(sp)). The
; epilogue mirrors this with `addi sp, sp, 8` before `qc.cm.popret`. The CFI
; directives track both adjustments (CFA offset 16, then 24, then 16 again).
define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
|
|
; RV32-LABEL: func:
|
|
; RV32: # %bb.0: # %entry
|
|
; RV32-NEXT: qc.cm.push {ra, s0-s1}, -16
|
|
; RV32-NEXT: .cfi_def_cfa_offset 16
|
|
; RV32-NEXT: .cfi_offset ra, -4
|
|
; RV32-NEXT: .cfi_offset s0, -8
|
|
; RV32-NEXT: .cfi_offset s1, -12
|
|
; RV32-NEXT: addi sp, sp, -8
|
|
; RV32-NEXT: .cfi_def_cfa_offset 24
|
|
; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill
|
|
; RV32-NEXT: sw a2, 0(sp) # 4-byte Folded Spill
|
|
; RV32-NEXT: mv a2, a1
|
|
; RV32-NEXT: mv s1, a0
|
|
; RV32-NEXT: li a0, 1
|
|
; RV32-NEXT: andi a3, a3, 1
|
|
; RV32-NEXT: .LBB0_1: # %while.body
|
|
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV32-NEXT: mv s0, a0
|
|
; RV32-NEXT: li a0, 0
|
|
; RV32-NEXT: bnez a3, .LBB0_1
|
|
; RV32-NEXT: # %bb.2: # %while.end
|
|
; RV32-NEXT: lui a0, 4112
|
|
; RV32-NEXT: addi a1, a0, 257
|
|
; RV32-NEXT: mv a0, a2
|
|
; RV32-NEXT: call __mulsi3
|
|
; RV32-NEXT: sw a0, 0(zero)
|
|
; RV32-NEXT: andi s0, s0, 1
|
|
; RV32-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
|
|
; RV32-NEXT: add s0, s0, a0
|
|
; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
|
|
; RV32-NEXT: sb a0, 0(s0)
|
|
; RV32-NEXT: mv a0, s1
|
|
; RV32-NEXT: addi sp, sp, 8
|
|
; RV32-NEXT: .cfi_def_cfa_offset 16
|
|
; RV32-NEXT: qc.cm.popret {ra, s0-s1}, 16
|
|
entry:
|
|
br label %while.body
|
|
|
|
while.body: ; preds = %while.body, %entry
|
|
; %n.addr.042 is 1 when the block is entered from %entry and 0 on every
; iteration that arrives via the back edge.
%n.addr.042 = phi i32 [ 1, %entry ], [ 0, %while.body ]
|
|
; %0 is a function argument that is never redefined, so the loop condition
; does not change between iterations.
br i1 %0, label %while.body, label %while.end
|
|
|
|
while.end: ; preds = %while.body
|
|
; Explicit library call with the constant 16843009 (0x01010101) as the second
; operand; the expected output materializes it with `lui 4112` + `addi 257`.
%mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009)
|
|
; Store of the product to the null pointer.
store i32 %mul_result, ptr null, align 4
|
|
; Low bit of the phi value, used as a byte offset (0 or 1) from %incdec.ptr.
%1 = and i32 %n.addr.042, 1
|
|
%scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
|
|
store i8 %conv14, ptr %scevgep, align 1
|
|
; The first argument is returned untouched.
ret ptr %s
|
|
}
|