; Commit: fc69f25a8f (Alex Bradbury, 2025-07-21 11:48:33 +01:00)
; [RISCV] Convert LWU to LW if possible in RISCVOptWInstrs (#144703)
;
; After the refactoring in #149710 the logic change is trivial.
;
; Motivation for preferring sign-extended 32-bit loads (LW) vs
; zero-extended (LWU):
; * LW is compressible while LWU is not.
; * Helps to minimise the diff vs RV32 (e.g. LWU vs LW).
; * Helps to minimise distracting diffs vs GCC. I see this come up
;   frequently when comparing GCC code, and in these cases it's a red
;   herring.
;
; Similar normalisation could be done for LHU and LH, but this is less
; well motivated as there is a compressed LHU (and if performing the
; change in RISCVOptWInstrs it wouldn't be done for RV32). There is a
; compressed LBU but not LB, meaning doing a similar normalisation for
; byte-sized loads would actually be a regression in terms of code size.
; Load narrowing when allowed by hasAllNBitUsers isn't explored in this
; patch.
;
; This changes ~20500 instructions in an RVA22 build of the
; llvm-test-suite including SPEC 2017. As part of the review, the option
; of doing the change at ISel time was explored but was found to be less
; effective.
;
; (File listing metadata: 498 lines, 13 KiB, LLVM)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbkb -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64ZBKB
; FIXME: Use packw
; Combine the low 16 bits of %a with %b shifted into bits 31:16 of an i32.
; With Zbkb this selects a single packw.
define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: slliw a1, a1, 16
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i32:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: ret
%shl = and i32 %a, 65535 ; keep only the low 16 bits of %a
%shl1 = shl i32 %b, 16 ; move %b into the high half
%or = or i32 %shl1, %shl
ret i32 %or
}
; FIXME: Use packw
; Same pack-into-i32 pattern, but built from two zero-extended i16 arguments
; (no explicit mask needed on %a).
define signext i32 @pack_i32_2(i16 zeroext %a, i16 zeroext %b) nounwind {
; RV64I-LABEL: pack_i32_2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 16
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i32_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packw a0, a0, a1
; RV64ZBKB-NEXT: ret
%zexta = zext i16 %a to i32
%zextb = zext i16 %b to i32
%shl1 = shl i32 %zextb, 16 ; %b forms the high half
%or = or i32 %shl1, %zexta
ret i32 %or
}
; Test case where we don't have a sign_extend_inreg after the or.
; FIXME: Use packw
; Pack two zero-extended i16s, then add %2; the add consumes the or result,
; so no sign_extend_inreg follows the or.
define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) {
; RV64I-LABEL: pack_i32_3:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 16
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: addw a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i32_3:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packw a0, a1, a0
; RV64ZBKB-NEXT: addw a0, a0, a2
; RV64ZBKB-NEXT: ret
%4 = zext i16 %0 to i32
%5 = shl nuw i32 %4, 16 ; %0 forms the high half
%6 = zext i16 %1 to i32
%7 = or i32 %5, %6
%8 = add i32 %7, %2
ret i32 %8
}
; Combine the low 32 bits of %a with %b shifted into bits 63:32 of an i64.
; With Zbkb this selects a single pack.
define i64 @pack_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: pack_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i64:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: pack a0, a0, a1
; RV64ZBKB-NEXT: ret
%shl = and i64 %a, 4294967295 ; keep only the low 32 bits of %a
%shl1 = shl i64 %b, 32 ; move %b into the high half
%or = or i64 %shl1, %shl
ret i64 %or
}
; FIXME: The slli+srli isn't needed with pack.
; Pack of two i32 arguments zero-extended to i64. As noted in the FIXME above,
; pack reads only the low halves, so the slli+srli on %b should be redundant.
define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i64_2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i64_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: slli a1, a1, 32
; RV64ZBKB-NEXT: srli a1, a1, 32
; RV64ZBKB-NEXT: pack a0, a0, a1
; RV64ZBKB-NEXT: ret
%zexta = zext i32 %a to i64
%zextb = zext i32 %b to i64
%shl1 = shl i64 %zextb, 32 ; %b forms the high half
%or = or i64 %shl1, %zexta
ret i64 %or
}
; Load two i32s from memory and pack them into an i64 (%0's value in the high
; half). With Zbkb both loads are plain lw since pack only reads the low 32
; bits of each source; without Zbkb the low half still needs a zero-extending
; lwu.
define i64 @pack_i64_3(ptr %0, ptr %1) {
; RV64I-LABEL: pack_i64_3:
; RV64I: # %bb.0:
; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: lwu a1, 0(a1)
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i64_3:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: lw a0, 0(a0)
; RV64ZBKB-NEXT: lw a1, 0(a1)
; RV64ZBKB-NEXT: pack a0, a1, a0
; RV64ZBKB-NEXT: ret
%3 = load i32, ptr %0, align 4
%4 = zext i32 %3 to i64
%5 = shl i64 %4, 32 ; first load forms the high half
%6 = load i32, ptr %1, align 4
%7 = zext i32 %6 to i64
%8 = or i64 %5, %7
ret i64 %8
}
; FIXME: Use packh
; Byte pack: low byte of %a in bits 7:0, low byte of %b in bits 15:8.
; Currently not selected as packh for either configuration (see FIXME above).
define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: packh_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: zext.b a0, a0
; RV64I-NEXT: addi a2, a2, -256
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i32:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: lui a2, 16
; RV64ZBKB-NEXT: zext.b a0, a0
; RV64ZBKB-NEXT: addi a2, a2, -256
; RV64ZBKB-NEXT: slli a1, a1, 8
; RV64ZBKB-NEXT: and a1, a1, a2
; RV64ZBKB-NEXT: or a0, a1, a0
; RV64ZBKB-NEXT: ret
%and = and i32 %a, 255 ; low byte of %a
%and1 = shl i32 %b, 8
%shl = and i32 %and1, 65280 ; shift-then-mask: %b's low byte into bits 15:8
%or = or i32 %shl, %and
ret i32 %or
}
; Byte pack with mask-then-shift order on %b; this form does select packh
; under Zbkb.
define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
; RV64I-LABEL: packh_i32_2:
; RV64I: # %bb.0:
; RV64I-NEXT: zext.b a0, a0
; RV64I-NEXT: zext.b a1, a1
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i32_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a0, a0, a1
; RV64ZBKB-NEXT: ret
%and = and i32 %a, 255 ; low byte of %a
%and1 = and i32 %b, 255 ; mask first, then shift (cf. packh_i32)
%shl = shl i32 %and1, 8
%or = or i32 %shl, %and
ret i32 %or
}
; FIXME: Use packh
; i64 variant of packh_i32: shift-then-mask on %b, currently not selected as
; packh (see FIXME above).
define i64 @packh_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: packh_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: zext.b a0, a0
; RV64I-NEXT: addi a2, a2, -256
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i64:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: lui a2, 16
; RV64ZBKB-NEXT: zext.b a0, a0
; RV64ZBKB-NEXT: addi a2, a2, -256
; RV64ZBKB-NEXT: slli a1, a1, 8
; RV64ZBKB-NEXT: and a1, a1, a2
; RV64ZBKB-NEXT: or a0, a1, a0
; RV64ZBKB-NEXT: ret
%and = and i64 %a, 255 ; low byte of %a
%and1 = shl i64 %b, 8
%shl = and i64 %and1, 65280 ; %b's low byte into bits 15:8
%or = or i64 %shl, %and
ret i64 %or
}
; i64 variant of packh_i32_2 (mask-then-shift); selects packh under Zbkb.
define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: packh_i64_2:
; RV64I: # %bb.0:
; RV64I-NEXT: zext.b a0, a0
; RV64I-NEXT: zext.b a1, a1
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i64_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a0, a0, a1
; RV64ZBKB-NEXT: ret
%and = and i64 %a, 255 ; low byte of %a
%and1 = and i64 %b, 255 ; mask first, then shift
%shl = shl i64 %and1, 8
%or = or i64 %shl, %and
ret i64 %or
}
; Pack two zero-extended i8 arguments into an i16 (%b in the high byte).
define zeroext i16 @packh_i16(i8 zeroext %a, i8 zeroext %b) nounwind {
; RV64I-LABEL: packh_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i16:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: packh a0, a0, a1
; RV64ZBKB-NEXT: ret
%zext = zext i8 %a to i16
%zext1 = zext i8 %b to i16
%shl = shl i16 %zext1, 8 ; %b forms the high byte
%or = or i16 %shl, %zext
ret i16 %or
}
; The high byte is the result of an i8 add (value not known zero-extended),
; so RV64I needs an explicit zext.b before the shift; Zbkb folds it into packh.
define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) {
; RV64I-LABEL: packh_i16_2:
; RV64I: # %bb.0:
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: zext.b a0, a0
; RV64I-NEXT: slli a0, a0, 8
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i16_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: add a0, a1, a0
; RV64ZBKB-NEXT: packh a0, a2, a0
; RV64ZBKB-NEXT: ret
%4 = add i8 %1, %0
%5 = zext i8 %4 to i16
%6 = shl i16 %5, 8 ; sum forms the high byte
%7 = zext i8 %2 to i16
%8 = or i16 %6, %7
ret i16 %8
}
; Same pattern as packh_i16_2 but the packed value is stored rather than
; returned, so no return-value extension is required.
define void @packh_i16_3(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) {
; RV64I-LABEL: packh_i16_3:
; RV64I: # %bb.0:
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: zext.b a0, a0
; RV64I-NEXT: slli a0, a0, 8
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sh a0, 0(a3)
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: packh_i16_3:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: add a0, a1, a0
; RV64ZBKB-NEXT: packh a0, a2, a0
; RV64ZBKB-NEXT: sh a0, 0(a3)
; RV64ZBKB-NEXT: ret
%4 = add i8 %1, %0
%5 = zext i8 %4 to i16
%6 = shl i16 %5, 8 ; sum forms the high byte
%7 = zext i8 %2 to i16
%8 = or i16 %6, %7
store i16 %8, ptr %p
ret void
}
; Pack where the high half comes from an i32 add; Zbkb still emits an explicit
; slli+srli on the sum before the pack.
define i64 @pack_i64_allWUsers(i32 signext %0, i32 signext %1, i32 signext %2) {
; RV64I-LABEL: pack_i64_allWUsers:
; RV64I: # %bb.0:
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: slli a2, a2, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a2, a2, 32
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i64_allWUsers:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: add a0, a1, a0
; RV64ZBKB-NEXT: slli a0, a0, 32
; RV64ZBKB-NEXT: srli a0, a0, 32
; RV64ZBKB-NEXT: pack a0, a2, a0
; RV64ZBKB-NEXT: ret
%4 = add i32 %1, %0
%5 = zext i32 %4 to i64
%6 = shl i64 %5, 32 ; sum forms the high half
%7 = zext i32 %2 to i64
%8 = or i64 %6, %7
ret i64 %8
}
; packw variant of pack_i64_allWUsers: the high half comes from an i16 add,
; which Zbkb zero-extends with zext.h before the packw.
define signext i32 @pack_i32_allWUsers(i16 zeroext %0, i16 zeroext %1, i16 zeroext %2) {
; RV64I-LABEL: pack_i32_allWUsers:
; RV64I: # %bb.0:
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: slli a0, a0, 16
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i32_allWUsers:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: add a0, a1, a0
; RV64ZBKB-NEXT: zext.h a0, a0
; RV64ZBKB-NEXT: packw a0, a2, a0
; RV64ZBKB-NEXT: ret
%4 = add i16 %1, %0
%5 = zext i16 %4 to i32
%6 = shl i32 %5, 16 ; sum forms the high half
%7 = zext i16 %2 to i32
%8 = or i32 %6, %7
ret i32 %8
}
; Materialize the repeating-byte constant 0x0101010101010101. With Zbkb the
; identical 32-bit halves allow lui+addi followed by pack of the register
; with itself.
define i64 @pack_i64_imm() {
; RV64I-LABEL: pack_i64_imm:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a0, 65793
; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: addi a0, a0, 257
; RV64I-NEXT: slli a0, a0, 16
; RV64I-NEXT: addi a0, a0, 257
; RV64I-NEXT: slli a0, a0, 12
; RV64I-NEXT: addi a0, a0, 16
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: pack_i64_imm:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: lui a0, 65793
; RV64ZBKB-NEXT: addi a0, a0, 16
; RV64ZBKB-NEXT: pack a0, a0, a0
; RV64ZBKB-NEXT: ret
ret i64 1157442765409226768 ; 0x0101010101010101
}
; and with 0xFFFF on i32 selects zext.h under Zbkb (slli+srli otherwise).
define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: zexth_i32:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: zext.h a0, a0
; RV64ZBKB-NEXT: ret
%and = and i32 %a, 65535
ret i32 %and
}
; and with 0xFFFF on i64 selects zext.h under Zbkb (slli+srli otherwise).
define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: zexth_i64:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: zext.h a0, a0
; RV64ZBKB-NEXT: ret
%and = and i64 %a, 65535
ret i64 %and
}
; Explicit i16->i32 zext selects zext.h under Zbkb, same as the and-mask form.
define i32 @zext_i16_to_i32(i16 %a) nounwind {
; RV64I-LABEL: zext_i16_to_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: zext_i16_to_i32:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: zext.h a0, a0
; RV64ZBKB-NEXT: ret
%1 = zext i16 %a to i32
ret i32 %1
}
; Explicit i16->i64 zext selects zext.h under Zbkb, same as the and-mask form.
define i64 @zext_i16_to_i64(i16 %a) nounwind {
; RV64I-LABEL: zext_i16_to_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: zext_i16_to_i64:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: zext.h a0, a0
; RV64ZBKB-NEXT: ret
%1 = zext i16 %a to i64
ret i64 %1
}
; This creates a i16->i32 G_ZEXT that we need to be able to select
; The freeze between the load and the zext yields an i16->i32 G_ZEXT that
; instruction selection must handle; Zbkb uses zext.h on the loaded value.
define i32 @zext_i16_i32_2(i1 %z, ptr %x, i32 %y) {
; RV64I-LABEL: zext_i16_i32_2:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a3, a0, 1
; RV64I-NEXT: bnez a3, .LBB20_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB20_2:
; RV64I-NEXT: lh a0, 0(a1)
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: zext_i16_i32_2:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: andi a3, a0, 1
; RV64ZBKB-NEXT: bnez a3, .LBB20_2
; RV64ZBKB-NEXT: # %bb.1:
; RV64ZBKB-NEXT: mv a0, a2
; RV64ZBKB-NEXT: ret
; RV64ZBKB-NEXT: .LBB20_2:
; RV64ZBKB-NEXT: lh a0, 0(a1)
; RV64ZBKB-NEXT: zext.h a0, a0
; RV64ZBKB-NEXT: ret
%w = load i16, ptr %x
%a = freeze i16 %w ; blocks folding the extend into the load
%b = zext i16 %a to i32
%c = select i1 %z, i32 %b, i32 %y
ret i32 %c
}
; This creates a i16->i32 G_SEXT that we need to be able to select
; Signed counterpart of zext_i16_i32_2: the freeze yields an i16->i32 G_SEXT.
; Both configurations lower it as slli+srai (Zbkb has no sext.h equivalent
; used here).
define i32 @sext_i16_i32(i1 %z, ptr %x, i32 %y) {
; RV64I-LABEL: sext_i16_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a3, a0, 1
; RV64I-NEXT: bnez a3, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: lh a0, 0(a1)
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: ret
;
; RV64ZBKB-LABEL: sext_i16_i32:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: andi a3, a0, 1
; RV64ZBKB-NEXT: bnez a3, .LBB21_2
; RV64ZBKB-NEXT: # %bb.1:
; RV64ZBKB-NEXT: mv a0, a2
; RV64ZBKB-NEXT: ret
; RV64ZBKB-NEXT: .LBB21_2:
; RV64ZBKB-NEXT: lh a0, 0(a1)
; RV64ZBKB-NEXT: slli a0, a0, 48
; RV64ZBKB-NEXT: srai a0, a0, 48
; RV64ZBKB-NEXT: ret
%w = load i16, ptr %x
%a = freeze i16 %w ; blocks folding the extend into the load
%b = sext i16 %a to i32
%c = select i1 %z, i32 %b, i32 %y
ret i32 %c
}