
This rewrites the fold from #129756 so that it applies to all types, including stores of i8 values. Doing so required adding a new `aarch64mfp8` MVT to represent FPR8 types on AArch64, which can be used to extract and store 8-bit values using the b sub-registers. Follow-on from: #129756. Closes: #131793.
259 lines
7.6 KiB
LLVM
259 lines
7.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SDAG
; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL

; Each function is compiled twice for aarch64-linux-gnu: once without
; -global-isel and once with it. Assertions common to both runs use the shared
; prefix; output that differs between the two runs uses the -SDAG and -GISEL
; suffixed prefixes.
|
|
|
|
; Zero-initialised globals that the void test functions in this file load
; their input from and volatile-store their result to.
@var32 = global i32 0
@var64 = global i64 0
|
|
|
|
; Loads an i32 from @var32, byte-swaps it with llvm.bswap.i32 and writes the
; result back with a volatile store. The swap is expected to be emitted as a
; single `rev` on a w register.
define void @rev_i32() {
; CHECK-LABEL: rev_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var32
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    rev w9, w9
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
  store volatile i32 %val1_tmp, ptr @var32
  ret void
}
|
|
|
|
; Loads an i64 from @var64, byte-swaps it with llvm.bswap.i64 and writes the
; result back with a volatile store. The swap is expected to be emitted as a
; single `rev` on an x register.
define void @rev_i64() {
; CHECK-LABEL: rev_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var64
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var64]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    rev x9, x9
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i64, ptr @var64
  %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
  store volatile i64 %val1_tmp, ptr @var64
  ret void
}
|
|
|
|
; Loads an i64 from @var64, exchanges its two 32-bit halves (shl by 32 or'ed
; with lshr by 64 - 32) and then byte-swaps the whole value. The combined
; sequence is expected to be emitted as a single `rev32` on an x register.
define void @rev32_i64() {
; CHECK-LABEL: rev32_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var64
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var64]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    rev32 x9, x9
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i64, ptr @var64
  %val1_tmp = shl i64 %val0_tmp, 32
  ; The right-shift amount is spelled as the instruction 64 - 32 rather than
  ; as the literal 32.
  %val5_tmp = sub i64 64, 32
  %val2_tmp = lshr i64 %val0_tmp, %val5_tmp
  %val3_tmp = or i64 %val1_tmp, %val2_tmp
  %val4_tmp = call i64 @llvm.bswap.i64(i64 %val3_tmp)
  store volatile i64 %val4_tmp, ptr @var64
  ret void
}
|
|
|
|
; Loads an i32 from @var32, exchanges its two 16-bit halves (shl by 16 or'ed
; with lshr by 16) and then byte-swaps the whole value. The combined sequence
; is expected to be emitted as a single `rev16` on a w register.
define void @rev16_i32() {
; CHECK-LABEL: rev16_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var32
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    rev16 w9, w9
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val1_tmp = shl i32 %val0_tmp, 16
  %val2_tmp = lshr i32 %val0_tmp, 16
  %val3_tmp = or i32 %val1_tmp, %val2_tmp
  %val4_tmp = call i32 @llvm.bswap.i32(i32 %val3_tmp)
  store volatile i32 %val4_tmp, ptr @var32
  ret void
}
|
|
|
|
; Loads an i32 from @var32, counts its leading zeros with llvm.ctlz.i32 and
; volatile-stores the count back. The intrinsic's i1 operand is 0 here (the
; "zerodef" variant). A single `clz` on a w register is expected.
define void @clz_zerodef_i32() {
; CHECK-LABEL: clz_zerodef_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var32
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    clz w9, w9
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
  store volatile i32 %val4_tmp, ptr @var32
  ret void
}
|
|
|
|
; Loads an i64 from @var64, counts its leading zeros with llvm.ctlz.i64 and
; volatile-stores the count back. The intrinsic's i1 operand is 0 here (the
; "zerodef" variant). A single `clz` on an x register is expected.
define void @clz_zerodef_i64() {
; CHECK-LABEL: clz_zerodef_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var64
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var64]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    clz x9, x9
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i64, ptr @var64
  %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
  store volatile i64 %val4_tmp, ptr @var64
  ret void
}
|
|
|
|
; Loads an i32 from @var32, counts its leading zeros with llvm.ctlz.i32 and
; volatile-stores the count back. The intrinsic's i1 operand is 1 here (the
; "zeroundef" variant). The expected output is the same single `clz` on a w
; register as for the i1 0 variant.
define void @clz_zeroundef_i32() {
; CHECK-LABEL: clz_zeroundef_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var32
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    clz w9, w9
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
  store volatile i32 %val4_tmp, ptr @var32
  ret void
}
|
|
|
|
; Loads an i64 from @var64, counts its leading zeros with llvm.ctlz.i64 and
; volatile-stores the count back. The intrinsic's i1 operand is 1 here (the
; "zeroundef" variant). The expected output is the same single `clz` on an x
; register as for the i1 0 variant.
define void @clz_zeroundef_i64() {
; CHECK-LABEL: clz_zeroundef_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var64
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var64]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    clz x9, x9
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i64, ptr @var64
  %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
  store volatile i64 %val4_tmp, ptr @var64
  ret void
}
|
|
|
|
; Loads an i32 from @var32, counts its trailing zeros with llvm.cttz.i32 and
; volatile-stores the count back. The intrinsic's i1 operand is 0 here (the
; "zerodef" variant). The count is expected to be emitted as a bit reversal
; (`rbit`) followed by `clz`, both on a w register.
define void @cttz_zerodef_i32() {
; CHECK-LABEL: cttz_zerodef_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var32
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    rbit w9, w9
; CHECK-NEXT:    clz w9, w9
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
  store volatile i32 %val4_tmp, ptr @var32
  ret void
}
|
|
|
|
; Loads an i64 from @var64, counts its trailing zeros with llvm.cttz.i64 and
; volatile-stores the count back. The intrinsic's i1 operand is 0 here (the
; "zerodef" variant). The count is expected to be emitted as a bit reversal
; (`rbit`) followed by `clz`, both on an x register.
define void @cttz_zerodef_i64() {
; CHECK-LABEL: cttz_zerodef_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var64
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var64]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    rbit x9, x9
; CHECK-NEXT:    clz x9, x9
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i64, ptr @var64
  %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
  store volatile i64 %val4_tmp, ptr @var64
  ret void
}
|
|
|
|
; Loads an i32 from @var32, counts its trailing zeros with llvm.cttz.i32 and
; volatile-stores the count back. The intrinsic's i1 operand is 1 here (the
; "zeroundef" variant). The expected output is the same `rbit` + `clz` pair on
; a w register as for the i1 0 variant.
define void @cttz_zeroundef_i32() {
; CHECK-LABEL: cttz_zeroundef_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var32
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    rbit w9, w9
; CHECK-NEXT:    clz w9, w9
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
  store volatile i32 %val4_tmp, ptr @var32
  ret void
}
|
|
|
|
; Loads an i64 from @var64, counts its trailing zeros with llvm.cttz.i64 and
; volatile-stores the count back. The intrinsic's i1 operand is 1 here (the
; "zeroundef" variant). The expected output is the same `rbit` + `clz` pair on
; an x register as for the i1 0 variant.
define void @cttz_zeroundef_i64() {
; CHECK-LABEL: cttz_zeroundef_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, :got:var64
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var64]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    rbit x9, x9
; CHECK-NEXT:    clz x9, x9
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  %val0_tmp = load i64, ptr @var64
  %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
  store volatile i64 %val4_tmp, ptr @var64
  ret void
}
|
|
|
|
; Loads an i32 from @var32, counts its set bits with llvm.ctpop.i32 and
; volatile-stores the count back. Both runs are expected to move the value
; into a vector register, apply a per-byte `cnt`, and store the result
; straight from s0. They differ in the reduction: `addv b0` in the SDAG
; expectations, `uaddlv h0` in the GISEL expectations.
define void @ctpop_i32() {
; CHECK-SDAG-LABEL: ctpop_i32:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    adrp x8, :got:var32
; CHECK-SDAG-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-SDAG-NEXT:    ldr w9, [x8]
; CHECK-SDAG-NEXT:    fmov d0, x9
; CHECK-SDAG-NEXT:    cnt v0.8b, v0.8b
; CHECK-SDAG-NEXT:    addv b0, v0.8b
; CHECK-SDAG-NEXT:    str s0, [x8]
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: ctpop_i32:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    adrp x8, :got:var32
; CHECK-GISEL-NEXT:    ldr x8, [x8, :got_lo12:var32]
; CHECK-GISEL-NEXT:    ldr w9, [x8]
; CHECK-GISEL-NEXT:    fmov d0, x9
; CHECK-GISEL-NEXT:    cnt v0.8b, v0.8b
; CHECK-GISEL-NEXT:    uaddlv h0, v0.8b
; CHECK-GISEL-NEXT:    str s0, [x8]
; CHECK-GISEL-NEXT:    ret
  %val0_tmp = load i32, ptr @var32
  %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
  store volatile i32 %val4_tmp, ptr @var32
  ret void
}
|
|
|
|
; Counts the set bits of %a with llvm.ctpop.i64, stores the i64 count through
; %p (a plain, non-volatile store) and returns the constant 0.
; The SDAG expectations store the reduced count directly from d0 after
; `addv b0`. The GISEL expectations reduce with `uaddlv h0`, move the result
; to w8 and store x8.
define i64 @popcnt(i64 %a, ptr %p) {
; CHECK-SDAG-LABEL: popcnt:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    fmov d0, x0
; CHECK-SDAG-NEXT:    mov x0, xzr
; CHECK-SDAG-NEXT:    cnt v0.8b, v0.8b
; CHECK-SDAG-NEXT:    addv b0, v0.8b
; CHECK-SDAG-NEXT:    str d0, [x1]
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: popcnt:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    fmov d0, x0
; CHECK-GISEL-NEXT:    mov x0, xzr
; CHECK-GISEL-NEXT:    cnt v0.8b, v0.8b
; CHECK-GISEL-NEXT:    uaddlv h0, v0.8b
; CHECK-GISEL-NEXT:    mov w8, v0.s[0]
; CHECK-GISEL-NEXT:    str x8, [x1]
; CHECK-GISEL-NEXT:    ret
  %cnt = call i64 @llvm.ctpop.i64(i64 %a)
  store i64 %cnt, ptr %p
  ret i64 0
}
|
|
|
|
; Declarations of the byte-swap, count-leading-zeros, count-trailing-zeros and
; population-count intrinsics, in their i32 and i64 forms.
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare i32 @llvm.ctlz.i32 (i32, i1)
declare i64 @llvm.ctlz.i64 (i64, i1)
declare i32 @llvm.cttz.i32 (i32, i1)
declare i64 @llvm.cttz.i64 (i64, i1)
declare i32 @llvm.ctpop.i32 (i32)
declare i64 @llvm.ctpop.i64 (i64)
|