This is the first patch for Zvfbfa codegen; I'm going to break the work down into several patches to make it easier to review. The codegen supports both scalable vectors and fixed-length vectors, for both native operations and VP intrinsics.
57 lines
2.1 KiB
LLVM
57 lines
2.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
|
|
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
|
|
; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
|
|
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
|
|
|
|
; copysign on a fixed-length <2 x bfloat> vector: the result takes its
; magnitude from %vm and its sign from %vs. The expected output is one
; vfsgnj.vv executed under an e16alt, mf4 vsetivli with an AVL of 2.
define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) {
; CHECK-LABEL: copysign_v2bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v9
; CHECK-NEXT: ret
  %res = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs)
  ret <2 x bfloat> %res
}
|
|
|
|
; copysign on a fixed-length <4 x bfloat> vector: the result takes its
; magnitude from %vm and its sign from %vs. The expected output is one
; vfsgnj.vv executed under an e16alt, mf2 vsetivli with an AVL of 4.
define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) {
; CHECK-LABEL: copysign_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v9
; CHECK-NEXT: ret
  %res = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs)
  ret <4 x bfloat> %res
}
|
|
|
|
; copysign on a fixed-length <8 x bfloat> vector: the result takes its
; magnitude from %vm and its sign from %vs. The expected output is one
; vfsgnj.vv executed under an e16alt, m1 vsetivli with an AVL of 8.
define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) {
; CHECK-LABEL: copysign_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v9
; CHECK-NEXT: ret
  %res = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs)
  ret <8 x bfloat> %res
}
|
|
|
|
; copysign on a fixed-length <16 x bfloat> vector: the result takes its
; magnitude from %vm and its sign from %vs. The expected output is one
; vfsgnj.vv executed under an e16alt, m2 vsetivli with an AVL of 16; the
; sign operand is expected in v10 rather than v9 at this register grouping.
define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) {
; CHECK-LABEL: copysign_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v10
; CHECK-NEXT: ret
  %res = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs)
  ret <16 x bfloat> %res
}
|
|
|
|
; copysign on a fixed-length <32 x bfloat> vector: the result takes its
; magnitude from %vm and its sign from %vs. The expected output loads the
; AVL of 32 into a0 with li and uses vsetvli (e16alt, m4) instead of the
; vsetivli form seen in the smaller tests, followed by a single vfsgnj.vv
; with the sign operand in v12.
;
; The intrinsic is spelled with the ".v32bf16" suffix so that it matches the
; <32 x bfloat> overload type, in line with the other tests in this file.
; The function symbol still reads "bf32"; it is deliberately left untouched
; so the label directive below keeps matching the emitted symbol.
define <32 x bfloat> @copysign_v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs) {
; CHECK-LABEL: copysign_v32bf32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v12
; CHECK-NEXT: ret
  %r = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs)
  ret <32 x bfloat> %r
}
|