Fateme Hosseini 80d327e846
[Hexagon] Enable soft bf16 in hexagon (#167924)
This patch adds:
1. Support to recognize bf16 type in the frontend and isel/abi support
for scalar bf16 programs
Limitations: fp_to_bf16 is being generated with a tablegen pattern
instead of lowering via expansion. This is because we do not have
support for the fcanonicalize instruction, which should prevent an SNaN
from being converted to an infinity due to truncation.

2. Vector codegen support for bf16

Patch By: Fateme Hosseini

Co-authored-by: Muntasir Mallick <quic_mallick@quicinc.com>
Co-authored-by: Muntasir Mallick <mallick@qti.qualcomm.com>
Co-authored-by: Kaushik Kulkarni <quic_kauskulk@quicinc.com>
2025-11-19 09:52:46 -06:00

187 lines
5.7 KiB
LLVM

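; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): the register operands in the assertions below are written as
; FileCheck pattern captures (e.g. r[0-9]+), which look hand-generalized;
; regenerating with the script would presumably replace them with concrete
; register names - confirm before re-running it.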
; RUN: llc -mtriple=hexagon -hexagon-bit=false < %s | FileCheck %s
; Scalar bfloat load: reads a 2-byte-aligned bfloat through %addr and returns it.
; Expected output: one halfword load (memuh) from r0+#0 into a scratch register,
; then aslh of that scratch register into r0 in the same packet as the return.
; NOTE(review): aslh presumably moves the 16 loaded bits into the upper half of
; the 32-bit return register - confirm against the Hexagon bf16 ABI.
define bfloat @load_scalar_bf(ptr %addr) {
; CHECK-LABEL: load_scalar_bf:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[REG:r[0-9]+]] = memuh(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = aslh([[REG]])
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
entry:
%0 = load bfloat, ptr %addr, align 2
ret bfloat %0
}
; Scalar bfloat store: writes the bfloat argument %v to %addr (2-byte aligned).
; Expected output, in order:
;   - three constants are materialized: 131071 (0x1FFFF), 32768 (0x8000) and
;     65535 (0xFFFF);
;   - r0 is masked with 0x1FFFF and compared against 0x8000; when equal, the
;     stored value is asrh(r0) masked with 0xFFFF, otherwise it is
;     asrh(r0 + (r0 & 0x8000)) masked with 0xFFFF;
;   - when the sfclass(r0,#16) predicate is true the stored value is replaced
;     by 32767 (0x7FFF);
;   - the selected halfword is stored with memh(r1+#0) in the return packet.
; NOTE(review): this looks like round-to-nearest-even narrowing with a fixed
; pattern substituted for NaN inputs - confirm against the Hexagon lowering.
define void @store_scalar_bf(bfloat %v, ptr %addr) {
; CHECK-LABEL: store_scalar_bf:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[R_A:r[0-9]+]] = ##131071
; CHECK-NEXT: [[R_B:r[0-9]+]] = ##32768
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R_C:r[0-9]+]] = ##65535
; CHECK-NEXT: [[R_D:r[0-9]+]] = asrh(r0)
; CHECK-NEXT: [[R_A]] = and(r0,[[R_A]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0:p[0-9]+]] = sfclass(r0,#16)
; CHECK-NEXT: [[R_E:r[0-9]+]] = and(r0,[[R_B]])
; CHECK-NEXT: [[P1:p[0-9]+]] = cmp.eq([[R_A]],[[R_B]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = add(r0,[[R_E]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = asrh(r0)
; CHECK-NEXT: if ([[P1]]) [[R_B]] = and([[R_D]],[[R_C]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (![[P1]]) [[R_B]] = and(r0,[[R_C]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if ([[P0]]) [[R_B]] = ##32767
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: memh(r1+#0) = [[R_B]]
; CHECK-NEXT: }
entry:
store bfloat %v, ptr %addr, align 2
ret void
}
; Scalar bfloat addition: returns %a + %b computed with a single fadd on bfloat.
; Expected output, in order:
;   - each incoming operand (r0 and r1) goes through the same mask / compare /
;     asrh / sfclass(...,#16) selection sequence, using the constants 32768,
;     131071, 65535 and 32767;
;   - both selected halfwords are shifted with aslh and fed to one
;     single-precision sfadd;
;   - the sfadd result goes through the same selection sequence once more and
;     is shifted with aslh into r0 before the return.
; NOTE(review): attribute group #0 is referenced here but its definition is not
; visible in this part of the file - confirm it exists or drop the reference.
define bfloat @sum(bfloat %a, bfloat %b) #0 {
; CHECK-LABEL: sum:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[R3:r[0-9]+]] = ##32768
; CHECK-NEXT: [[R4:r[0-9]+]] = ##131071
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R2:r[0-9]+]] = ##65535
; CHECK-NEXT: [[R6:r[0-9]+]] = and(r0,[[R3]])
; CHECK-NEXT: [[R5:r[0-9]+]] = and(r0,[[R4]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R8:r[0-9]+]] = and(r1,[[R3]])
; CHECK-NEXT: [[R7:r[0-9]+]] = and(r1,[[R4]])
; CHECK-NEXT: [[R6]] = add(r0,[[R6]])
; CHECK-NEXT: [[P0:p[0-9]+]] = cmp.eq([[R5]],[[R3]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R5]] = asrh(r0)
; CHECK-NEXT: [[P1:p[0-9]+]] = cmp.eq([[R7]],[[R3]])
; CHECK-NEXT: [[R7]] = asrh(r1)
; CHECK-NEXT: [[R8]] = add(r1,[[R8]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R6]] = asrh([[R6]])
; CHECK-NEXT: if ([[P0]]) [[R5]] = and([[R5]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0]] = sfclass(r0,#16)
; CHECK-NEXT: if (![[P0]]) [[R5]] = and([[R6]],[[R2]])
; CHECK-NEXT: [[R6]] = asrh([[R8]])
; CHECK-NEXT: if ([[P1]]) [[R7]] = and([[R7]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P1]] = sfclass(r1,#16)
; CHECK-NEXT: [[R0:r[0-9]+]] = #32767
; CHECK-NEXT: if (![[P1]]) [[R7]] = and([[R6]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if ([[P0]]) [[R5]] = add([[R0]],#0)
; CHECK-NEXT: if ([[P1]]) [[R7]] = add([[R0]],#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R6]] = aslh([[R7]])
; CHECK-NEXT: [[R5]] = aslh([[R5]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R1:r[0-9]+]] = sfadd([[R5]],[[R6]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R5]] = and([[R1]],[[R3]])
; CHECK-NEXT: [[R4]] = and([[R1]],[[R4]])
; CHECK-NEXT: [[R6]] = asrh([[R1]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0]] = cmp.eq([[R4]],[[R3]])
; CHECK-NEXT: [[R5]] = add([[R1]],[[R5]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R3]] = asrh([[R5]])
; CHECK-NEXT: if ([[P0]]) [[R4]] = and([[R6]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0]] = sfclass([[R1]],#16)
; CHECK-NEXT: if (![[P0]]) [[R4]] = and([[R3]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if ([[P0]]) [[R4]] = add([[R0]],#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = aslh([[R4]])
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
entry:
%add = fadd bfloat %a, %b
ret bfloat %add
}
; Function Attrs: noinline nounwind optnone
; NOTE(review): the line above describes attribute group #0, whose definition is
; not visible in this part of the file - confirm the attributes actually apply.
;
; bfloat <-> double round trip: extends both bfloat arguments to double, adds
; them, spills the sum to a stack slot, reloads it, truncates it to bfloat,
; spills and reloads that too, and finally converts the bfloat to a signed i32.
; Expected output, in order:
;   - convert_sf2df on r0 and r1, with a 16-byte stack adjustment in the same
;     packet;
;   - one dfadd of the two register pairs, stored with memd at offset #8;
;   - convert_df2sf of the sum; the result is shifted right by 16 (lsr) and
;     stored with memh at offset #6, and separately masked with ##-65536;
;   - convert_sf2w with :chop on the masked value, returned alongside the
;     stack restore.
define dso_local i32 @double_bf16(bfloat %a, bfloat %b) #0 {
; CHECK-LABEL: double_bf16:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[PAIR_A:r[0-9]+:[0-9]+]] = convert_sf2df(r0)
; CHECK-NEXT: [[PAIR_B:r[0-9]+:[0-9]+]] = convert_sf2df(r1)
; CHECK-NEXT: [[SP:r[0-9]+]] = add([[SP]],#-16)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[RESULT:r[0-9]+:[0-9]+]] = dfadd([[PAIR_A]],[[PAIR_B]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memd([[SP]]+#8) = [[RESULT]]
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[TMP:r[0-9]+]] = convert_df2sf([[RESULT]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[TMP1:r[0-9]+]] = lsr([[TMP]],#16)
; CHECK-NEXT: [[R0:r[0-9]+]] = and([[TMP]],##-65536)
; CHECK-NEXT: memh([[SP]]+#6) = [[TMP1]].new
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R0]] = convert_sf2w([[R0]]):chop
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: [[SP]] = add([[SP]],#16)
; CHECK-NEXT: }
entry:
%c = alloca double, align 8
%d = alloca bfloat, align 2
%conv = fpext bfloat %a to double
%conv1 = fpext bfloat %b to double
%add = fadd double %conv, %conv1
store double %add, ptr %c, align 8
%2 = load double, ptr %c, align 8
%conv2 = fptrunc double %2 to bfloat
store bfloat %conv2, ptr %d, align 2
%3 = load bfloat, ptr %d, align 2
%conv3 = fptosi bfloat %3 to i32
ret i32 %conv3
}