; This patch adds:
;   1. Support for recognizing the bf16 type in the frontend, and isel/ABI
;      support for scalar bf16 programs.
;      Limitation: fp_to_bf16 is generated with a TableGen pattern instead of
;      being lowered via expansion. This is because we do not have support for
;      the fcanonicalize instruction, which should prevent an SNaN from being
;      converted to an infinity due to truncation.
;   2. Vector codegen support for bf16.
;
; Patch by: Fateme Hosseini
; Co-authored-by: Muntasir Mallick <quic_mallick@quicinc.com>
; Co-authored-by: Muntasir Mallick <mallick@qti.qualcomm.com>
; Co-authored-by: Kaushik Kulkarni <quic_kauskulk@quicinc.com>
; Listing metadata: 187 lines, 5.7 KiB, LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=hexagon -hexagon-bit=false < %s | FileCheck %s

; load_scalar_bf: load a scalar bfloat through %addr and return it.
;
; The directives below pin the expected output: one halfword load (memuh)
; from the pointer in r0, then aslh of the loaded value into r0 in the same
; packet as the return (jumpr r31).
define bfloat @load_scalar_bf(ptr %addr) {
; CHECK-LABEL: load_scalar_bf:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[REG:r[0-9]+]] = memuh(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = aslh([[REG]])
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }

entry:
  %0 = load bfloat, ptr %addr, align 2
  ret bfloat %0
}

; store_scalar_bf: store the scalar bfloat argument %v to %addr.
;
; Expected sequence, as pinned by the directives below:
;   * materialise the constants ##131071, ##32768 and ##65535;
;   * set one predicate from cmp.eq((r0 & 131071), 32768) and a second one
;     from sfclass(r0,#16);
;   * select asrh(r0) when the cmp.eq predicate is true, otherwise
;     asrh(r0 + (r0 & 32768)); either way the value is masked with 65535;
;   * replace the selected value with ##32767 when the sfclass predicate is
;     true;
;   * store the resulting halfword with memh(r1+#0) in the return packet.
; NOTE(review): this reads as round-to-nearest-even narrowing of the 32-bit
; value with a special case for the class tested by sfclass #16 (presumably
; NaN) -- confirm against the Hexagon fp_to_bf16 pattern.
define void @store_scalar_bf(bfloat %v, ptr %addr) {
; CHECK-LABEL: store_scalar_bf:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[R_A:r[0-9]+]] = ##131071
; CHECK-NEXT: [[R_B:r[0-9]+]] = ##32768
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R_C:r[0-9]+]] = ##65535
; CHECK-NEXT: [[R_D:r[0-9]+]] = asrh(r0)
; CHECK-NEXT: [[R_A]] = and(r0,[[R_A]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0:p[0-9]+]] = sfclass(r0,#16)
; CHECK-NEXT: [[R_E:r[0-9]+]] = and(r0,[[R_B]])
; CHECK-NEXT: [[P1:p[0-9]+]] = cmp.eq([[R_A]],[[R_B]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = add(r0,[[R_E]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = asrh(r0)
; CHECK-NEXT: if ([[P1]]) [[R_B]] = and([[R_D]],[[R_C]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (![[P1]]) [[R_B]] = and(r0,[[R_C]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if ([[P0]]) [[R_B]] = ##32767
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: memh(r1+#0) = [[R_B]]
; CHECK-NEXT: }

entry:
  store bfloat %v, ptr %addr, align 2
  ret void
}

; sum: add two scalar bfloat arguments and return the bfloat result.
;
; Expected sequence, as pinned by the directives below:
;   * r0 and r1 each go through the and/add/asrh/cmp.eq selection built on
;     the constants ##32768, ##131071 and ##65535, with sfclass(rN,#16)
;     substituting #32767;
;   * both selected halfwords are shifted with aslh and added with sfadd;
;   * the sum goes through the same selection once more and is returned in
;     r0 as aslh of the selected halfword.
; NOTE(review): attribute group #0 is referenced here but is not defined in
; the visible part of this file -- confirm it is defined elsewhere.
define bfloat @sum(bfloat %a, bfloat %b) #0 {
; CHECK-LABEL: sum:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[R3:r[0-9]+]] = ##32768
; CHECK-NEXT: [[R4:r[0-9]+]] = ##131071
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R2:r[0-9]+]] = ##65535
; CHECK-NEXT: [[R6:r[0-9]+]] = and(r0,[[R3]])
; CHECK-NEXT: [[R5:r[0-9]+]] = and(r0,[[R4]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R8:r[0-9]+]] = and(r1,[[R3]])
; CHECK-NEXT: [[R7:r[0-9]+]] = and(r1,[[R4]])
; CHECK-NEXT: [[R6]] = add(r0,[[R6]])
; CHECK-NEXT: [[P0:p[0-9]+]] = cmp.eq([[R5]],[[R3]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R5]] = asrh(r0)
; CHECK-NEXT: [[P1:p[0-9]+]] = cmp.eq([[R7]],[[R3]])
; CHECK-NEXT: [[R7]] = asrh(r1)
; CHECK-NEXT: [[R8]] = add(r1,[[R8]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R6]] = asrh([[R6]])
; CHECK-NEXT: if ([[P0]]) [[R5]] = and([[R5]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0]] = sfclass(r0,#16)
; CHECK-NEXT: if (![[P0]]) [[R5]] = and([[R6]],[[R2]])
; CHECK-NEXT: [[R6]] = asrh([[R8]])
; CHECK-NEXT: if ([[P1]]) [[R7]] = and([[R7]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P1]] = sfclass(r1,#16)
; CHECK-NEXT: [[R0:r[0-9]+]] = #32767
; CHECK-NEXT: if (![[P1]]) [[R7]] = and([[R6]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if ([[P0]]) [[R5]] = add([[R0]],#0)
; CHECK-NEXT: if ([[P1]]) [[R7]] = add([[R0]],#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R6]] = aslh([[R7]])
; CHECK-NEXT: [[R5]] = aslh([[R5]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R1:r[0-9]+]] = sfadd([[R5]],[[R6]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R5]] = and([[R1]],[[R3]])
; CHECK-NEXT: [[R4]] = and([[R1]],[[R4]])
; CHECK-NEXT: [[R6]] = asrh([[R1]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0]] = cmp.eq([[R4]],[[R3]])
; CHECK-NEXT: [[R5]] = add([[R1]],[[R5]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R3]] = asrh([[R5]])
; CHECK-NEXT: if ([[P0]]) [[R4]] = and([[R6]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[P0]] = sfclass([[R1]],#16)
; CHECK-NEXT: if (![[P0]]) [[R4]] = and([[R3]],[[R2]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if ([[P0]]) [[R4]] = add([[R0]],#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r0 = aslh([[R4]])
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }

entry:
  %add = fadd bfloat %a, %b
  ret bfloat %add
}

; double_bf16: extend both bfloat arguments to double, add them, round-trip
; the sum through a double stack slot, truncate it to bfloat, round-trip that
; through a bfloat stack slot, and return it converted to i32 with fptosi.
;
; Expected sequence, as pinned by the directives below: convert_sf2df on r0
; and r1, dfadd, a memd store of the sum at offset #8, convert_df2sf, an
; lsr by #16 whose result is stored with memh at offset #6, an and with
; ##-65536, and finally convert_sf2w(...):chop. The register captured as SP
; is adjusted by #-16 in the first packet and by #16 in the return packet.
;
; Unnamed values are numbered from %0 so the body does not depend on parser
; support for gaps in unnamed-value numbering.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file -- confirm it is defined elsewhere, otherwise the attributes listed in
; the comment below are not actually applied.
; Function Attrs: noinline nounwind optnone
define dso_local i32 @double_bf16(bfloat %a, bfloat %b) #0 {
; CHECK-LABEL: double_bf16:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: [[PAIR_A:r[0-9]+:[0-9]+]] = convert_sf2df(r0)
; CHECK-NEXT: [[PAIR_B:r[0-9]+:[0-9]+]] = convert_sf2df(r1)
; CHECK-NEXT: [[SP:r[0-9]+]] = add([[SP]],#-16)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[RESULT:r[0-9]+:[0-9]+]] = dfadd([[PAIR_A]],[[PAIR_B]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memd([[SP]]+#8) = [[RESULT]]
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[TMP:r[0-9]+]] = convert_df2sf([[RESULT]])
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[TMP1:r[0-9]+]] = lsr([[TMP]],#16)
; CHECK-NEXT: [[R0:r[0-9]+]] = and([[TMP]],##-65536)
; CHECK-NEXT: memh([[SP]]+#6) = [[TMP1]].new
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: [[R0]] = convert_sf2w([[R0]]):chop
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: [[SP]] = add([[SP]],#16)
; CHECK-NEXT: }

entry:
  %c = alloca double, align 8
  %d = alloca bfloat, align 2
  %conv = fpext bfloat %a to double
  %conv1 = fpext bfloat %b to double
  %add = fadd double %conv, %conv1
  store double %add, ptr %c, align 8
  %0 = load double, ptr %c, align 8
  %conv2 = fptrunc double %0 to bfloat
  store bfloat %conv2, ptr %d, align 2
  %1 = load bfloat, ptr %d, align 2
  %conv3 = fptosi bfloat %1 to i32
  ret i32 %conv3
}