Ami-zhang 0ed5d9aff6
[LoongArch][BF16] Add support for the __bf16 type (#142548)
The LoongArch psABI recently added __bf16 type support. Now we can
enable this new type in clang.

Currently, bf16 operations are automatically supported by promoting to
float. This patch adds bf16 support by ensuring that load extension /
truncate store operations are properly expanded.

This commit also implements support for bf16 truncate/extend on hard-FP
targets. The extend operation is implemented with a shift, just as in the
standard legalization. This requires custom lowering of the truncate
libcall on hard-float ABIs (the normal libcall code path is used on
soft-float ABIs).
2025-06-09 11:15:41 +08:00

173 lines
5.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=loongarch64 -mattr=+d -target-abi=lp64d < %s | FileCheck --check-prefixes=CHECK,LA64 %s
; RUN: llc -mtriple=loongarch32 -mattr=+d -target-abi=ilp32d < %s | FileCheck --check-prefixes=CHECK,LA32 %s
; A bfloat load/store round-trip performs no FP conversion: the value moves
; through a GPR as a raw 16-bit integer (ld.h / st.h), identical on LA32/LA64.
define void @test_load_store(ptr %p, ptr %q) nounwind {
; CHECK-LABEL: test_load_store:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.h $a0, $a0, 0
; CHECK-NEXT: st.h $a0, $a1, 0
; CHECK-NEXT: ret
%a = load bfloat, ptr %p
store bfloat %a, ptr %q
ret void
}
; fpext bfloat -> float is lowered inline with no libcall: the 16 raw bits
; are zero-extend loaded, shifted left by 16 into the high half of an f32
; bit pattern, then moved to an FPR. Only the shift mnemonic differs by
; register width (slli.d on LA64 vs slli.w on LA32).
define float @test_fpextend_float(ptr %p) nounwind {
; LA64-LABEL: test_fpextend_float:
; LA64: # %bb.0:
; LA64-NEXT: ld.hu $a0, $a0, 0
; LA64-NEXT: slli.d $a0, $a0, 16
; LA64-NEXT: movgr2fr.w $fa0, $a0
; LA64-NEXT: ret
;
; LA32-LABEL: test_fpextend_float:
; LA32: # %bb.0:
; LA32-NEXT: ld.hu $a0, $a0, 0
; LA32-NEXT: slli.w $a0, $a0, 16
; LA32-NEXT: movgr2fr.w $fa0, $a0
; LA32-NEXT: ret
%a = load bfloat, ptr %p
%r = fpext bfloat %a to float
ret float %r
}
; fpext bfloat -> double goes through float: the same inline shift-based
; bf16 -> f32 extension as above, followed by a single fcvt.d.s.
define double @test_fpextend_double(ptr %p) nounwind {
; LA64-LABEL: test_fpextend_double:
; LA64: # %bb.0:
; LA64-NEXT: ld.hu $a0, $a0, 0
; LA64-NEXT: slli.d $a0, $a0, 16
; LA64-NEXT: movgr2fr.w $fa0, $a0
; LA64-NEXT: fcvt.d.s $fa0, $fa0
; LA64-NEXT: ret
;
; LA32-LABEL: test_fpextend_double:
; LA32: # %bb.0:
; LA32-NEXT: ld.hu $a0, $a0, 0
; LA32-NEXT: slli.w $a0, $a0, 16
; LA32-NEXT: movgr2fr.w $fa0, $a0
; LA32-NEXT: fcvt.d.s $fa0, $fa0
; LA32-NEXT: ret
%a = load bfloat, ptr %p
%r = fpext bfloat %a to double
ret double %r
}
; fptrunc float -> bfloat has no inline lowering; it calls the __truncsfbf2
; libcall (pcaddu18i+jirl on LA64, bl on LA32). On this hard-float ABI the
; result comes back in $fa0, whose bits are then stored as a 16-bit integer.
; $fp is saved/restored to keep the destination pointer live across the call.
define void @test_fptrunc_float(float %f, ptr %p) nounwind {
; LA64-LABEL: test_fptrunc_float:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: move $fp, $a0
; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfbf2)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: movfr2gr.s $a0, $fa0
; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
;
; LA32-LABEL: test_fptrunc_float:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: move $fp, $a0
; LA32-NEXT: bl __truncsfbf2
; LA32-NEXT: movfr2gr.s $a0, $fa0
; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
%a = fptrunc float %f to bfloat
store bfloat %a, ptr %p
ret void
}
; fptrunc double -> bfloat mirrors the float case but calls __truncdfbf2;
; the hard-float result in $fa0 is moved to a GPR and stored with st.h.
define void @test_fptrunc_double(double %d, ptr %p) nounwind {
; LA64-LABEL: test_fptrunc_double:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: move $fp, $a0
; LA64-NEXT: pcaddu18i $ra, %call36(__truncdfbf2)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: movfr2gr.s $a0, $fa0
; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
;
; LA32-LABEL: test_fptrunc_double:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: move $fp, $a0
; LA32-NEXT: bl __truncdfbf2
; LA32-NEXT: movfr2gr.s $a0, $fa0
; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
%a = fptrunc double %d to bfloat
store bfloat %a, ptr %p
ret void
}
; bf16 arithmetic is promoted to float: both operands are extended inline
; (ld.hu + shift-left-16 + movgr2fr.w), added with fadd.s, and the result
; truncated back to bf16 via the __truncsfbf2 libcall before the st.h.
; %p is stashed in $fp so it survives the call.
define void @test_fadd(ptr %p, ptr %q) nounwind {
; LA64-LABEL: test_fadd:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -16
; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT: ld.hu $a1, $a1, 0
; LA64-NEXT: move $fp, $a0
; LA64-NEXT: ld.hu $a0, $a0, 0
; LA64-NEXT: slli.d $a1, $a1, 16
; LA64-NEXT: movgr2fr.w $fa0, $a1
; LA64-NEXT: slli.d $a0, $a0, 16
; LA64-NEXT: movgr2fr.w $fa1, $a0
; LA64-NEXT: fadd.s $fa0, $fa1, $fa0
; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfbf2)
; LA64-NEXT: jirl $ra, $ra, 0
; LA64-NEXT: movfr2gr.s $a0, $fa0
; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 16
; LA64-NEXT: ret
;
; LA32-LABEL: test_fadd:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT: ld.hu $a1, $a1, 0
; LA32-NEXT: move $fp, $a0
; LA32-NEXT: ld.hu $a0, $a0, 0
; LA32-NEXT: slli.w $a1, $a1, 16
; LA32-NEXT: movgr2fr.w $fa0, $a1
; LA32-NEXT: slli.w $a0, $a0, 16
; LA32-NEXT: movgr2fr.w $fa1, $a0
; LA32-NEXT: fadd.s $fa0, $fa1, $fa0
; LA32-NEXT: bl __truncsfbf2
; LA32-NEXT: movfr2gr.s $a0, $fa0
; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
%a = load bfloat, ptr %p
%b = load bfloat, ptr %q
%r = fadd bfloat %a, %b
store bfloat %r, ptr %p
ret void
}