
The LoongArch psABI recently added __bf16 type support. Now we can enable this new type in clang. Currently, bf16 operations are automatically supported by promoting to float. This patch adds bf16 support by ensuring that load extension / truncate store operations are properly expanded. And this commit implements support for bf16 truncate/extend on hard FP targets. The extend operation is implemented by a shift just as in the standard legalization. This requires custom lowering of the truncate libcall on hard float ABIs (the normal libcall code path is used on soft ABIs).
173 lines
5.6 KiB
LLVM
173 lines
5.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=loongarch64 -mattr=+d -target-abi=lp64d < %s | FileCheck --check-prefixes=CHECK,LA64 %s
; RUN: llc -mtriple=loongarch32 -mattr=+d -target-abi=ilp32d < %s | FileCheck --check-prefixes=CHECK,LA32 %s

; A bf16 value round-trips through memory as a plain 16-bit integer
; load/store; copying it needs no FP conversion on either ABI.
define void @test_load_store(ptr %p, ptr %q) nounwind {
; CHECK-LABEL: test_load_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld.h $a0, $a0, 0
; CHECK-NEXT:    st.h $a0, $a1, 0
; CHECK-NEXT:    ret
  %a = load bfloat, ptr %p
  store bfloat %a, ptr %q
  ret void
}

; fpext bf16 -> f32 is lowered inline: zero-extending 16-bit load, then a
; left shift of the payload into the high half of an f32 bit pattern.
; No libcall is required for the extend direction.
define float @test_fpextend_float(ptr %p) nounwind {
; LA64-LABEL: test_fpextend_float:
; LA64:       # %bb.0:
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    slli.d $a0, $a0, 16
; LA64-NEXT:    movgr2fr.w $fa0, $a0
; LA64-NEXT:    ret
;
; LA32-LABEL: test_fpextend_float:
; LA32:       # %bb.0:
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    slli.w $a0, $a0, 16
; LA32-NEXT:    movgr2fr.w $fa0, $a0
; LA32-NEXT:    ret
  %a = load bfloat, ptr %p
  %r = fpext bfloat %a to float
  ret float %r
}

; fpext bf16 -> f64 goes through f32: shift-based bf16->f32 extend,
; followed by a hardware fcvt.d.s for the f32->f64 step.
define double @test_fpextend_double(ptr %p) nounwind {
; LA64-LABEL: test_fpextend_double:
; LA64:       # %bb.0:
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    slli.d $a0, $a0, 16
; LA64-NEXT:    movgr2fr.w $fa0, $a0
; LA64-NEXT:    fcvt.d.s $fa0, $fa0
; LA64-NEXT:    ret
;
; LA32-LABEL: test_fpextend_double:
; LA32:       # %bb.0:
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    slli.w $a0, $a0, 16
; LA32-NEXT:    movgr2fr.w $fa0, $a0
; LA32-NEXT:    fcvt.d.s $fa0, $fa0
; LA32-NEXT:    ret
  %a = load bfloat, ptr %p
  %r = fpext bfloat %a to double
  ret double %r
}

; fptrunc f32 -> bf16 requires the __truncsfbf2 libcall (correct rounding
; cannot be done with a shift). On hard-float ABIs the result comes back
; in $fa0 and must be moved to a GPR before the 16-bit store.
define void @test_fptrunc_float(float %f, ptr %p) nounwind {
; LA64-LABEL: test_fptrunc_float:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    move $fp, $a0
; LA64-NEXT:    pcaddu18i $ra, %call36(__truncsfbf2)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    movfr2gr.s $a0, $fa0
; LA64-NEXT:    st.h $a0, $fp, 0
; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
;
; LA32-LABEL: test_fptrunc_float:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT:    move $fp, $a0
; LA32-NEXT:    bl __truncsfbf2
; LA32-NEXT:    movfr2gr.s $a0, $fa0
; LA32-NEXT:    st.h $a0, $fp, 0
; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
  %a = fptrunc float %f to bfloat
  store bfloat %a, ptr %p
  ret void
}

; fptrunc f64 -> bf16 uses the __truncdfbf2 libcall directly (truncating
; via f32 first would double-round); otherwise the same store sequence as
; the f32 case.
define void @test_fptrunc_double(double %d, ptr %p) nounwind {
; LA64-LABEL: test_fptrunc_double:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    move $fp, $a0
; LA64-NEXT:    pcaddu18i $ra, %call36(__truncdfbf2)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    movfr2gr.s $a0, $fa0
; LA64-NEXT:    st.h $a0, $fp, 0
; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
;
; LA32-LABEL: test_fptrunc_double:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT:    move $fp, $a0
; LA32-NEXT:    bl __truncdfbf2
; LA32-NEXT:    movfr2gr.s $a0, $fa0
; LA32-NEXT:    st.h $a0, $fp, 0
; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
  %a = fptrunc double %d to bfloat
  store bfloat %a, ptr %p
  ret void
}

; bf16 arithmetic is promoted to f32: both operands are extended inline
; (load + shift), the add runs in f32, and the result is truncated back
; via the __truncsfbf2 libcall before being stored.
define void @test_fadd(ptr %p, ptr %q) nounwind {
; LA64-LABEL: test_fadd:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
; LA64-NEXT:    ld.hu $a1, $a1, 0
; LA64-NEXT:    move $fp, $a0
; LA64-NEXT:    ld.hu $a0, $a0, 0
; LA64-NEXT:    slli.d $a1, $a1, 16
; LA64-NEXT:    movgr2fr.w $fa0, $a1
; LA64-NEXT:    slli.d $a0, $a0, 16
; LA64-NEXT:    movgr2fr.w $fa1, $a0
; LA64-NEXT:    fadd.s $fa0, $fa1, $fa0
; LA64-NEXT:    pcaddu18i $ra, %call36(__truncsfbf2)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    movfr2gr.s $a0, $fa0
; LA64-NEXT:    st.h $a0, $fp, 0
; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
;
; LA32-LABEL: test_fadd:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
; LA32-NEXT:    ld.hu $a1, $a1, 0
; LA32-NEXT:    move $fp, $a0
; LA32-NEXT:    ld.hu $a0, $a0, 0
; LA32-NEXT:    slli.w $a1, $a1, 16
; LA32-NEXT:    movgr2fr.w $fa0, $a1
; LA32-NEXT:    slli.w $a0, $a0, 16
; LA32-NEXT:    movgr2fr.w $fa1, $a0
; LA32-NEXT:    fadd.s $fa0, $fa1, $fa0
; LA32-NEXT:    bl __truncsfbf2
; LA32-NEXT:    movfr2gr.s $a0, $fa0
; LA32-NEXT:    st.h $a0, $fp, 0
; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
  %a = load bfloat, ptr %p
  %b = load bfloat, ptr %q
  %r = fadd bfloat %a, %b
  store bfloat %r, ptr %p
  ret void
}