; llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
;
; 301 lines
; 9.9 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "load" --filter-out "store" --filter-out "wait" --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI-SDAG %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI-GISEL %s
; Intrinsic exercised by every function in this file: takes three half
; operands and returns a half.
declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)
; Base case: all three intrinsic operands come from memory.
; The kernel receives four addrspace(1) pointers, loads one half from each of
; %a, %b and %c with volatile loads, passes them to llvm.amdgcn.div.fixup.f16
; in that order, and stores the result through %r.
; Both check groups expect one v_div_fixup_f16 before s_endpgm. The VI-GISEL
; group also expects v_readfirstlane_b32 / v_mov_b32_e32 instructions ahead of
; it and an SGPR as the first source operand.
; The assertions contain no load, store or wait instructions because the file
; header passes --filter-out for those patterns to the update script.
define amdgpu_kernel void @div_fixup_f16(
; VI-SDAG-LABEL: div_fixup_f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s11, 0xf000
; VI-SDAG: s_mov_b32 s10, -1
; VI-SDAG: s_mov_b32 s14, s10
; VI-SDAG: s_mov_b32 s15, s11
; VI-SDAG: s_mov_b32 s12, s2
; VI-SDAG: s_mov_b32 s13, s3
; VI-SDAG: s_mov_b32 s16, s4
; VI-SDAG: s_mov_b32 s17, s5
; VI-SDAG: s_mov_b32 s18, s10
; VI-SDAG: s_mov_b32 s19, s11
; VI-SDAG: s_mov_b32 s4, s6
; VI-SDAG: s_mov_b32 s5, s7
; VI-SDAG: s_mov_b32 s6, s10
; VI-SDAG: s_mov_b32 s7, s11
; VI-SDAG: s_mov_b32 s8, s0
; VI-SDAG: s_mov_b32 s9, s1
; VI-SDAG: v_div_fixup_f16 v0, v0, v1, v2
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s10, -1
; VI-GISEL: s_mov_b32 s11, 0xf000
; VI-GISEL: s_mov_b64 s[8:9], s[2:3]
; VI-GISEL: s_mov_b64 s[8:9], s[4:5]
; VI-GISEL: s_mov_b64 s[8:9], s[6:7]
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_readfirstlane_b32 s3, v1
; VI-GISEL: v_mov_b32_e32 v0, s3
; VI-GISEL: v_readfirstlane_b32 s4, v2
; VI-GISEL: v_mov_b32_e32 v1, s4
; VI-GISEL: v_div_fixup_f16 v0, s2, v0, v1
; VI-GISEL: s_mov_b64 s[2:3], s[10:11]
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load volatile half, ptr addrspace(1) %a
%b.val = load volatile half, ptr addrspace(1) %b
%c.val = load volatile half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half %c.val)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Constant in operand a: the first intrinsic operand is the literal half 3.0,
; while b and c are read with volatile loads from %b and %c. The result is
; stored through %r.
; The checks expect 0x4200 (the IEEE half bit pattern of 3.0) to be placed in a
; register first -- s0 via s_movk_i32 in the VI-SDAG group, v2 via
; v_mov_b32_e32 in the VI-GISEL group -- and that register to appear as the
; first source operand of v_div_fixup_f16.
define amdgpu_kernel void @div_fixup_f16_imm_a(
; VI-SDAG-LABEL: div_fixup_f16_imm_a:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s7, 0xf000
; VI-SDAG: s_mov_b32 s6, -1
; VI-SDAG: s_mov_b32 s14, s6
; VI-SDAG: s_mov_b32 s12, s2
; VI-SDAG: s_mov_b32 s13, s3
; VI-SDAG: s_mov_b32 s15, s7
; VI-SDAG: s_mov_b32 s10, s6
; VI-SDAG: s_mov_b32 s11, s7
; VI-SDAG: s_mov_b32 s4, s0
; VI-SDAG: s_movk_i32 s0, 0x4200
; VI-SDAG: s_mov_b32 s5, s1
; VI-SDAG: v_div_fixup_f16 v0, s0, v0, v1
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16_imm_a:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s6, -1
; VI-GISEL: s_mov_b32 s7, 0xf000
; VI-GISEL: s_mov_b64 s[10:11], s[6:7]
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL: v_mov_b32_e32 v2, 0x4200
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_readfirstlane_b32 s3, v1
; VI-GISEL: v_mov_b32_e32 v0, s3
; VI-GISEL: v_div_fixup_f16 v0, v2, s2, v0
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL: s_nop 1
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%b.val = load volatile half, ptr addrspace(1) %b
%c.val = load volatile half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half %c.val)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Constant in operand b: the second intrinsic operand is the literal half 3.0,
; while a and c are read with volatile loads from %a and %c. The result is
; stored through %r.
; The checks expect 0x4200 (the IEEE half bit pattern of 3.0) to be placed in a
; register first -- s0 via s_movk_i32 in the VI-SDAG group, v2 via
; v_mov_b32_e32 in the VI-GISEL group -- and that register to appear as the
; second source operand of v_div_fixup_f16.
define amdgpu_kernel void @div_fixup_f16_imm_b(
; VI-SDAG-LABEL: div_fixup_f16_imm_b:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s7, 0xf000
; VI-SDAG: s_mov_b32 s6, -1
; VI-SDAG: s_mov_b32 s14, s6
; VI-SDAG: s_mov_b32 s12, s2
; VI-SDAG: s_mov_b32 s13, s3
; VI-SDAG: s_mov_b32 s15, s7
; VI-SDAG: s_mov_b32 s10, s6
; VI-SDAG: s_mov_b32 s11, s7
; VI-SDAG: s_mov_b32 s4, s0
; VI-SDAG: s_movk_i32 s0, 0x4200
; VI-SDAG: s_mov_b32 s5, s1
; VI-SDAG: v_div_fixup_f16 v0, v0, s0, v1
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16_imm_b:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s6, -1
; VI-GISEL: s_mov_b32 s7, 0xf000
; VI-GISEL: s_mov_b64 s[10:11], s[6:7]
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL: v_mov_b32_e32 v2, 0x4200
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_readfirstlane_b32 s3, v1
; VI-GISEL: v_mov_b32_e32 v0, s3
; VI-GISEL: v_div_fixup_f16 v0, s2, v2, v0
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL: s_nop 1
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %c) {
entry:
%a.val = load volatile half, ptr addrspace(1) %a
%c.val = load volatile half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half %c.val)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Constant in operand c: the third intrinsic operand is the literal half 3.0,
; while a and b are read with volatile loads from %a and %b. The result is
; stored through %r.
; The checks expect 0x4200 (the IEEE half bit pattern of 3.0) to be placed in a
; register first -- s0 via s_movk_i32 in the VI-SDAG group, v2 via
; v_mov_b32_e32 in the VI-GISEL group -- and that register to appear as the
; third source operand of v_div_fixup_f16.
define amdgpu_kernel void @div_fixup_f16_imm_c(
; VI-SDAG-LABEL: div_fixup_f16_imm_c:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s7, 0xf000
; VI-SDAG: s_mov_b32 s6, -1
; VI-SDAG: s_mov_b32 s14, s6
; VI-SDAG: s_mov_b32 s12, s2
; VI-SDAG: s_mov_b32 s13, s3
; VI-SDAG: s_mov_b32 s15, s7
; VI-SDAG: s_mov_b32 s10, s6
; VI-SDAG: s_mov_b32 s11, s7
; VI-SDAG: s_mov_b32 s4, s0
; VI-SDAG: s_movk_i32 s0, 0x4200
; VI-SDAG: s_mov_b32 s5, s1
; VI-SDAG: v_div_fixup_f16 v0, v0, v1, s0
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16_imm_c:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s6, -1
; VI-GISEL: s_mov_b32 s7, 0xf000
; VI-GISEL: s_mov_b64 s[10:11], s[6:7]
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL: v_mov_b32_e32 v2, 0x4200
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_readfirstlane_b32 s3, v1
; VI-GISEL: v_mov_b32_e32 v0, s3
; VI-GISEL: v_div_fixup_f16 v0, s2, v0, v2
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL: s_nop 1
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
entry:
%a.val = load volatile half, ptr addrspace(1) %a
%b.val = load volatile half, ptr addrspace(1) %b
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half 3.0)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Constants in operands a and b: both are the literal half 3.0; only c is read
; (volatile load) from %c. The result is stored through %r.
; The checks expect 0x4200 (the IEEE half bit pattern of 3.0) to be placed in
; one register -- s0 in the VI-SDAG group, v1 in the VI-GISEL group -- and that
; same register to be used for both the first and second source operands of
; v_div_fixup_f16.
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_b(
; VI-SDAG-LABEL: div_fixup_f16_imm_a_imm_b:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s7, 0xf000
; VI-SDAG: s_mov_b32 s6, -1
; VI-SDAG: s_mov_b32 s10, s6
; VI-SDAG: s_mov_b32 s11, s7
; VI-SDAG: s_mov_b32 s8, s2
; VI-SDAG: s_mov_b32 s9, s3
; VI-SDAG: s_mov_b32 s4, s0
; VI-SDAG: s_movk_i32 s0, 0x4200
; VI-SDAG: s_mov_b32 s5, s1
; VI-SDAG: v_div_fixup_f16 v0, s0, s0, v0
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16_imm_a_imm_b:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s6, -1
; VI-GISEL: s_mov_b32 s7, 0xf000
; VI-GISEL: v_mov_b32_e32 v1, 0x4200
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_div_fixup_f16 v0, v1, v1, s2
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL: s_nop 2
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %c) {
entry:
%c.val = load volatile half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half 3.0, half %c.val)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Constants in operands b and c: both are the literal half 3.0; only a is read
; from %a. The result is stored through %r.
; The checks expect 0x4200 (the IEEE half bit pattern of 3.0) to be placed in
; one register -- s0 in the VI-SDAG group, v1 in the VI-GISEL group -- and that
; same register to be used for both the second and third source operands of
; v_div_fixup_f16.
; NOTE(review): the load of %a here is a plain load, whereas the kernels
; earlier in this file use volatile loads -- confirm this is intentional
; before changing it; the autogenerated assertions would need regenerating.
define amdgpu_kernel void @div_fixup_f16_imm_b_imm_c(
; VI-SDAG-LABEL: div_fixup_f16_imm_b_imm_c:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s7, 0xf000
; VI-SDAG: s_mov_b32 s6, -1
; VI-SDAG: s_mov_b32 s10, s6
; VI-SDAG: s_mov_b32 s11, s7
; VI-SDAG: s_mov_b32 s8, s2
; VI-SDAG: s_mov_b32 s9, s3
; VI-SDAG: s_mov_b32 s4, s0
; VI-SDAG: s_movk_i32 s0, 0x4200
; VI-SDAG: s_mov_b32 s5, s1
; VI-SDAG: v_div_fixup_f16 v0, v0, s0, s0
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16_imm_b_imm_c:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s6, -1
; VI-GISEL: s_mov_b32 s7, 0xf000
; VI-GISEL: v_mov_b32_e32 v1, 0x4200
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_div_fixup_f16 v0, s2, v1, v1
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL: s_nop 2
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
entry:
%a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half 3.0)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Constants in operands a and c: both are the literal half 3.0; only b is read
; from %b. The result is stored through %r.
; The checks expect 0x4200 (the IEEE half bit pattern of 3.0) to be placed in
; one register -- s0 in the VI-SDAG group, v1 in the VI-GISEL group -- and that
; same register to be used for both the first and third source operands of
; v_div_fixup_f16.
; NOTE(review): the load of %b here is a plain load, whereas the kernels
; earlier in this file use volatile loads -- confirm this is intentional
; before changing it; the autogenerated assertions would need regenerating.
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_c(
; VI-SDAG-LABEL: div_fixup_f16_imm_a_imm_c:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG: s_mov_b32 s7, 0xf000
; VI-SDAG: s_mov_b32 s6, -1
; VI-SDAG: s_mov_b32 s10, s6
; VI-SDAG: s_mov_b32 s11, s7
; VI-SDAG: s_mov_b32 s8, s2
; VI-SDAG: s_mov_b32 s9, s3
; VI-SDAG: s_mov_b32 s4, s0
; VI-SDAG: s_movk_i32 s0, 0x4200
; VI-SDAG: s_mov_b32 s5, s1
; VI-SDAG: v_div_fixup_f16 v0, s0, v0, s0
; VI-SDAG: s_endpgm
;
; VI-GISEL-LABEL: div_fixup_f16_imm_a_imm_c:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL: s_mov_b32 s6, -1
; VI-GISEL: s_mov_b32 s7, 0xf000
; VI-GISEL: v_mov_b32_e32 v1, 0x4200
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL: v_readfirstlane_b32 s2, v0
; VI-GISEL: v_div_fixup_f16 v0, v1, s2, v1
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL: s_nop 2
; VI-GISEL: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %b) {
entry:
%b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half 3.0)
store half %r.val, ptr addrspace(1) %r
ret void
}
; Non-kernel variant: the three half operands arrive as function arguments and
; the intrinsic result is returned directly, with no loads or stores in the IR.
; Both check groups expect the same output: v_div_fixup_f16 reading v0, v1 and
; v2 and writing v0, then s_setpc_b64 s[30:31].
define half @div_fixup_f16_vgpr(half %a, half %b, half %c) {
; VI-SDAG-LABEL: div_fixup_f16_vgpr:
; VI-SDAG: ; %bb.0:
; VI-SDAG: v_div_fixup_f16 v0, v0, v1, v2
; VI-SDAG: s_setpc_b64 s[30:31]
;
; VI-GISEL-LABEL: div_fixup_f16_vgpr:
; VI-GISEL: ; %bb.0:
; VI-GISEL: v_div_fixup_f16 v0, v0, v1, v2
; VI-GISEL: s_setpc_b64 s[30:31]
%r = call half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)
ret half %r
}