301 lines
9.9 KiB
LLVM
301 lines
9.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "load" --filter-out "store" --filter-out "wait" --version 6
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI-SDAG %s
|
|
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI-GISEL %s
|
|
|
|
declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)
|
|
|
|
; Baseline case: all three operands of llvm.amdgcn.div.fixup.f16 are runtime
; values. The kernel takes four addrspace(1) pointers, reads one half from each
; of %a, %b and %c with volatile loads, calls the intrinsic on the three values
; and stores the half result through %r.
;
; Both check blocks below (SelectionDAG and GlobalISel) expect exactly one
; v_div_fixup_f16. The SelectionDAG block expects it to read v0, v1, v2; the
; GlobalISel block expects each loaded value to go through v_readfirstlane_b32
; first, with the first source operand supplied from an SGPR (s2).
;
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): the three consecutive writes to s[8:9] in the GlobalISel block
; presumably look redundant only because lines matching "load" are filtered out
; of the generated checks -- confirm against unfiltered llc output.
define amdgpu_kernel void @div_fixup_f16(
|
|
; VI-SDAG-LABEL: div_fixup_f16:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s11, 0xf000
|
|
; VI-SDAG: s_mov_b32 s10, -1
|
|
; VI-SDAG: s_mov_b32 s14, s10
|
|
; VI-SDAG: s_mov_b32 s15, s11
|
|
; VI-SDAG: s_mov_b32 s12, s2
|
|
; VI-SDAG: s_mov_b32 s13, s3
|
|
; VI-SDAG: s_mov_b32 s16, s4
|
|
; VI-SDAG: s_mov_b32 s17, s5
|
|
; VI-SDAG: s_mov_b32 s18, s10
|
|
; VI-SDAG: s_mov_b32 s19, s11
|
|
; VI-SDAG: s_mov_b32 s4, s6
|
|
; VI-SDAG: s_mov_b32 s5, s7
|
|
; VI-SDAG: s_mov_b32 s6, s10
|
|
; VI-SDAG: s_mov_b32 s7, s11
|
|
; VI-SDAG: s_mov_b32 s8, s0
|
|
; VI-SDAG: s_mov_b32 s9, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, v0, v1, v2
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s10, -1
|
|
; VI-GISEL: s_mov_b32 s11, 0xf000
|
|
; VI-GISEL: s_mov_b64 s[8:9], s[2:3]
|
|
; VI-GISEL: s_mov_b64 s[8:9], s[4:5]
|
|
; VI-GISEL: s_mov_b64 s[8:9], s[6:7]
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_readfirstlane_b32 s3, v1
|
|
; VI-GISEL: v_mov_b32_e32 v0, s3
|
|
; VI-GISEL: v_readfirstlane_b32 s4, v2
|
|
; VI-GISEL: v_mov_b32_e32 v1, s4
|
|
; VI-GISEL: v_div_fixup_f16 v0, s2, v0, v1
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[10:11]
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
entry:
|
|
%a.val = load volatile half, ptr addrspace(1) %a
|
|
%b.val = load volatile half, ptr addrspace(1) %b
|
|
%c.val = load volatile half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Constant in the FIRST operand: the intrinsic is called with half 3.0 as %a,
; while %b and %c are read with volatile loads from addrspace(1) pointers. The
; result is stored through %r.
;
; The checks expect the constant to be materialized as 0x4200 (the half
; encoding of 3.0) in a register rather than folded into the instruction:
;   - SelectionDAG: s_movk_i32 s0, 0x4200, then s0 as the first source operand.
;   - GlobalISel:   v_mov_b32_e32 v2, 0x4200, then v2 as the first source
;                   operand; the loaded %b value is expected to arrive via
;                   v_readfirstlane_b32 in an SGPR (s2).
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @div_fixup_f16_imm_a(
|
|
; VI-SDAG-LABEL: div_fixup_f16_imm_a:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s7, 0xf000
|
|
; VI-SDAG: s_mov_b32 s6, -1
|
|
; VI-SDAG: s_mov_b32 s14, s6
|
|
; VI-SDAG: s_mov_b32 s12, s2
|
|
; VI-SDAG: s_mov_b32 s13, s3
|
|
; VI-SDAG: s_mov_b32 s15, s7
|
|
; VI-SDAG: s_mov_b32 s10, s6
|
|
; VI-SDAG: s_mov_b32 s11, s7
|
|
; VI-SDAG: s_mov_b32 s4, s0
|
|
; VI-SDAG: s_movk_i32 s0, 0x4200
|
|
; VI-SDAG: s_mov_b32 s5, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, s0, v0, v1
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_imm_a:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s6, -1
|
|
; VI-GISEL: s_mov_b32 s7, 0xf000
|
|
; VI-GISEL: s_mov_b64 s[10:11], s[6:7]
|
|
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
|
|
; VI-GISEL: v_mov_b32_e32 v2, 0x4200
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_readfirstlane_b32 s3, v1
|
|
; VI-GISEL: v_mov_b32_e32 v0, s3
|
|
; VI-GISEL: v_div_fixup_f16 v0, v2, s2, v0
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
|
|
; VI-GISEL: s_nop 1
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
entry:
|
|
%b.val = load volatile half, ptr addrspace(1) %b
|
|
%c.val = load volatile half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Constant in the SECOND operand: the intrinsic is called with half 3.0 as %b,
; while %a and %c are read with volatile loads from addrspace(1) pointers. The
; result is stored through %r.
;
; The checks expect the constant to be materialized as 0x4200 (the half
; encoding of 3.0) in a register and used in the second source position:
;   - SelectionDAG: s_movk_i32 s0, 0x4200 -> v_div_fixup_f16 v0, v0, s0, v1
;   - GlobalISel:   v_mov_b32_e32 v2, 0x4200 -> v_div_fixup_f16 v0, s2, v2, v0
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @div_fixup_f16_imm_b(
|
|
; VI-SDAG-LABEL: div_fixup_f16_imm_b:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s7, 0xf000
|
|
; VI-SDAG: s_mov_b32 s6, -1
|
|
; VI-SDAG: s_mov_b32 s14, s6
|
|
; VI-SDAG: s_mov_b32 s12, s2
|
|
; VI-SDAG: s_mov_b32 s13, s3
|
|
; VI-SDAG: s_mov_b32 s15, s7
|
|
; VI-SDAG: s_mov_b32 s10, s6
|
|
; VI-SDAG: s_mov_b32 s11, s7
|
|
; VI-SDAG: s_mov_b32 s4, s0
|
|
; VI-SDAG: s_movk_i32 s0, 0x4200
|
|
; VI-SDAG: s_mov_b32 s5, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, v0, s0, v1
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_imm_b:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s6, -1
|
|
; VI-GISEL: s_mov_b32 s7, 0xf000
|
|
; VI-GISEL: s_mov_b64 s[10:11], s[6:7]
|
|
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
|
|
; VI-GISEL: v_mov_b32_e32 v2, 0x4200
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_readfirstlane_b32 s3, v1
|
|
; VI-GISEL: v_mov_b32_e32 v0, s3
|
|
; VI-GISEL: v_div_fixup_f16 v0, s2, v2, v0
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
|
|
; VI-GISEL: s_nop 1
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %c) {
|
|
entry:
|
|
%a.val = load volatile half, ptr addrspace(1) %a
|
|
%c.val = load volatile half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Constant in the THIRD operand: the intrinsic is called with half 3.0 as %c,
; while %a and %b are read with volatile loads from addrspace(1) pointers. The
; result is stored through %r.
;
; The checks expect the constant to be materialized as 0x4200 (the half
; encoding of 3.0) in a register and used in the third source position:
;   - SelectionDAG: s_movk_i32 s0, 0x4200 -> v_div_fixup_f16 v0, v0, v1, s0
;   - GlobalISel:   v_mov_b32_e32 v2, 0x4200 -> v_div_fixup_f16 v0, s2, v0, v2
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @div_fixup_f16_imm_c(
|
|
; VI-SDAG-LABEL: div_fixup_f16_imm_c:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s7, 0xf000
|
|
; VI-SDAG: s_mov_b32 s6, -1
|
|
; VI-SDAG: s_mov_b32 s14, s6
|
|
; VI-SDAG: s_mov_b32 s12, s2
|
|
; VI-SDAG: s_mov_b32 s13, s3
|
|
; VI-SDAG: s_mov_b32 s15, s7
|
|
; VI-SDAG: s_mov_b32 s10, s6
|
|
; VI-SDAG: s_mov_b32 s11, s7
|
|
; VI-SDAG: s_mov_b32 s4, s0
|
|
; VI-SDAG: s_movk_i32 s0, 0x4200
|
|
; VI-SDAG: s_mov_b32 s5, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, v0, v1, s0
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_imm_c:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s6, -1
|
|
; VI-GISEL: s_mov_b32 s7, 0xf000
|
|
; VI-GISEL: s_mov_b64 s[10:11], s[6:7]
|
|
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
|
|
; VI-GISEL: v_mov_b32_e32 v2, 0x4200
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_readfirstlane_b32 s3, v1
|
|
; VI-GISEL: v_mov_b32_e32 v0, s3
|
|
; VI-GISEL: v_div_fixup_f16 v0, s2, v0, v2
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
|
|
; VI-GISEL: s_nop 1
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b) {
|
|
entry:
|
|
%a.val = load volatile half, ptr addrspace(1) %a
|
|
%b.val = load volatile half, ptr addrspace(1) %b
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half 3.0)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Constants in the FIRST and SECOND operands: the intrinsic is called with
; half 3.0 as both %a and %b; only %c is read (volatile load) from an
; addrspace(1) pointer. The result is stored through %r.
;
; The checks expect 0x4200 (the half encoding of 3.0) to be materialized once
; and the same register to be used for both constant operands:
;   - SelectionDAG: s_movk_i32 s0, 0x4200 -> v_div_fixup_f16 v0, s0, s0, v0
;   - GlobalISel:   v_mov_b32_e32 v1, 0x4200 -> v_div_fixup_f16 v0, v1, v1, s2
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_b(
|
|
; VI-SDAG-LABEL: div_fixup_f16_imm_a_imm_b:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s7, 0xf000
|
|
; VI-SDAG: s_mov_b32 s6, -1
|
|
; VI-SDAG: s_mov_b32 s10, s6
|
|
; VI-SDAG: s_mov_b32 s11, s7
|
|
; VI-SDAG: s_mov_b32 s8, s2
|
|
; VI-SDAG: s_mov_b32 s9, s3
|
|
; VI-SDAG: s_mov_b32 s4, s0
|
|
; VI-SDAG: s_movk_i32 s0, 0x4200
|
|
; VI-SDAG: s_mov_b32 s5, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, s0, s0, v0
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_imm_a_imm_b:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s6, -1
|
|
; VI-GISEL: s_mov_b32 s7, 0xf000
|
|
; VI-GISEL: v_mov_b32_e32 v1, 0x4200
|
|
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_div_fixup_f16 v0, v1, v1, s2
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
|
|
; VI-GISEL: s_nop 2
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %c) {
|
|
entry:
|
|
%c.val = load volatile half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half 3.0, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Constants in the SECOND and THIRD operands: the intrinsic is called with
; half 3.0 as both %b and %c; only %a is read from an addrspace(1) pointer.
; The result is stored through %r.
;
; The checks expect 0x4200 (the half encoding of 3.0) to be materialized once
; and the same register to be used for both constant operands:
;   - SelectionDAG: s_movk_i32 s0, 0x4200 -> v_div_fixup_f16 v0, v0, s0, s0
;   - GlobalISel:   v_mov_b32_e32 v1, 0x4200 -> v_div_fixup_f16 v0, s2, v1, v1
;
; NOTE(review): the load of %a here is a plain load, whereas the kernels
; earlier in this file use `load volatile` -- confirm whether the difference
; is intentional before regenerating the checks.
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @div_fixup_f16_imm_b_imm_c(
|
|
; VI-SDAG-LABEL: div_fixup_f16_imm_b_imm_c:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s7, 0xf000
|
|
; VI-SDAG: s_mov_b32 s6, -1
|
|
; VI-SDAG: s_mov_b32 s10, s6
|
|
; VI-SDAG: s_mov_b32 s11, s7
|
|
; VI-SDAG: s_mov_b32 s8, s2
|
|
; VI-SDAG: s_mov_b32 s9, s3
|
|
; VI-SDAG: s_mov_b32 s4, s0
|
|
; VI-SDAG: s_movk_i32 s0, 0x4200
|
|
; VI-SDAG: s_mov_b32 s5, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, v0, s0, s0
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_imm_b_imm_c:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s6, -1
|
|
; VI-GISEL: s_mov_b32 s7, 0xf000
|
|
; VI-GISEL: v_mov_b32_e32 v1, 0x4200
|
|
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_div_fixup_f16 v0, s2, v1, v1
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
|
|
; VI-GISEL: s_nop 2
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a) {
|
|
entry:
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half 3.0)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Constants in the FIRST and THIRD operands: the intrinsic is called with
; half 3.0 as both %a and %c; only %b is read from an addrspace(1) pointer.
; The result is stored through %r.
;
; The checks expect 0x4200 (the half encoding of 3.0) to be materialized once
; and the same register to be used for both constant operands:
;   - SelectionDAG: s_movk_i32 s0, 0x4200 -> v_div_fixup_f16 v0, s0, v0, s0
;   - GlobalISel:   v_mov_b32_e32 v1, 0x4200 -> v_div_fixup_f16 v0, v1, s2, v1
;
; NOTE(review): the load of %b here is a plain load, whereas the kernels
; earlier in this file use `load volatile` -- confirm whether the difference
; is intentional before regenerating the checks.
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @div_fixup_f16_imm_a_imm_c(
|
|
; VI-SDAG-LABEL: div_fixup_f16_imm_a_imm_c:
|
|
; VI-SDAG: ; %bb.0: ; %entry
|
|
; VI-SDAG: s_mov_b32 s7, 0xf000
|
|
; VI-SDAG: s_mov_b32 s6, -1
|
|
; VI-SDAG: s_mov_b32 s10, s6
|
|
; VI-SDAG: s_mov_b32 s11, s7
|
|
; VI-SDAG: s_mov_b32 s8, s2
|
|
; VI-SDAG: s_mov_b32 s9, s3
|
|
; VI-SDAG: s_mov_b32 s4, s0
|
|
; VI-SDAG: s_movk_i32 s0, 0x4200
|
|
; VI-SDAG: s_mov_b32 s5, s1
|
|
; VI-SDAG: v_div_fixup_f16 v0, s0, v0, s0
|
|
; VI-SDAG: s_endpgm
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_imm_a_imm_c:
|
|
; VI-GISEL: ; %bb.0: ; %entry
|
|
; VI-GISEL: s_mov_b32 s6, -1
|
|
; VI-GISEL: s_mov_b32 s7, 0xf000
|
|
; VI-GISEL: v_mov_b32_e32 v1, 0x4200
|
|
; VI-GISEL: s_mov_b64 s[4:5], s[2:3]
|
|
; VI-GISEL: v_readfirstlane_b32 s2, v0
|
|
; VI-GISEL: v_div_fixup_f16 v0, v1, s2, v1
|
|
; VI-GISEL: s_mov_b64 s[2:3], s[6:7]
|
|
; VI-GISEL: s_nop 2
|
|
; VI-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %b) {
|
|
entry:
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half 3.0)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Non-kernel variant: a plain function (no amdgpu_kernel calling convention)
; that receives the three half operands directly as arguments, calls the
; intrinsic on them and returns the half result. There are no loads or stores
; in the IR.
;
; Both the SelectionDAG and GlobalISel check blocks expect the same output:
; v_div_fixup_f16 v0, v0, v1, v2 followed by the return s_setpc_b64 s[30:31].
;
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define half @div_fixup_f16_vgpr(half %a, half %b, half %c) {
|
|
; VI-SDAG-LABEL: div_fixup_f16_vgpr:
|
|
; VI-SDAG: ; %bb.0:
|
|
; VI-SDAG: v_div_fixup_f16 v0, v0, v1, v2
|
|
; VI-SDAG: s_setpc_b64 s[30:31]
|
|
;
|
|
; VI-GISEL-LABEL: div_fixup_f16_vgpr:
|
|
; VI-GISEL: ; %bb.0:
|
|
; VI-GISEL: v_div_fixup_f16 v0, v0, v1, v2
|
|
; VI-GISEL: s_setpc_b64 s[30:31]
|
|
%r = call half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)
|
|
ret half %r
|
|
}
|