294 lines
9.5 KiB
LLVM
294 lines
9.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "store" --filter-out "load" --filter-out "wait"
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GFX8 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GFX8 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GFX9-SDAG %s
|
|
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GFX9-GISEL %s
|
|
|
|
declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
|
|
|
|
; Baseline case: a, b and c are each loaded from an addrspace(1) pointer and
; passed unmodified to llvm.amdgcn.fmad.ftz.f16; the result is stored to %r.
; The assertions for every run line expect the operation to appear as
; v_mac_f16_e32.
define amdgpu_kernel void @mad_f16(
|
|
; GFX8-LABEL: mad_f16:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v4, s6
|
|
; GFX8: v_mov_b32_e32 v5, s7
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mac_f16_e32 v3, v6, v2
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mac_f16_e32 v3, v1, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s0, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s1, v2
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v3
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s1
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, s2
|
|
; GFX9-GISEL: v_mac_f16_e32 v2, s0, v1
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%c.val = load half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
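; TODO: GlobalISel should also fold the 8.0 (0x4800) immediate into v_madmk_f16 / v_madak_f16 in the mad_f16_imm_* tests below, as SelectionDAG does, instead of materializing it with v_mov_b32.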
|
|
; First operand (a) is the constant 8.0, which shows up as 0x4800 in the
; assertions; b and c are loaded from addrspace(1) pointers.
; The SelectionDAG runs (-global-isel=0) expect the constant to be encoded as
; the literal of v_madmk_f16. The GlobalISel run instead expects 0x4800 to be
; moved into a VGPR with v_mov_b32_e32 and consumed by v_mac_f16_e32.
define amdgpu_kernel void @mad_f16_imm_a(
|
|
; GFX8-LABEL: mad_f16_imm_a:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_madmk_f16 v2, v4, 0x4800, v2
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16_imm_a:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_madmk_f16 v1, v1, 0x4800, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16_imm_a:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s3, v2
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, 0x4800
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, s3
|
|
; GFX9-GISEL: v_mac_f16_e32 v2, s2, v1
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%c.val = load half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half 8.0, half %b.val, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second operand (b) is the constant 8.0, which shows up as 0x4800 in the
; assertions; a and c are loaded from addrspace(1) pointers.
; The SelectionDAG runs (-global-isel=0) expect the constant to be encoded as
; the literal of v_madmk_f16. The GlobalISel run instead expects 0x4800 to be
; moved into a VGPR with v_mov_b32_e32 and consumed by v_mac_f16_e32.
define amdgpu_kernel void @mad_f16_imm_b(
|
|
; GFX8-LABEL: mad_f16_imm_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_madmk_f16 v2, v4, 0x4800, v2
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16_imm_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_madmk_f16 v1, v1, 0x4800, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16_imm_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s3, v2
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, 0x4800
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, s3
|
|
; GFX9-GISEL: v_mac_f16_e32 v2, s2, v1
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%c.val = load half, ptr addrspace(1) %c
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half 8.0, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Third operand (the addend c) is the constant 8.0, which shows up as 0x4800
; in the assertions; a and b are loaded from addrspace(1) pointers.
; The SelectionDAG runs (-global-isel=0) expect the constant to be encoded as
; the trailing literal of v_madak_f16. The GlobalISel run instead expects
; 0x4800 to be moved into a VGPR with v_mov_b32_e32 and used as the
; accumulator of v_mac_f16_e32.
define amdgpu_kernel void @mad_f16_imm_c(
|
|
; GFX8-LABEL: mad_f16_imm_c:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_madak_f16 v2, v4, v2, 0x4800
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16_imm_c:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_madak_f16 v1, v1, v2, 0x4800
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16_imm_c:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s3, v2
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s3
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, 0x4800
|
|
; GFX9-GISEL: v_mac_f16_e32 v2, s2, v1
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b) {
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half 8.0)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second operand is the negation of b, written as (fsub -0.0, b), before it is
; passed to llvm.amdgcn.fmad.ftz.f16.
; The SelectionDAG runs (-global-isel=0) expect the negation to appear as a
; neg source modifier (-v2) on v_mad_f16 (tonga) / v_mad_legacy_f16 (gfx900).
; The GlobalISel run also expects a negated source (-v1) on v_mad_legacy_f16,
; but preceded by a v_max_f16_e64 of b with itself.
; NOTE(review): that v_max looks like a canonicalize of b kept for the fsub
; form - confirm before relying on it.
define amdgpu_kernel void @mad_f16_neg_b(
|
|
; GFX8-LABEL: mad_f16_neg_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v4, s6
|
|
; GFX8: v_mov_b32_e32 v5, s7
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mad_f16 v2, v6, -v2, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16_neg_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mad_legacy_f16 v1, v1, -v2, v3
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16_neg_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s0, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s1, v2
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v3
|
|
; GFX9-GISEL: v_max_f16_e64 v1, s1, s1
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, s2
|
|
; GFX9-GISEL: v_mad_legacy_f16 v1, s0, -v1, v2
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%c.val = load half, ptr addrspace(1) %c
|
|
%neg.b = fsub half -0.0, %b.val
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.b, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second operand is llvm.fabs.f16(b) before it is passed to
; llvm.amdgcn.fmad.ftz.f16.
; Every run expects the fabs to appear as an abs source modifier on the mad
; instruction: |v2| on v_mad_f16 (tonga) / v_mad_legacy_f16 (gfx900) for the
; SelectionDAG runs, and |s1| on v_mad_legacy_f16 for the GlobalISel run.
define amdgpu_kernel void @mad_f16_abs_b(
|
|
; GFX8-LABEL: mad_f16_abs_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v4, s6
|
|
; GFX8: v_mov_b32_e32 v5, s7
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mad_f16 v2, v6, |v2|, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16_abs_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mad_legacy_f16 v1, v1, |v2|, v3
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16_abs_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s0, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s1, v2
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v3
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s0
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, s2
|
|
; GFX9-GISEL: v_mad_legacy_f16 v1, v1, |s1|, v2
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%c.val = load half, ptr addrspace(1) %c
|
|
%abs.b = call half @llvm.fabs.f16(half %b.val)
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %abs.b, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second operand is the negated absolute value of b: llvm.fabs.f16(b) followed
; by (fsub -0.0, abs), before it is passed to llvm.amdgcn.fmad.ftz.f16.
; The SelectionDAG runs (-global-isel=0) expect both operations to appear as
; a combined -|v2| source modifier on v_mad_f16 (tonga) / v_mad_legacy_f16
; (gfx900). The GlobalISel run expects the abs modifier on a separate
; v_max_f16_e64 of b with itself, and only the neg modifier (-v1) on
; v_mad_legacy_f16.
; NOTE(review): that v_max looks like a canonicalize kept for the fsub form -
; confirm before relying on it.
define amdgpu_kernel void @mad_f16_neg_abs_b(
|
|
; GFX8-LABEL: mad_f16_neg_abs_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s2
|
|
; GFX8: v_mov_b32_e32 v1, s3
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mov_b32_e32 v4, s6
|
|
; GFX8: v_mov_b32_e32 v5, s7
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mad_f16 v2, v6, -|v2|, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f16_neg_abs_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mad_legacy_f16 v1, v1, -|v2|, v3
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f16_neg_abs_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: v_readfirstlane_b32 s0, v1
|
|
; GFX9-GISEL: v_readfirstlane_b32 s1, v2
|
|
; GFX9-GISEL: v_readfirstlane_b32 s2, v3
|
|
; GFX9-GISEL: v_max_f16_e64 v1, |s1|, |s1|
|
|
; GFX9-GISEL: v_mov_b32_e32 v2, s2
|
|
; GFX9-GISEL: v_mad_legacy_f16 v1, s0, -v1, v2
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load half, ptr addrspace(1) %a
|
|
%b.val = load half, ptr addrspace(1) %b
|
|
%c.val = load half, ptr addrspace(1) %c
|
|
%abs.b = call half @llvm.fabs.f16(half %b.val)
|
|
%neg.abs.b = fsub half -0.0, %abs.b
|
|
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.abs.b, half %c.val)
|
|
store half %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
declare half @llvm.fabs.f16(half)
|