346 lines
11 KiB
LLVM
346 lines
11 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "store" --filter-out "load" --filter-out "wait"
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GFX6 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GFX8 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GFX8 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GFX9-SDAG %s
|
|
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GFX9-GISEL %s
|
|
|
|
declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)
|
|
|
|
; Baseline case: loads %a, %b and %c, passes them unmodified to
; llvm.amdgcn.fmad.ftz.f32 and stores the result to %r. Every configuration
; exercised by this file is expected to select v_mac_f32_e32 here. The
; autogenerated check lines omit load/store/wait instructions (see the
; --filter-out arguments in the NOTE header at the top of the file).
define amdgpu_kernel void @mad_f32(
|
|
; GFX6-LABEL: mad_f32:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: s_mov_b32 s11, 0xf000
|
|
; GFX6: s_mov_b32 s10, -1
|
|
; GFX6: s_mov_b32 s8, s0
|
|
; GFX6: s_mov_b32 s9, s1
|
|
; GFX6: v_mov_b32_e32 v0, s6
|
|
; GFX6: v_mov_b32_e32 v1, s4
|
|
; GFX6: v_mac_f32_e32 v0, s2, v1
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v2, s6
|
|
; GFX8: v_mov_b32_e32 v3, s4
|
|
; GFX8: v_mac_f32_e32 v2, s2, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, s0
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, s1
|
|
; GFX9-SDAG: v_mac_f32_e32 v1, s2, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, s0
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s1
|
|
; GFX9-GISEL: v_mac_f32_e32 v1, s2, v0
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load float, ptr addrspace(1) %a
|
|
%b.val = load float, ptr addrspace(1) %b
|
|
%c.val = load float, ptr addrspace(1) %c
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float %c.val)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; TODO: GlobalISel should also fold the immediate into v_madmk_f32, as SelectionDAG does.
|
|
; First multiplicand is the constant 8.0 (bit pattern 0x41000000); %b and %c
; are loaded from memory. The SelectionDAG configurations are expected to
; embed the literal directly in v_madmk_f32, while the GlobalISel
; configuration currently materializes it with v_mov_b32 and then uses
; v_mac_f32_e32.
define amdgpu_kernel void @mad_f32_imm_a(
|
|
; GFX6-LABEL: mad_f32_imm_a:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: s_mov_b32 s7, 0xf000
|
|
; GFX6: s_mov_b32 s6, -1
|
|
; GFX6: s_mov_b32 s4, s0
|
|
; GFX6: s_mov_b32 s5, s1
|
|
; GFX6: v_mov_b32_e32 v0, s8
|
|
; GFX6: v_mov_b32_e32 v1, s2
|
|
; GFX6: v_madmk_f32 v0, v1, 0x41000000, v0
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32_imm_a:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s2
|
|
; GFX8: v_madmk_f32 v2, v3, 0x41000000, v2
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32_imm_a:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, s4
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, s5
|
|
; GFX9-SDAG: v_madmk_f32 v1, v2, 0x41000000, v1
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32_imm_a:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0x41000000
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s4
|
|
; GFX9-GISEL: v_mac_f32_e32 v1, s5, v0
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%b.val = load float, ptr addrspace(1) %b
|
|
%c.val = load float, ptr addrspace(1) %c
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float 8.0, float %b.val, float %c.val)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second multiplicand is the constant 8.0 (bit pattern 0x41000000); %a and %c
; are loaded from memory. All configurations are expected to materialize the
; literal in a VGPR with v_mov_b32 and then select v_mac_f32_e32.
define amdgpu_kernel void @mad_f32_imm_b(
|
|
; GFX6-LABEL: mad_f32_imm_b:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: v_mov_b32_e32 v1, 0x41000000
|
|
; GFX6: s_mov_b32 s7, 0xf000
|
|
; GFX6: s_mov_b32 s6, -1
|
|
; GFX6: s_mov_b32 s4, s0
|
|
; GFX6: s_mov_b32 s5, s1
|
|
; GFX6: v_mov_b32_e32 v0, s8
|
|
; GFX6: v_mac_f32_e32 v0, s2, v1
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32_imm_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v3, 0x41000000
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mac_f32_e32 v2, s2, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32_imm_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, 0x41000000
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, s4
|
|
; GFX9-SDAG: v_mac_f32_e32 v1, s5, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32_imm_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0x41000000
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s4
|
|
; GFX9-GISEL: v_mac_f32_e32 v1, s5, v0
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load float, ptr addrspace(1) %a
|
|
%c.val = load float, ptr addrspace(1) %c
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float 8.0, float %c.val)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Addend is the constant 8.0 (bit pattern 0x41000000); %a and %b are loaded
; from memory. All configurations are expected to move the literal into the
; VGPR that v_mac_f32_e32 then accumulates into.
define amdgpu_kernel void @mad_f32_imm_c(
|
|
; GFX6-LABEL: mad_f32_imm_c:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: v_mov_b32_e32 v0, 0x41000000
|
|
; GFX6: s_mov_b32 s7, 0xf000
|
|
; GFX6: s_mov_b32 s6, -1
|
|
; GFX6: s_mov_b32 s4, s0
|
|
; GFX6: s_mov_b32 s5, s1
|
|
; GFX6: v_mov_b32_e32 v1, s8
|
|
; GFX6: v_mac_f32_e32 v0, s2, v1
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32_imm_c:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v2, 0x41000000
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v3, s4
|
|
; GFX8: v_mac_f32_e32 v2, s2, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32_imm_c:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, 0x41000000
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, s4
|
|
; GFX9-SDAG: v_mac_f32_e32 v1, s5, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32_imm_c:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, 0x41000000
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s4
|
|
; GFX9-GISEL: v_mac_f32_e32 v0, s5, v1
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b) {
|
|
%a.val = load float, ptr addrspace(1) %a
|
|
%b.val = load float, ptr addrspace(1) %b
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float 8.0)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second multiplicand is negated with fneg before the intrinsic call. All
; configurations are expected to fold the negation into a source modifier of
; v_mad_f32 (a "-" prefixed operand) rather than emit a separate instruction
; for it.
define amdgpu_kernel void @mad_f32_neg_b(
|
|
; GFX6-LABEL: mad_f32_neg_b:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: s_mov_b32 s11, 0xf000
|
|
; GFX6: s_mov_b32 s10, -1
|
|
; GFX6: s_mov_b32 s8, s0
|
|
; GFX6: s_mov_b32 s9, s1
|
|
; GFX6: v_mov_b32_e32 v0, s4
|
|
; GFX6: v_mov_b32_e32 v1, s5
|
|
; GFX6: v_mad_f32 v0, s2, -v0, v1
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32_neg_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mad_f32 v2, s2, -v2, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32_neg_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, s0
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, s1
|
|
; GFX9-SDAG: v_mad_f32 v1, s2, -v1, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32_neg_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, s0
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s1
|
|
; GFX9-GISEL: v_mad_f32 v0, v0, -s2, v1
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load float, ptr addrspace(1) %a
|
|
%b.val = load float, ptr addrspace(1) %b
|
|
%c.val = load float, ptr addrspace(1) %c
|
|
%neg.b = fneg float %b.val
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second multiplicand is passed through llvm.fabs.f32 before the intrinsic
; call. All configurations are expected to fold the absolute value into a
; source modifier of v_mad_f32 (an operand written between "|" bars) rather
; than emit a separate instruction for it.
define amdgpu_kernel void @mad_f32_abs_b(
|
|
; GFX6-LABEL: mad_f32_abs_b:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: s_mov_b32 s11, 0xf000
|
|
; GFX6: s_mov_b32 s10, -1
|
|
; GFX6: s_mov_b32 s8, s0
|
|
; GFX6: s_mov_b32 s9, s1
|
|
; GFX6: v_mov_b32_e32 v0, s4
|
|
; GFX6: v_mov_b32_e32 v1, s5
|
|
; GFX6: v_mad_f32 v0, s2, |v0|, v1
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32_abs_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mad_f32 v2, s2, |v2|, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32_abs_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, s0
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, s1
|
|
; GFX9-SDAG: v_mad_f32 v1, s2, |v1|, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32_abs_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, s0
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s1
|
|
; GFX9-GISEL: v_mad_f32 v0, v0, |s2|, v1
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load float, ptr addrspace(1) %a
|
|
%b.val = load float, ptr addrspace(1) %b
|
|
%c.val = load float, ptr addrspace(1) %c
|
|
%abs.b = call float @llvm.fabs.f32(float %b.val)
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %abs.b, float %c.val)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
; Second multiplicand is fneg(fabs(%b.val)). All configurations are expected
; to fold both operations into a single combined source modifier of v_mad_f32
; (an operand written as "-|x|") rather than emit separate instructions for
; them.
define amdgpu_kernel void @mad_f32_neg_abs_b(
|
|
; GFX6-LABEL: mad_f32_neg_abs_b:
|
|
; GFX6: ; %bb.0:
|
|
; GFX6: s_mov_b32 s11, 0xf000
|
|
; GFX6: s_mov_b32 s10, -1
|
|
; GFX6: s_mov_b32 s8, s0
|
|
; GFX6: s_mov_b32 s9, s1
|
|
; GFX6: v_mov_b32_e32 v0, s4
|
|
; GFX6: v_mov_b32_e32 v1, s5
|
|
; GFX6: v_mad_f32 v0, s2, -|v0|, v1
|
|
; GFX6: s_endpgm
|
|
;
|
|
; GFX8-LABEL: mad_f32_neg_abs_b:
|
|
; GFX8: ; %bb.0:
|
|
; GFX8: v_mov_b32_e32 v0, s0
|
|
; GFX8: v_mov_b32_e32 v1, s1
|
|
; GFX8: v_mov_b32_e32 v2, s4
|
|
; GFX8: v_mov_b32_e32 v3, s5
|
|
; GFX8: v_mad_f32 v2, s2, -|v2|, v3
|
|
; GFX8: s_endpgm
|
|
;
|
|
; GFX9-SDAG-LABEL: mad_f32_neg_abs_b:
|
|
; GFX9-SDAG: ; %bb.0:
|
|
; GFX9-SDAG: v_mov_b32_e32 v0, 0
|
|
; GFX9-SDAG: v_mov_b32_e32 v1, s0
|
|
; GFX9-SDAG: v_mov_b32_e32 v2, s1
|
|
; GFX9-SDAG: v_mad_f32 v1, s2, -|v1|, v2
|
|
; GFX9-SDAG: s_endpgm
|
|
;
|
|
; GFX9-GISEL-LABEL: mad_f32_neg_abs_b:
|
|
; GFX9-GISEL: ; %bb.0:
|
|
; GFX9-GISEL: v_mov_b32_e32 v0, s0
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, s1
|
|
; GFX9-GISEL: v_mad_f32 v0, v0, -|s2|, v1
|
|
; GFX9-GISEL: v_mov_b32_e32 v1, 0
|
|
; GFX9-GISEL: s_endpgm
|
|
ptr addrspace(1) %r,
|
|
ptr addrspace(1) %a,
|
|
ptr addrspace(1) %b,
|
|
ptr addrspace(1) %c) {
|
|
%a.val = load float, ptr addrspace(1) %a
|
|
%b.val = load float, ptr addrspace(1) %b
|
|
%c.val = load float, ptr addrspace(1) %c
|
|
%abs.b = call float @llvm.fabs.f32(float %b.val)
|
|
%neg.abs.b = fneg float %abs.b
|
|
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
|
|
store float %r.val, ptr addrspace(1) %r
|
|
ret void
|
|
}
|
|
|
|
declare float @llvm.fabs.f32(float)
|