The main improvement is to the mfma tests. There are some mild regressions scattered around, and a few major ones. The worst regressions are in some of the bitcast tests; these are cases where the SGPR argument list runs out and uses VGPRs, and the copies-from-VGPR are misidentified as divergent. Most of the shufflevector tests are also regressions. These end up with cleaner MIR, but then get poor regalloc decisions.
592 lines
22 KiB
LLVM
592 lines
22 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
|
|
|
; Uniform i32 kernel argument converted to double and stored to %out. Every
; tested target is expected to feed v_cvt_f64_i32 directly from an SGPR.
define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: sint_to_fp_i32_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: sint_to_fp_i32_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: sint_to_fp_i32_to_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v2, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %conv = sitofp i32 %in to double
  store double %conv, ptr addrspace(1) %out
  ret void
}
|
|
|
|
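; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR (implicit vcc).
; NOTE(review): the expected output for the next function selects the high
; dword with s_cselect_b32 and contains no v_cndmask, so the remark above may
; describe older codegen - confirm before relying on it.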
|
|
; sitofp of an i1 compare result (the value is -1.0 when %in is zero, else 0.0).
; The expected code only materializes the high dword (0xbff00000 or 0) with
; s_cselect_b32 and pairs it with a zero low dword. The store is align 4.
define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: sint_to_fp_i1_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: sint_to_fp_i1_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: sint_to_fp_i1_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v0, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_cmp_eq_u32 s2, 0
; GFX942-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; GFX942-NEXT:    v_mov_b32_e32 v1, s2
; GFX942-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %is.zero = icmp eq i32 %in, 0
  %conv = sitofp i1 %is.zero to double
  store double %conv, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; sitofp of an i1 that arrives as a kernel argument. The expected code tests
; bit 0 with s_bitcmp1_b32, builds 0 / -1 with v_cndmask_b32_e64 and then
; converts that integer with v_cvt_f64_i32.
define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) {
; CI-LABEL: sint_to_fp_i1_f64_load:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_bitcmp1_b32 s2, 0
; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; CI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: sint_to_fp_i1_f64_load:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bitcmp1_b32 s2, 0
; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: sint_to_fp_i1_f64_load:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v2, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_bitcmp1_b32 s2, 0
; GFX942-NEXT:    s_cselect_b64 s[2:3], -1, 0
; GFX942-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; GFX942-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %conv = sitofp i1 %in to double
  store double %conv, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; Uniform i64 -> double. The expected expansion converts the high dword as
; signed and the low dword as unsigned, scales the high part with
; v_ldexp_f64 ..., 32 and sums the two halves with v_add_f64.
define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
; CI-LABEL: s_sint_to_fp_i64_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
; CI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; CI-NEXT:    v_mov_b32_e32 v4, s0
; CI-NEXT:    v_mov_b32_e32 v5, s1
; CI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; CI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; CI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_sint_to_fp_i64_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; VI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: s_sint_to_fp_i64_to_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v4, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
; GFX942-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; GFX942-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; GFX942-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX942-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %conv = sitofp i64 %in to double
  store double %conv, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Per-workitem i64 -> double: the source value is loaded from %in at the index
; given by workitem id x, converted, and stored to %out. The expected code uses
; the same hi/lo split (signed high dword, unsigned low dword, ldexp by 32,
; add) as the uniform variant, operating on VGPRs.
define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CI-LABEL: v_sint_to_fp_i64_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v1, s3
; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_cvt_f64_i32_e32 v[1:2], v1
; CI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
; CI-NEXT:    v_ldexp_f64 v[0:1], v[1:2], 32
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    v_add_f64 v[0:1], v[0:1], v[3:4]
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: v_sint_to_fp_i64_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cvt_f64_i32_e32 v[1:2], v1
; VI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
; VI-NEXT:    v_ldexp_f64 v[1:2], v[1:2], 32
; VI-NEXT:    v_add_f64 v[0:1], v[1:2], v[3:4]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: v_sint_to_fp_i64_to_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX942-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX942-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX942-NEXT:    v_mov_b32_e32 v4, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    global_load_dwordx2 v[0:1], v0, s[2:3]
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    v_cvt_f64_i32_e32 v[2:3], v1
; GFX942-NEXT:    v_ldexp_f64 v[2:3], v[2:3], 32
; GFX942-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
; GFX942-NEXT:    v_add_f64 v[0:1], v[2:3], v[0:1]
; GFX942-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %lane.id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %elt.ptr = getelementptr i64, ptr addrspace(1) %in, i32 %lane.id
  %wide = load i64, ptr addrspace(1) %elt.ptr, align 8
  %conv = sitofp i64 %wide to double
  store double %conv, ptr addrspace(1) %out
  ret void
}
|
|
|
|
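; FIXME: On VI and newer the VGPR i8 case (v_sint_to_fp_i8_to_f64) emits a
; v_bfe_i32 from 8 bits followed by a second, redundant v_bfe_i32 from 16 bits.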
|
|
; Uniform i8 -> double. The expected code sign-extends the argument on the
; scalar unit with s_sext_i32_i8 and then converts it with v_cvt_f64_i32.
define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
; CI-LABEL: s_sint_to_fp_i8_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_sext_i32_i8 s2, s2
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_sint_to_fp_i8_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_sext_i32_i8 s2, s2
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: s_sint_to_fp_i8_to_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v2, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_sext_i32_i8 s2, s2
; GFX942-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; GFX942-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %conv = sitofp i8 %in to double
  store double %conv, ptr addrspace(1) %out
  ret void
}
|
|
|
|
; Non-kernel i8 -> double with the argument in a VGPR. The hawaii checks use a
; single 8-bit v_bfe_i32 before the convert; the fiji and gfx942 checks show
; an extra 16-bit v_bfe_i32 after it.
define double @v_sint_to_fp_i8_to_f64(i8 %in) {
; CI-LABEL: v_sint_to_fp_i8_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_bfe_i32 v0, v0, 0, 8
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_sint_to_fp_i8_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 8
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_sint_to_fp_i8_to_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX942-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX942-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; GFX942-NEXT:    s_setpc_b64 s[30:31]
  %conv = sitofp i8 %in to double
  ret double %conv
}
|
|
|
|
; Uniform select between the two values sitofp i1 can produce (-1.0 when the
; compare is true, 0.0 otherwise). The expected code picks the high dword with
; s_cselect_b32 and stores it together with a zero low dword.
define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_select_sint_to_fp_i1_vals_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: s_select_sint_to_fp_i1_vals_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v0, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_cmp_eq_u32 s2, 0
; GFX942-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; GFX942-NEXT:    v_mov_b32_e32 v1, s2
; GFX942-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %is.zero = icmp eq i32 %in, 0
  %sel = select i1 %is.zero, double -1.0, double 0.0
  store double %sel, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; Non-kernel (VGPR argument) select between -1.0 and 0.0. The expected code
; compares with v_cmp_eq_u32 and uses v_cndmask_b32_e32 to choose the high
; dword (0xbff00000 or 0); the low dword is a constant zero.
define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_select_sint_to_fp_i1_vals_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    v_mov_b32_e32 v3, 0xbff00000
; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GFX942-NEXT:    v_mov_b32_e32 v4, 0
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    v_cndmask_b32_e32 v5, 0, v3, vcc
; GFX942-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    s_setpc_b64 s[30:31]
  %is.zero = icmp eq i32 %in, 0
  %sel = select i1 %is.zero, double -1.0, double 0.0
  store double %sel, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; Integer twin of the uniform f64 select test: the i64 constant
; 0xbff0000000000000 has the bit pattern of double -1.0. The expected code is
; again an s_cselect_b32 on the high dword plus a zero low dword.
define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_select_sint_to_fp_i1_vals_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: s_select_sint_to_fp_i1_vals_i64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v0, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_cmp_eq_u32 s2, 0
; GFX942-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; GFX942-NEXT:    v_mov_b32_e32 v1, s2
; GFX942-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %is.zero = icmp eq i32 %in, 0
  %sel = select i1 %is.zero, i64 u0xbff0000000000000, i64 0
  store i64 %sel, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; Non-kernel integer twin of the VGPR f64 select test: selects between the
; i64 bit pattern of double -1.0 and zero. The expected code is a v_cmp_eq_u32
; followed by a v_cndmask_b32_e32 on the high dword only.
define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_select_sint_to_fp_i1_vals_i64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    v_mov_b32_e32 v3, 0xbff00000
; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GFX942-NEXT:    v_mov_b32_e32 v4, 0
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    v_cndmask_b32_e32 v5, 0, v3, vcc
; GFX942-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    s_setpc_b64 s[30:31]
  %is.zero = icmp eq i32 %in, 0
  %sel = select i1 %is.zero, i64 u0xbff0000000000000, i64 0
  store i64 %sel, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; TODO: This should swap the selected order / invert the compare and do it.
|
|
; Non-kernel select with the operands swapped relative to the sitofp-i1 form
; (0.0 when the compare is true, -1.0 otherwise). The expected code keeps the
; compare as written and uses v_cndmask_b32_e64 with the operands reversed.
define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    v_mov_b32_e32 v3, 0xbff00000
; GFX942-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GFX942-NEXT:    v_mov_b32_e32 v4, 0
; GFX942-NEXT:    s_nop 0
; GFX942-NEXT:    v_cndmask_b32_e64 v5, v3, 0, vcc
; GFX942-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    s_setpc_b64 s[30:31]
  %is.zero = icmp eq i32 %in, 0
  %sel = select i1 %is.zero, double 0.0, double -1.0
  store double %sel, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; TODO: This should swap the selected order / invert the compare and do it.
|
|
; Uniform select with the operands swapped relative to the sitofp-i1 form
; (0.0 when the compare is true, -1.0 otherwise). The expected code keeps
; s_cmp_eq_u32 and reverses the s_cselect_b32 operands to 0, 0xbff00000.
define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_add_i32 s12, s12, s17
; CI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; CI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_add_i32 s12, s12, s17
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s13
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX942-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT:    v_mov_b32_e32 v0, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_cmp_eq_u32 s2, 0
; GFX942-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
; GFX942-NEXT:    v_mov_b32_e32 v1, s2
; GFX942-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
; GFX942-NEXT:    s_endpgm
  %is.zero = icmp eq i32 %in, 0
  %sel = select i1 %is.zero, double 0.0, double -1.0
  store double %sel, ptr addrspace(1) %out, align 8
  ret void
}
|