
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN %s

declare i32 @llvm.amdgcn.workitem.id.x() #0

; --------------------------------------------------------------------------------
; i32 compares
; --------------------------------------------------------------------------------

; icmp eq i32 %val, 64: constant is commuted into src0 of v_cmp_eq_u32.
define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_eq_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp eq i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ne i32 %val, 64: constant is commuted into src0 of v_cmp_ne_u32.
define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ne_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ne i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; FIXME: Why isn't this being folded as a constant?
; icmp ne i32 %val, 12345: literal constant is materialized in s4 first.
define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ne_litk_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_movk_i32 s4, 0x3039
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s4, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ne i32 %val, 12345
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ugt i32 %val, 64: commuted to v_cmp_lt_u32 with the constant as src0.
define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ugt i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp uge i32 %val, 64: canonicalized to (63 < %val), keeping an inline imm.
define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 63, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp uge i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ult i32 %val, 64: commuted to v_cmp_gt_u32 with the constant as src0.
define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ult i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ule i32 %val, 63: canonicalized to (%val < 64), keeping an inline imm.
define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_63_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ule i32 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ule i32 %val, 64: becomes (%val < 65); 0x41 is not an inline imm, so
; the literal is materialized in s4.
define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_movk_i32 s4, 0x41
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s4, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ule i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp sgt i32 %val, -1: lowered without a compare, as (not %val) >> 31.
define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sgt_neg1_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_not_b32_e32 v2, v2
; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v2
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sgt i32 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp sge i32 %val, -2: canonicalized to (-3 < %val).
define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sge_neg2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, -3, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sge i32 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp slt i32 %val, -16: commuted to v_cmp_gt_i32 with the constant as src0.
define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_slt_neg16_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, -16, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp slt i32 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp sle i32 %val, 5: canonicalized to (%val < 6).
define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sle_5_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 6, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sle i32 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; --------------------------------------------------------------------------------
; i64 compares
; --------------------------------------------------------------------------------

; icmp eq i64 %val, 64: constant is commuted into src0 of v_cmp_eq_u64.
define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_eq_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp eq i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ne i64 %val, 64: constant is commuted into src0 of v_cmp_ne_u64.
define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ne_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ne i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ugt i64 %val, 64: commuted to v_cmp_lt_u64 with the constant as src0.
define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ugt i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp uge i64 %val, 64: canonicalized to (63 < %val), keeping an inline imm.
define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp uge i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ult i64 %val, 64: commuted to v_cmp_gt_u64 with the constant as src0.
define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ult i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp ule i64 %val, 63: canonicalized to (%val < 64), keeping an inline imm.
define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_63_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ule i64 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm

; icmp ule i64 %val, 64: becomes (%val < 65); 0x41 is materialized in s[4:5].
define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[4:5], 0x41
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ule i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp sgt i64 %val, -1: commuted to v_cmp_lt_i64 with -1 as src0.
define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sgt_neg1_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sgt i64 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp sge i64 %val, -2: canonicalized to (-3 < %val).
define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sge_neg2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -3, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sge i64 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp slt i64 %val, -16: commuted to v_cmp_gt_i64 with the constant as src0.
define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_slt_neg16_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i64_e32 vcc, -16, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp slt i64 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; icmp sle i64 %val, 5: canonicalized to (%val < 6).
define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sle_5_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 6, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sle i64 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; --------------------------------------------------------------------------------
; f32 compares
; --------------------------------------------------------------------------------

; fcmp oeq float %val, 2.0: constant is commuted into src0 of v_cmp_eq_f32.
define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oeq_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp oeq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; fcmp ogt float %val, 2.0: commuted to v_cmp_lt_f32 with the constant as src0.
define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ogt_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ogt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; fcmp oge float %val, 2.0: commuted to v_cmp_le_f32 with the constant as src0.
define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oge_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp oge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; fcmp olt float %val, 2.0: commuted to v_cmp_gt_f32 with the constant as src0.
define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_olt_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp olt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}

; fcmp ole %val, 2.0 — commuted to put the inline constant first,
; selecting v_cmp_ge_f32 (2.0 >= x  <=>  x <= 2.0).
define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ole_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ole float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp one %val, 2.0 — 'one' is symmetric, so commuting just swaps operand order;
; checks select v_cmp_lg_f32 (ordered less-or-greater) with 2.0 first.
define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_one_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lg_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp one float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ord %val, 2.0 — since 2.0 is never NaN, the check expects the compare to
; fold to an ordered self-compare: v_cmp_o_f32 v2, v2 (true iff v2 is not NaN).
define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ord_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ord float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ueq %val, 2.0 — unordered-equal selects the negated form v_cmp_nlg_f32
; (not less-or-greater), commuted so the 2.0 inline constant comes first.
define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ueq_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlg_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ueq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ugt %val, 2.0 — commuted and negated: v_cmp_nge_f32 with 2.0 first
; (!(2.0 >= x)  <=>  x > 2.0 or unordered).
define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nge_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ugt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp uge %val, 2.0 — commuted and negated: v_cmp_ngt_f32 with 2.0 first
; (!(2.0 > x)  <=>  x >= 2.0 or unordered).
define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp uge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ult %val, 2.0 — commuted and negated: v_cmp_nle_f32 with 2.0 first
; (!(2.0 <= x)  <=>  x < 2.0 or unordered).
define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nle_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ult float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ule %val, 2.0 — commuted and negated: v_cmp_nlt_f32 with 2.0 first
; (!(2.0 < x)  <=>  x <= 2.0 or unordered).
define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ule float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp une %val, 2.0 — symmetric predicate; checks select v_cmp_neq_f32
; (unordered-not-equal) with the 2.0 inline constant first.
define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_une_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp une float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp uno %val, 2.0 — since 2.0 is never NaN, the check expects the compare to
; fold to an unordered self-compare: v_cmp_u_f32 v2, v2 (true iff v2 is NaN).
define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uno_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp uno float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; --------------------------------------------------------------------------------
|
|
; f64 compares
|
|
; --------------------------------------------------------------------------------
|
|
|
|
; fcmp oeq %val, 2.0 (f64) — symmetric predicate; checks select v_cmp_eq_f64
; with the 2.0 inline constant as the first operand.
define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oeq_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp oeq double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ogt %val, 2.0 (f64) — commuted so 2.0 is first, selecting v_cmp_lt_f64
; (2.0 < x  <=>  x > 2.0).
define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ogt_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ogt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp oge %val, 2.0 (f64) — commuted so 2.0 is first, selecting v_cmp_le_f64
; (2.0 <= x  <=>  x >= 2.0).
define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oge_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_le_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp oge double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp olt %val, 2.0 (f64) — commuted so 2.0 is first, selecting v_cmp_gt_f64
; (2.0 > x  <=>  x < 2.0).
define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_olt_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp olt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ole %val, 2.0 (f64) — commuted so 2.0 is first, selecting v_cmp_ge_f64
; (2.0 >= x  <=>  x <= 2.0).
define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ole_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ole double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp one %val, 2.0 (f64) — symmetric predicate; checks select v_cmp_lg_f64
; (ordered less-or-greater) with 2.0 first.
define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_one_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lg_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp one double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ord %val, 2.0 (f64) — since 2.0 is never NaN, the check expects the fold
; to an ordered self-compare: v_cmp_o_f64 v[3:4], v[3:4].
define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ord_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_o_f64_e32 vcc, v[3:4], v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ord double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ueq %val, 2.0 (f64) — unordered-equal selects the negated form
; v_cmp_nlg_f64 (not less-or-greater) with 2.0 first.
define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ueq_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlg_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ueq double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ugt %val, 2.0 (f64) — commuted and negated: v_cmp_nge_f64 with 2.0 first
; (!(2.0 >= x)  <=>  x > 2.0 or unordered).
define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nge_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ugt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp uge %val, 2.0 (f64) — commuted and negated: v_cmp_ngt_f64 with 2.0 first
; (!(2.0 > x)  <=>  x >= 2.0 or unordered).
define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ngt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp uge double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ult %val, 2.0 (f64) — commuted and negated: v_cmp_nle_f64 with 2.0 first
; (!(2.0 <= x)  <=>  x < 2.0 or unordered).
define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nle_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ult double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp ule %val, 2.0 (f64) — commuted and negated: v_cmp_nlt_f64 with 2.0 first
; (!(2.0 < x)  <=>  x <= 2.0 or unordered).
define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ule double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp une %val, 2.0 (f64) — symmetric predicate; checks select v_cmp_neq_f64
; (unordered-not-equal) with 2.0 first.
define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_une_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_neq_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp une double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
; fcmp uno %val, 2.0 (f64) — since 2.0 is never NaN, the check expects the fold
; to an unordered self-compare: v_cmp_u_f64 v[3:4], v[3:4].
define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uno_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[3:4], v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp uno double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
|
|
|
|
|
|
; FIXME: Should be able to fold this frameindex
|
|
; Without commuting the frame index in the pre-regalloc run of
|
|
; SIShrinkInstructions, this was using the VOP3 compare.
|
|
|
|
; Compares a loaded private pointer against a frame index. The checks currently
; show the frame index materialized into s4 and compared via v_cmp_eq_u32 with
; the SGPR operand — the FIXME above notes the frame index could be folded.
define amdgpu_kernel void @commute_frameindex(ptr addrspace(1) nocapture %out) #0 {
; GCN-LABEL: commute_frameindex:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s14, -1
; GCN-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-NEXT: s_add_u32 s12, s12, s11
; GCN-NEXT: s_addc_u32 s13, s13, 0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
entry:
  %stack0 = alloca i32, addrspace(5)
  %ptr0 = load volatile ptr addrspace(5), ptr addrspace(1) poison
  %eq = icmp eq ptr addrspace(5) %ptr0, %stack0
  %ext = zext i1 %eq to i32
  store volatile i32 %ext, ptr addrspace(1) %out
  ret void
}
|
|
|
|
attributes #0 = { nounwind readnone }
|
|
attributes #1 = { nounwind }
|