; llvm-project/llvm/test/CodeGen/AMDGPU/commute-compares.ll
; Shilei Tian fc0653f31c
; [RFC][NFC][AMDGPU] Remove -verify-machineinstrs from llvm/test/CodeGen/AMDGPU/*.ll (#150024)
; Recent upstream trends have moved away from explicitly using `-verify-machineinstrs`, as it's already covered by the expensive checks. This PR removes almost all `-verify-machineinstrs` from tests in `llvm/test/CodeGen/AMDGPU/*.ll`, leaving only those tests where its removal currently causes failures.
; 2025-07-23 13:42:46 -04:00
;
; 1467 lines
; 58 KiB
; LLVM
;
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
; --------------------------------------------------------------------------------
; i32 compares
; --------------------------------------------------------------------------------
; icmp eq with 64: the constant fits in an inline immediate, so it is commuted
; into the src0 slot of the VOPC compare (see v_cmp_eq_u32_e32 ... 64, v2).
define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_eq_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp eq i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ne with inline-immediate 64: the constant is commuted into src0 of the
; compare (v_cmp_ne_u32_e32 ... 64, v2).
define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ne_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ne i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; FIXME: Why isn't this being folded as a constant?
; icmp ne with 12345 (0x3039): too large for an inline immediate, so the
; constant is materialized in an SGPR (s_movk_i32 s4) and used as src0.
define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ne_litk_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_movk_i32 s4, 0x3039
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s4, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ne i32 %val, 12345
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ugt %val, 64: commuting the operands turns the predicate into ult, so
; the selected instruction is v_cmp_lt_u32 with 64 as src0.
define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ugt i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp uge %val, 64 is canonicalized to ugt 63, then commuted: the output is
; v_cmp_lt_u32 with inline immediate 63 as src0.
define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 63, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp uge i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ult %val, 64: commuted to v_cmp_gt_u32 with inline immediate 64 as src0.
define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ult i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule %val, 63 is canonicalized to ult 64, and 64 still fits as an inline
; immediate, so the result is v_cmp_gt_u32 ... 64, v2.
define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_63_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ule i32 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule %val, 64 is canonicalized to ult 65; 65 (0x41) is not an inline
; immediate, so it is moved into an SGPR first (s_movk_i32 s4, 0x41).
define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_movk_i32 s4, 0x41
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s4, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ule i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sgt %val, -1 (sign-bit test): no compare is emitted at all; the sext'd
; result is produced via v_not + arithmetic shift right by 31.
define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sgt_neg1_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_not_b32_e32 v2, v2
; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v2
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sgt i32 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sge %val, -2 is canonicalized to sgt -3, then commuted:
; v_cmp_lt_i32 with inline immediate -3 as src0.
define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sge_neg2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, -3, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sge i32 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp slt %val, -16: commuted to v_cmp_gt_i32 with inline immediate -16
; (the most negative inline immediate) as src0.
define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_slt_neg16_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, -16, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp slt i32 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sle %val, 5 is canonicalized to slt 6, then commuted:
; v_cmp_gt_i32 with inline immediate 6 as src0.
define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sle_5_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 6, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sle i32 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; --------------------------------------------------------------------------------
; i64 compares
; --------------------------------------------------------------------------------
; 64-bit variant: inline immediate 64 is commuted into src0 of the
; 64-bit compare (v_cmp_eq_u64_e32 ... 64, v[3:4]).
define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_eq_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp eq i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; 64-bit ne with inline immediate 64 commuted into src0
; (v_cmp_ne_u64_e32 ... 64, v[3:4]).
define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ne_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ne i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ugt i64 %val, 64: commuted to v_cmp_lt_u64 with 64 as src0.
define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ugt i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp uge i64 %val, 64 is canonicalized to ugt 63, then commuted:
; v_cmp_lt_u64 with inline immediate 63 as src0.
define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp uge i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ult i64 %val, 64: commuted to v_cmp_gt_u64 with 64 as src0.
define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ult i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule i64 %val, 63 is canonicalized to ult 64; 64 is still an inline
; immediate, so the result is v_cmp_gt_u64 ... 64, v[3:4].
define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_63_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ule i64 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
; icmp ule i64 %val, 64 is canonicalized to ult 65; 65 (0x41) is not an inline
; immediate, so it is materialized in an SGPR pair (s_mov_b64 s[4:5], 0x41).
define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_64_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[4:5], 0x41
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ule i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sgt i64 %val, -1: unlike the i32 case, a real compare is emitted here,
; with inline immediate -1 commuted into src0 (v_cmp_lt_i64 ... -1, v[3:4]).
define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sgt_neg1_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sgt i64 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sge i64 %val, -2 is canonicalized to sgt -3, then commuted:
; v_cmp_lt_i64 with inline immediate -3 as src0.
define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sge_neg2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -3, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sge i64 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp slt i64 %val, -16: commuted to v_cmp_gt_i64 with inline immediate -16
; as src0.
define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_slt_neg16_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i64_e32 vcc, -16, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp slt i64 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sle i64 %val, 5 is canonicalized to slt 6, then commuted:
; v_cmp_gt_i64 with inline immediate 6 as src0.
define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_sle_5_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i64_e32 vcc, 6, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sle i64 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; --------------------------------------------------------------------------------
; f32 compares
; --------------------------------------------------------------------------------
; fcmp oeq with 2.0 (FP inline immediate): the constant is commuted into src0
; of v_cmp_eq_f32.
define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oeq_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp oeq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ogt %val, 2.0: commuted to v_cmp_lt_f32 with 2.0 as src0.
define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ogt_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ogt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp oge %val, 2.0: commuted to v_cmp_le_f32 with 2.0 as src0.
define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oge_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_le_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp oge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp olt %val, 2.0: commuted to v_cmp_gt_f32 with 2.0 as src0.
define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_olt_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp olt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ole %val, 2.0: commuted to v_cmp_ge_f32 with 2.0 as src0.
define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ole_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ole float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp one %val, 2.0 (ordered not-equal): commuted to v_cmp_lg_f32 with 2.0
; as src0.
define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_one_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lg_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp one float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ord %val, 2.0: the constant operand is always ordered, so this folds to
; a self-compare NaN test (v_cmp_o_f32 v2, v2).
define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ord_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_o_f32_e32 vcc, v2, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ord float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ueq x, 2.0: the immediate is commuted to src0 and the compare selects
; to the VOP2 form v_cmp_nlg_f32_e32 vcc, 2.0, v2.
define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ueq_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlg_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp ueq float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ugt x, 2.0: operands are swapped and the predicate reversed so the
; immediate sits in src0: v_cmp_nge_f32_e32 vcc, 2.0, v2.
define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nge_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp ugt float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp uge x, 2.0: operands are swapped and the predicate reversed so the
; immediate sits in src0: v_cmp_ngt_f32_e32 vcc, 2.0, v2.
define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp uge float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ult x, 2.0: operands are swapped and the predicate reversed so the
; immediate sits in src0: v_cmp_nle_f32_e32 vcc, 2.0, v2.
define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nle_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp ult float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ule x, 2.0: operands are swapped and the predicate reversed so the
; immediate sits in src0: v_cmp_nlt_f32_e32 vcc, 2.0, v2.
define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp ule float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp une x, 2.0: the immediate is commuted to src0 and the compare selects
; to the VOP2 form v_cmp_neq_f32_e32 vcc, 2.0, v2.
define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_une_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 2.0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp une float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp uno x, 2.0 (constant is never NaN) folds to a self-compare:
; v_cmp_u_f32_e32 vcc, v2, v2.
define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uno_2.0_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
%cmp = fcmp uno float %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; --------------------------------------------------------------------------------
; f64 compares
; --------------------------------------------------------------------------------
; fcmp oeq x, 2.0 (f64): the immediate is commuted to src0 and the compare
; selects to the VOP2 form v_cmp_eq_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oeq_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp oeq double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ogt x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_lt_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ogt_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ogt double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp oge x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_le_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_oge_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_le_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp oge double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp olt x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_gt_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_olt_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp olt double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ole x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_ge_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ole_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ole double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp one x, 2.0 (f64): the immediate is commuted to src0 and the compare
; selects to the VOP2 form v_cmp_lg_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_one_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_lg_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp one double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ord x, 2.0 (constant is never NaN) folds to a self-compare:
; v_cmp_o_f64_e32 vcc, v[3:4], v[3:4].
define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ord_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_o_f64_e32 vcc, v[3:4], v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ord double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ueq x, 2.0 (f64): the immediate is commuted to src0 and the compare
; selects to the VOP2 form v_cmp_nlg_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ueq_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlg_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ueq double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ugt x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_nge_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ugt_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nge_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ugt double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp uge x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_ngt_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uge_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ngt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp uge double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ult x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_nle_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ult_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nle_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ult double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp ule x, 2.0 (f64): operands are swapped and the predicate reversed so
; the immediate sits in src0: v_cmp_nlt_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_ule_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_nlt_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp ule double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp une x, 2.0 (f64): the immediate is commuted to src0 and the compare
; selects to the VOP2 form v_cmp_neq_f64_e32 vcc, 2.0, v[3:4].
define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_une_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_neq_f64_e32 vcc, 2.0, v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp une double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; fcmp uno x, 2.0 (constant is never NaN) folds to a self-compare:
; v_cmp_u_f64_e32 vcc, v[3:4], v[3:4].
define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: commute_uno_2.0_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_u_f64_e32 vcc, v[3:4], v[3:4]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
%cmp = fcmp uno double %val, 2.0
%ext = sext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %gep.out
ret void
}
; FIXME: Should be able to fold this frameindex
; Without commuting the frame index in the pre-regalloc run of
; SIShrinkInstructions, this was using the VOP3 compare.
; Frame-index comparison: the icmp against an alloca must commute the frame
; index so the e32 (vcc) compare encoding can be used.
; Fix: this kernel performs volatile memory accesses, so tag it #1 (nounwind)
; like the other kernels in this file instead of #0 (nounwind readnone) —
; readnone asserts no memory access and contradicts the volatile load/store.
; Attributes are not reflected in the emitted ISA, so the checks are unchanged.
define amdgpu_kernel void @commute_frameindex(ptr addrspace(1) nocapture %out) #1 {
; GCN-LABEL: commute_frameindex:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s14, -1
; GCN-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-NEXT: s_add_u32 s12, s12, s11
; GCN-NEXT: s_addc_u32 s13, s13, 0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
entry:
%stack0 = alloca i32, addrspace(5)
%ptr0 = load volatile ptr addrspace(5), ptr addrspace(1) poison
%eq = icmp eq ptr addrspace(5) %ptr0, %stack0
%ext = zext i1 %eq to i32
store volatile i32 %ext, ptr addrspace(1) %out
ret void
}
; #0: used on the llvm.amdgcn.workitem.id.x intrinsic and its call sites.
attributes #0 = { nounwind readnone }
; #1: used on the kernel definitions in this file.
attributes #1 = { nounwind }