[AMDGPU][GlobalISel] Add RegBankLegalize rules for amdgcn.class (#178827)

This commit is contained in:
vangthao95 2026-03-20 12:50:06 -07:00 committed by GitHub
parent bb369f1c30
commit bd3b06b0a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 113 additions and 49 deletions

View File

@ -1498,6 +1498,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForIOpcs({amdgcn_s_prefetch_data})
.Any({{}, {{}, {IntrId, SgprB64_ReadFirstLane, SgprB32_ReadFirstLane}}});
// amdgcn.class produces an S1 result from a floating-point value and an i32
// class mask. Rules are keyed on the result (uniform vs. divergent S1) and on
// the type of the value being tested (S16/S32/S64). In every rule the tested
// value is mapped to a VGPR of matching width and the mask to Vgpr32; uniform
// results use the UniInVcc mapping, divergent results use Vcc.
addRulesForIOpcs({amdgcn_class})
.Any({{UniS1, _, S16}, {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
.Any({{DivS1, _, S16}, {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
.Any({{UniS1, _, S32}, {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
.Any({{DivS1, _, S32}, {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
.Any({{UniS1, _, S64}, {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
.Any({{DivS1, _, S64}, {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
// This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
addRulesForIOpcs({amdgcn_end_cf})
.Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})

View File

@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
---
name: class_ss

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-SDAG %s
; RUN: llc -global-isel=1 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-GISEL %s
declare half @llvm.fabs.f16(half %a)
declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)
@ -39,11 +39,15 @@ define amdgpu_kernel void @class_f16(
; VI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
; VI-GISEL-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; VI-GISEL-NEXT: s_load_dword s2, s[8:9], 0x0
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[2:3], v0, s2
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s2
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_readfirstlane_b32 s2, v0
; VI-GISEL-NEXT: v_cmp_class_f16_e32 vcc, s2, v1
; VI-GISEL-NEXT: s_cmp_lg_u64 vcc, 0
; VI-GISEL-NEXT: s_cselect_b32 s2, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; VI-GISEL-NEXT: s_nop 2
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
@ -75,14 +79,16 @@ define amdgpu_kernel void @class_f16_fabs(
;
; VI-GISEL-LABEL: class_f16_fabs:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x28
; VI-GISEL-NEXT: s_load_dword s4, s[8:9], 0x4c
; VI-GISEL-NEXT: s_load_dword s3, s[8:9], 0x4c
; VI-GISEL-NEXT: s_load_dword s4, s[8:9], 0x28
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], |v0|, s4
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], |s4|, v0
; VI-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
@ -123,7 +129,9 @@ define amdgpu_kernel void @class_f16_fneg(
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_max_f16_e64 v0, -s3, -s3
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s4
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; VI-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
@ -164,7 +172,9 @@ define amdgpu_kernel void @class_f16_fabs_fneg(
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_max_f16_e64 v0, -|s3|, -|s3|
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s4
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; VI-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
@ -202,7 +212,9 @@ define amdgpu_kernel void @class_f16_1(
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], s3, 1
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; VI-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
@ -235,7 +247,9 @@ define amdgpu_kernel void @class_f16_64(
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_class_f16_e64 s[4:5], s3, 64
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; VI-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
@ -270,7 +284,9 @@ define amdgpu_kernel void @class_f16_full_mask(
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_class_f16_e32 vcc, s3, v0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; VI-GISEL-NEXT: s_cmp_lg_u64 vcc, 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
@ -305,7 +321,9 @@ define amdgpu_kernel void @class_f16_nine_bit_mask(
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_class_f16_e32 vcc, s3, v0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; VI-GISEL-NEXT: s_cmp_lg_u64 vcc, 0
; VI-GISEL-NEXT: s_cselect_b32 s3, -1, 0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; VI-GISEL-NEXT: s_mov_b32 s3, 0x1100f000
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-SDAG %s
; RUN: llc -global-isel=1 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
declare i1 @llvm.amdgcn.class.f32(float, i32) #1
declare i1 @llvm.amdgcn.class.f64(double, i32) #1
@ -32,8 +32,10 @@ define amdgpu_kernel void @test_class_f32(ptr addrspace(1) %out, [8 x i32], floa
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; SI-GISEL-NEXT: v_cmp_class_f32_e32 vcc, s6, v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1
@ -59,15 +61,17 @@ define amdgpu_kernel void @test_class_fabs_f32(ptr addrspace(1) %out, [8 x i32],
;
; SI-GISEL-LABEL: test_class_fabs_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0x13
; SI-GISEL-NEXT: s_load_dword s6, s[4:5], 0x1c
; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0x1c
; SI-GISEL-NEXT: s_load_dword s6, s[4:5], 0x13
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], |v0|, s6
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], |s6|, v0
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%a.fabs = call float @llvm.fabs.f32(float %a) #1
@ -99,10 +103,12 @@ define amdgpu_kernel void @test_class_fneg_f32(ptr addrspace(1) %out, [8 x i32],
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, -s3
; SI-GISEL-NEXT: v_mul_f32_e64 v0, -1.0, s3
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, s6
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%a.fneg = fsub float -0.0, %a
@ -134,10 +140,12 @@ define amdgpu_kernel void @test_class_fneg_fabs_f32(ptr addrspace(1) %out, [8 x
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mul_f32_e64 v0, 1.0, -|s3|
; SI-GISEL-NEXT: v_mul_f32_e64 v0, -1.0, |s3|
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, s6
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%a.fabs = call float @llvm.fabs.f32(float %a) #1
@ -168,8 +176,10 @@ define amdgpu_kernel void @test_class_1_f32(ptr addrspace(1) %out, float %a) #0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 1
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
@ -198,8 +208,10 @@ define amdgpu_kernel void @test_class_64_f32(ptr addrspace(1) %out, float %a) #0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 64
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
@ -231,8 +243,10 @@ define amdgpu_kernel void @test_class_full_mask_f32(ptr addrspace(1) %out, float
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
@ -263,8 +277,10 @@ define amdgpu_kernel void @test_class_9bit_mask_f32(ptr addrspace(1) %out, float
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
@ -437,8 +453,10 @@ define amdgpu_kernel void @test_class_f64(ptr addrspace(1) %out, [8 x i32], doub
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, s[6:7], v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
@ -464,16 +482,17 @@ define amdgpu_kernel void @test_class_fabs_f64(ptr addrspace(1) %out, [8 x i32],
;
; SI-GISEL-LABEL: test_class_fabs_f64:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-GISEL-NEXT: s_load_dword s3, s[4:5], 0x1d
; SI-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s6
; SI-GISEL-NEXT: v_mov_b32_e32 v1, s7
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], |v[0:1]|, s3
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], |s[6:7]|, v0
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%a.fabs = call double @llvm.fabs.f64(double %a) #1
@ -507,8 +526,10 @@ define amdgpu_kernel void @test_class_fneg_f64(ptr addrspace(1) %out, [8 x i32],
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_max_f64 v[0:1], -s[6:7], -s[6:7]
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s3
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%a.fneg = fsub double -0.0, %a
@ -542,8 +563,10 @@ define amdgpu_kernel void @test_class_fneg_fabs_f64(ptr addrspace(1) %out, [8 x
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_max_f64 v[0:1], -|s[6:7]|, -|s[6:7]|
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], v[0:1], s3
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%a.fabs = call double @llvm.fabs.f64(double %a) #1
@ -572,10 +595,12 @@ define amdgpu_kernel void @test_class_1_f64(ptr addrspace(1) %out, double %a) #0
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 1
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], s[2:3], 1
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
@ -602,10 +627,12 @@ define amdgpu_kernel void @test_class_64_f64(ptr addrspace(1) %out, double %a) #
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 64
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], s[2:3], 64
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
@ -637,8 +664,10 @@ define amdgpu_kernel void @test_class_full_mask_f64(ptr addrspace(1) %out, [8 x
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, s[6:7], v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
@ -679,8 +708,10 @@ define amdgpu_kernel void @v_test_class_full_mask_f64(ptr addrspace(1) %out, ptr
; SI-GISEL-NEXT: s_mov_b32 s2, 0
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f64_e32 vcc, s[4:5], v2
; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v2, s4
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@ -1132,6 +1163,8 @@ define amdgpu_kernel void @test_no_fold_or_class_f32_0(ptr addrspace(1) %out, pt
; SI-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[2:3], s8, 8
; SI-GISEL-NEXT: s_mov_b32 s6, -1
; SI-GISEL-NEXT: s_or_b64 s[2:3], s[2:3], s[2:3]
; SI-GISEL-NEXT: s_cselect_b64 s[2:3], exec, 0
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 4
; SI-GISEL-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
@ -1171,8 +1204,10 @@ define amdgpu_kernel void @test_class_0_f32(ptr addrspace(1) %out, float %a) #0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], s3, 0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
@ -1196,10 +1231,12 @@ define amdgpu_kernel void @test_class_0_f64(ptr addrspace(1) %out, double %a) #0
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[2:3], s[2:3], 0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; SI-GISEL-NEXT: v_cmp_class_f64_e64 s[4:5], s[2:3], 0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
@ -1227,8 +1264,10 @@ define amdgpu_kernel void @test_class_undef_f32(ptr addrspace(1) %out, float %a,
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s3
; SI-GISEL-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; SI-GISEL-NEXT: s_or_b64 s[4:5], vcc, vcc
; SI-GISEL-NEXT: s_cselect_b32 s4, -1, 0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
%result = call i1 @llvm.amdgcn.class.f32(float poison, i32 %b) #1