
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test. gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631
220 lines
9.2 KiB
LLVM
220 lines
9.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX1030 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100 %s

; Codegen test for dynamically indexed extractelement / insertelement on
; <4 x i32> values that are carried around a loop.
;
; The kernel takes two i32 arguments: %p is OR-ed into the selected lane and
; %q, zero-extended to i64, is the lane index used by every dynamic access.
; Nothing computed in the loop is stored, and the loop has no exit edge.
;
; The expected output consists of scalar (s_*) instructions only. Each dynamic
; lane access is expanded into s_cmp_eq_u32 against a constant lane number
; followed by s_cselect_b32. The GFX90A and GFX942 prefixes form the select
; condition with 64-bit s_cselect_b64 / s_and_b64 against exec, while the
; GFX1030 and GFX1100 prefixes use the 32-bit forms against exec_lo; GFX1100
; additionally expects s_delay_alu instructions. For every prefix the loop
; ends in s_cbranch_vccnz back to .LBB0_1, followed by a DummyReturnBlock that
; holds s_endpgm.
;
; The assertion lines below are generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) {
; GFX90A-LABEL: test_insert_extract:
; GFX90A: ; %bb.0: ; %entry
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX90A-NEXT: s_mov_b32 s2, 0
; GFX90A-NEXT: s_and_b64 vcc, exec, -1
; GFX90A-NEXT: s_mov_b32 s3, 0
; GFX90A-NEXT: s_mov_b32 s4, 0
; GFX90A-NEXT: s_mov_b32 s5, 0
; GFX90A-NEXT: s_mov_b32 s6, 0
; GFX90A-NEXT: .LBB0_1: ; %for.body
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_cmp_eq_u32 s1, 1
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX90A-NEXT: s_cselect_b32 s7, s4, s3
; GFX90A-NEXT: s_cmp_eq_u32 s1, 2
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX90A-NEXT: s_cselect_b32 s7, s5, s7
; GFX90A-NEXT: s_cmp_eq_u32 s1, 3
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX90A-NEXT: s_cselect_b32 s7, s6, s7
; GFX90A-NEXT: s_or_b32 s7, s7, s0
; GFX90A-NEXT: s_cmp_eq_u32 s1, 1
; GFX90A-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX90A-NEXT: s_and_b64 s[10:11], s[8:9], exec
; GFX90A-NEXT: s_cselect_b32 s4, s7, s4
; GFX90A-NEXT: s_cmp_eq_u32 s1, 3
; GFX90A-NEXT: s_cselect_b64 s[10:11], -1, 0
; GFX90A-NEXT: s_and_b64 s[12:13], s[10:11], exec
; GFX90A-NEXT: s_cselect_b32 s6, s7, s6
; GFX90A-NEXT: s_cmp_eq_u32 s1, 2
; GFX90A-NEXT: s_cselect_b64 s[12:13], -1, 0
; GFX90A-NEXT: s_and_b64 s[14:15], s[12:13], exec
; GFX90A-NEXT: s_cselect_b32 s5, s7, s5
; GFX90A-NEXT: s_cmp_eq_u32 s1, 0
; GFX90A-NEXT: s_cselect_b32 s3, s7, s3
; GFX90A-NEXT: s_or_b64 s[8:9], s[12:13], s[8:9]
; GFX90A-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
; GFX90A-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX90A-NEXT: s_cselect_b32 s2, 0, s2
; GFX90A-NEXT: s_mov_b64 vcc, vcc
; GFX90A-NEXT: s_cbranch_vccnz .LBB0_1
; GFX90A-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: test_insert_extract:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX942-NEXT: s_mov_b32 s2, 0
; GFX942-NEXT: s_and_b64 vcc, exec, -1
; GFX942-NEXT: s_mov_b32 s3, 0
; GFX942-NEXT: s_mov_b32 s4, 0
; GFX942-NEXT: s_mov_b32 s5, 0
; GFX942-NEXT: s_mov_b32 s6, 0
; GFX942-NEXT: .LBB0_1: ; %for.body
; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_cmp_eq_u32 s1, 1
; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX942-NEXT: s_cselect_b32 s7, s4, s3
; GFX942-NEXT: s_cmp_eq_u32 s1, 2
; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX942-NEXT: s_cselect_b32 s7, s5, s7
; GFX942-NEXT: s_cmp_eq_u32 s1, 3
; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX942-NEXT: s_cselect_b32 s7, s6, s7
; GFX942-NEXT: s_or_b32 s7, s7, s0
; GFX942-NEXT: s_cmp_eq_u32 s1, 1
; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX942-NEXT: s_and_b64 s[10:11], s[8:9], exec
; GFX942-NEXT: s_cselect_b32 s4, s7, s4
; GFX942-NEXT: s_cmp_eq_u32 s1, 3
; GFX942-NEXT: s_cselect_b64 s[10:11], -1, 0
; GFX942-NEXT: s_and_b64 s[12:13], s[10:11], exec
; GFX942-NEXT: s_cselect_b32 s6, s7, s6
; GFX942-NEXT: s_cmp_eq_u32 s1, 2
; GFX942-NEXT: s_cselect_b64 s[12:13], -1, 0
; GFX942-NEXT: s_and_b64 s[14:15], s[12:13], exec
; GFX942-NEXT: s_cselect_b32 s5, s7, s5
; GFX942-NEXT: s_cmp_eq_u32 s1, 0
; GFX942-NEXT: s_cselect_b32 s3, s7, s3
; GFX942-NEXT: s_or_b64 s[8:9], s[12:13], s[8:9]
; GFX942-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GFX942-NEXT: s_cselect_b32 s2, 0, s2
; GFX942-NEXT: s_mov_b64 vcc, vcc
; GFX942-NEXT: s_cbranch_vccnz .LBB0_1
; GFX942-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX942-NEXT: s_endpgm
;
; GFX1030-LABEL: test_insert_extract:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1030-NEXT: s_mov_b32 s2, 0
; GFX1030-NEXT: s_mov_b32 s3, 0
; GFX1030-NEXT: s_mov_b32 s4, 0
; GFX1030-NEXT: s_mov_b32 s5, 0
; GFX1030-NEXT: s_mov_b32 s6, 0
; GFX1030-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1030-NEXT: .p2align 6
; GFX1030-NEXT: .LBB0_1: ; %for.body
; GFX1030-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: s_cmp_eq_u32 s1, 1
; GFX1030-NEXT: s_cselect_b32 s7, -1, 0
; GFX1030-NEXT: s_and_b32 s7, s7, exec_lo
; GFX1030-NEXT: s_cselect_b32 s7, s4, s3
; GFX1030-NEXT: s_cmp_eq_u32 s1, 2
; GFX1030-NEXT: s_cselect_b32 s8, -1, 0
; GFX1030-NEXT: s_and_b32 s8, s8, exec_lo
; GFX1030-NEXT: s_cselect_b32 s7, s5, s7
; GFX1030-NEXT: s_cmp_eq_u32 s1, 3
; GFX1030-NEXT: s_cselect_b32 s8, -1, 0
; GFX1030-NEXT: s_and_b32 s8, s8, exec_lo
; GFX1030-NEXT: s_cselect_b32 s7, s6, s7
; GFX1030-NEXT: s_or_b32 s7, s7, s0
; GFX1030-NEXT: s_cmp_eq_u32 s1, 1
; GFX1030-NEXT: s_cselect_b32 s8, -1, 0
; GFX1030-NEXT: s_and_b32 s9, s8, exec_lo
; GFX1030-NEXT: s_cselect_b32 s4, s7, s4
; GFX1030-NEXT: s_cmp_eq_u32 s1, 3
; GFX1030-NEXT: s_cselect_b32 s9, -1, 0
; GFX1030-NEXT: s_and_b32 s10, s9, exec_lo
; GFX1030-NEXT: s_cselect_b32 s6, s7, s6
; GFX1030-NEXT: s_cmp_eq_u32 s1, 2
; GFX1030-NEXT: s_cselect_b32 s10, -1, 0
; GFX1030-NEXT: s_and_b32 s11, s10, exec_lo
; GFX1030-NEXT: s_cselect_b32 s5, s7, s5
; GFX1030-NEXT: s_cmp_eq_u32 s1, 0
; GFX1030-NEXT: s_cselect_b32 s3, s7, s3
; GFX1030-NEXT: s_or_b32 s7, s10, s8
; GFX1030-NEXT: s_or_b32 s7, s9, s7
; GFX1030-NEXT: s_and_b32 s7, s7, exec_lo
; GFX1030-NEXT: s_cselect_b32 s2, 0, s2
; GFX1030-NEXT: s_cbranch_vccnz .LBB0_1
; GFX1030-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX1030-NEXT: s_endpgm
;
; GFX1100-LABEL: test_insert_extract:
; GFX1100: ; %bb.0: ; %entry
; GFX1100-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-NEXT: s_mov_b32 s2, 0
; GFX1100-NEXT: s_mov_b32 s3, 0
; GFX1100-NEXT: s_mov_b32 s4, 0
; GFX1100-NEXT: s_mov_b32 s5, 0
; GFX1100-NEXT: s_mov_b32 s6, 0
; GFX1100-NEXT: s_mov_b32 vcc_lo, exec_lo
; GFX1100-NEXT: .p2align 6
; GFX1100-NEXT: .LBB0_1: ; %for.body
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-NEXT: s_cmp_eq_u32 s1, 1
; GFX1100-NEXT: s_cselect_b32 s7, -1, 0
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX1100-NEXT: s_and_b32 s7, s7, exec_lo
; GFX1100-NEXT: s_cselect_b32 s7, s4, s3
; GFX1100-NEXT: s_cmp_eq_u32 s1, 2
; GFX1100-NEXT: s_cselect_b32 s8, -1, 0
; GFX1100-NEXT: s_and_b32 s8, s8, exec_lo
; GFX1100-NEXT: s_cselect_b32 s7, s5, s7
; GFX1100-NEXT: s_cmp_eq_u32 s1, 3
; GFX1100-NEXT: s_cselect_b32 s8, -1, 0
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1100-NEXT: s_and_b32 s8, s8, exec_lo
; GFX1100-NEXT: s_cselect_b32 s7, s6, s7
; GFX1100-NEXT: s_or_b32 s7, s7, s0
; GFX1100-NEXT: s_cmp_eq_u32 s1, 1
; GFX1100-NEXT: s_cselect_b32 s8, -1, 0
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX1100-NEXT: s_and_b32 s9, s8, exec_lo
; GFX1100-NEXT: s_cselect_b32 s4, s7, s4
; GFX1100-NEXT: s_cmp_eq_u32 s1, 3
; GFX1100-NEXT: s_cselect_b32 s9, -1, 0
; GFX1100-NEXT: s_and_b32 s10, s9, exec_lo
; GFX1100-NEXT: s_cselect_b32 s6, s7, s6
; GFX1100-NEXT: s_cmp_eq_u32 s1, 2
; GFX1100-NEXT: s_cselect_b32 s10, -1, 0
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX1100-NEXT: s_and_b32 s11, s10, exec_lo
; GFX1100-NEXT: s_cselect_b32 s5, s7, s5
; GFX1100-NEXT: s_cmp_eq_u32 s1, 0
; GFX1100-NEXT: s_cselect_b32 s3, s7, s3
; GFX1100-NEXT: s_or_b32 s7, s10, s8
; GFX1100-NEXT: s_or_b32 s7, s9, s7
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-NEXT: s_and_b32 s7, s7, exec_lo
; GFX1100-NEXT: s_cselect_b32 s2, 0, s2
; GFX1100-NEXT: s_cbranch_vccnz .LBB0_1
; GFX1100-NEXT: ; %bb.2: ; %DummyReturnBlock
; GFX1100-NEXT: s_endpgm
entry:
  ; Start value for %x1: zeroinitializer with lane 0 overwritten by 0.
  %init = insertelement <4 x i32> zeroinitializer, i32 0, i64 0
  br label %for.body

for.body: ; preds = %for.body, %entry
  ; Two loop-carried vectors: %x1 is fed by %i4, %x2 is fed by %i2.
  %x1 = phi <4 x i32> [ %init, %entry ], [ %i4, %for.body ]
  %x2 = phi <4 x i32> [ zeroinitializer, %entry ], [ %i2, %for.body ]
  ; The same zero-extended %q indexes every dynamic access below.
  %idxprom = zext i32 %q to i64
  ; %x2 update: read lane %q, OR in %p, write the result back to lane %q.
  %e1 = extractelement <4 x i32> %x2, i64 %idxprom
  %add = or i32 %e1, %p
  %i2 = insertelement <4 x i32> %x2, i32 %add, i64 %idxprom
  ; %x1 update: read lane %q and write it into the constant lane 0.
  %e3 = extractelement <4 x i32> %x1, i64 %idxprom
  %i4 = insertelement <4 x i32> %x1, i32 %e3, i64 0
  ; Unconditional back edge: the loop never exits.
  br label %for.body
}
|
|
|