
Since e39f6c1844fab59c638d8059a6cf139adb42279a opt will infer the correct datalayout when given a triple. Avoid explicitly specifying it in tests that depend on the AMDGPU target being present to avoid the string becoming out of sync with the TargetInfo value. Only tests with REQUIRES: amdgpu-registered-target or a local lit.cfg were updated to ensure that tests for non-target-specific passes that happen to use the AMDGPU layout still pass when building with a limited set of targets. Reviewed By: shiltian, arsenm Pull Request: https://github.com/llvm/llvm-project/pull/137921
1343 lines
82 KiB
LLVM
1343 lines
82 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=SDAG-GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=SDAG-GFX1100 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel=1 -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL-GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL-GFX1100 %s
|
|
|
|
; Note: if you're adding tests here, also add them to
|
|
; lower-buffer-fat-pointers-mem-transfer.ll to verify the IR produced by
|
|
; the lowering.
|
|
;
|
|
; This file is a sanity check to make sure that the code generated
|
|
; for buffer-related memcpy() calls turns into something reasonable in
|
|
; the backend, despite the wide intermediate vectors.
|
|
|
|
; Only the triple is given here; there is deliberately no `target datalayout`
; line. The layout is inferred from the triple, so it cannot drift out of sync
; with the value the AMDGPU target itself reports.
; NOTE(review): the inference is documented for `opt`; this file is driven by
; `llc` (see the RUN lines) -- confirm before re-adding an explicit layout.
target triple = "amdgcn--"
|
|
|
|
;; memcpy
|
|
|
|
; memcpy intrinsic overload with both pointers in address space 7 (the AMDGPU
; buffer fat pointer address space) and an i32 byte count.
; Operand order per the LLVM LangRef: (dest, src, len, isvolatile).
declare void @llvm.memcpy.p7.p7.i32(ptr addrspace(7), ptr addrspace(7), i32, i1)
|
|
|
|
define amdgpu_kernel void @memcpy_known(ptr addrspace(7) inreg %src, ptr addrspace(7) inreg %dst) {
|
|
; SDAG-LABEL: memcpy_known:
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 s7, s24
|
|
; SDAG-NEXT: s_mov_b32 s6, s23
|
|
; SDAG-NEXT: s_mov_b32 s5, s22
|
|
; SDAG-NEXT: s_mov_b32 s4, s21
|
|
; SDAG-NEXT: s_mov_b32 s8, 0
|
|
; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: .LBB0_1: ; %load-store-loop
|
|
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-NEXT: s_add_i32 s9, s20, s8
|
|
; SDAG-NEXT: v_mov_b32_e32 v60, s9
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[0:3], v60, s[16:19], 0 offen
|
|
; SDAG-NEXT: s_add_i32 s9, s25, s8
|
|
; SDAG-NEXT: s_addk_i32 s8, 0x100
|
|
; SDAG-NEXT: s_cmpk_lt_u32 s8, 0x2000
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[0:3], v60, s[16:19], 0 offen offset:16
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[8:11], v60, s[16:19], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[12:15], v60, s[16:19], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[16:19], v60, s[16:19], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[20:23], v60, s[16:19], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[24:27], v60, s[16:19], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[28:31], v60, s[16:19], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[32:35], v60, s[16:19], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[36:39], v60, s[16:19], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[48:51], v60, s[16:19], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[52:55], v60, s[16:19], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[40:43], v60, s[16:19], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[44:47], v60, s[16:19], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[56:59], v60, s[16:19], 0 offen offset:224
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[60:63], v60, s[16:19], 0 offen offset:240
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: v_mov_b32_e32 v0, s9
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v0, s[4:7], 0 offen
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v0, s[4:7], 0 offen offset:16
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[8:11], v0, s[4:7], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[12:15], v0, s[4:7], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[16:19], v0, s[4:7], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[20:23], v0, s[4:7], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[24:27], v0, s[4:7], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[28:31], v0, s[4:7], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[32:35], v0, s[4:7], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[36:39], v0, s[4:7], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[48:51], v0, s[4:7], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[52:55], v0, s[4:7], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[40:43], v0, s[4:7], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[44:47], v0, s[4:7], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[56:59], v0, s[4:7], 0 offen offset:224
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[60:63], v0, s[4:7], 0 offen offset:240
|
|
; SDAG-NEXT: s_cbranch_scc1 .LBB0_1
|
|
; SDAG-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-LABEL: memcpy_known:
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 s8, 0
|
|
; GISEL-NEXT: s_mov_b32 s4, s21
|
|
; GISEL-NEXT: s_mov_b32 s5, s22
|
|
; GISEL-NEXT: s_mov_b32 s6, s23
|
|
; GISEL-NEXT: s_mov_b32 s7, s24
|
|
; GISEL-NEXT: v_mov_b32_e32 v0, 0x2000
|
|
; GISEL-NEXT: v_mov_b32_e32 v1, s8
|
|
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: .LBB0_1: ; %load-store-loop
|
|
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-NEXT: v_add_u32_e32 v46, s20, v1
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[2:5], v46, s[16:19], 0 offen
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[6:9], v46, s[16:19], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[10:13], v46, s[16:19], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[14:17], v46, s[16:19], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[18:21], v46, s[16:19], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[22:25], v46, s[16:19], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[26:29], v46, s[16:19], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[30:33], v46, s[16:19], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[34:37], v46, s[16:19], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[48:51], v46, s[16:19], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[52:55], v46, s[16:19], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[38:41], v46, s[16:19], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[42:45], v46, s[16:19], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[56:59], v46, s[16:19], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[60:63], v46, s[16:19], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[2:5], v46, s[16:19], 0 offen offset:240
|
|
; GISEL-NEXT: v_add_u32_e32 v46, s25, v1
|
|
; GISEL-NEXT: v_add_u32_e32 v1, 0x100, v1
|
|
; GISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[2:5], v46, s[4:7], 0 offen
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[6:9], v46, s[4:7], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[10:13], v46, s[4:7], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[14:17], v46, s[4:7], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[18:21], v46, s[4:7], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[22:25], v46, s[4:7], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[26:29], v46, s[4:7], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[30:33], v46, s[4:7], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[34:37], v46, s[4:7], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[48:51], v46, s[4:7], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[52:55], v46, s[4:7], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[38:41], v46, s[4:7], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[42:45], v46, s[4:7], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[56:59], v46, s[4:7], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[60:63], v46, s[4:7], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[2:5], v46, s[4:7], 0 offen offset:240
|
|
; GISEL-NEXT: s_cbranch_vccnz .LBB0_1
|
|
; GISEL-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
; SDAG-GFX942-LABEL: memcpy_known:
|
|
; SDAG-GFX942: ; %bb.3:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
|
|
; SDAG-GFX942-NEXT: s_load_dword s12, s[4:5], 0x10
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_branch .LBB0_0
|
|
; SDAG-GFX942-NEXT: .p2align 8
|
|
; SDAG-GFX942-NEXT: ; %bb.4:
|
|
; SDAG-GFX942-NEXT: .LBB0_0:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX942-NEXT: s_load_dword s17, s[4:5], 0x34
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX942-NEXT: s_load_dword s12, s[4:5], 0x54
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s5, s16
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[6:7], s[4:5], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s2
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s3, s16
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s12
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s11
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[14:15], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s10
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[12:13], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: .LBB0_1: ; %load-store-loop
|
|
; SDAG-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX942-NEXT: s_add_i32 s1, s0, s16
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v60, s1
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[8:11], v60, s[4:7], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[4:7], v60, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[12:15], v60, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: s_add_i32 s2, s8, s16
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s2
|
|
; SDAG-GFX942-NEXT: s_addk_i32 s16, 0x100
|
|
; SDAG-GFX942-NEXT: s_cmpk_lt_u32 s16, 0x2000
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a0, v15 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a1, v14 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a2, v13 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a3, v12 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[12:15], v60, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[16:19], v60, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[20:23], v60, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[24:27], v60, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[28:31], v60, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[32:35], v60, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[36:39], v60, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[40:43], v60, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[44:47], v60, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[48:51], v60, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[52:55], v60, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[56:59], v60, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: s_nop 0
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[60:63], v60, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_nop 0
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[8:11], v0, s[12:15], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[4:7], v0, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: s_nop 1
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[12:15], v0, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[16:19], v0, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[20:23], v0, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[24:27], v0, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[28:31], v0, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[32:35], v0, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[36:39], v0, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[40:43], v0, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[44:47], v0, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[48:51], v0, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[52:55], v0, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[56:59], v0, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[60:63], v0, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_cbranch_scc1 .LBB0_1
|
|
; SDAG-GFX942-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-GFX942-NEXT: s_endpgm
|
|
;
|
|
; SDAG-GFX1100-LABEL: memcpy_known:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_clause 0x3
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s17, s[4:5], 0x34
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s18, s[4:5], 0x54
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s16, 0
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s5, s16
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s16
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s15, s16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s12, s1
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s2
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s14, s11
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[12:13], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s18
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s10
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s3, s16
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[12:13], s[2:3], s[16:17]
|
|
; SDAG-GFX1100-NEXT: .LBB0_1: ; %load-store-loop
|
|
; SDAG-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX1100-NEXT: s_add_i32 s1, s0, s16
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v60, s1
|
|
; SDAG-GFX1100-NEXT: s_add_i32 s1, s8, s16
|
|
; SDAG-GFX1100-NEXT: s_addk_i32 s16, 0x100
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v64, s1
|
|
; SDAG-GFX1100-NEXT: s_cmpk_lt_u32 s16, 0x2000
|
|
; SDAG-GFX1100-NEXT: s_clause 0xf
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v60, s[4:7], 0 offen
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[4:7], v60, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[8:11], v60, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[12:15], v60, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[16:19], v60, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[20:23], v60, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[24:27], v60, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[28:31], v60, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[32:35], v60, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[36:39], v60, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[40:43], v60, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[44:47], v60, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[48:51], v60, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[52:55], v60, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[56:59], v60, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[60:63], v60, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v64, s[12:15], 0 offen
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[4:7], v64, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[8:11], v64, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[12:15], v64, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[16:19], v64, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[20:23], v64, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[24:27], v64, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[28:31], v64, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[32:35], v64, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[36:39], v64, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[40:43], v64, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[44:47], v64, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[48:51], v64, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[52:55], v64, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[56:59], v64, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[60:63], v64, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_cbranch_scc1 .LBB0_1
|
|
; SDAG-GFX1100-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-GFX1100-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX942-LABEL: memcpy_known:
|
|
; GISEL-GFX942: ; %bb.0:
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s7, 0
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s8, s7
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9]
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
|
|
; GISEL-GFX942-NEXT: s_load_dword s3, s[4:5], 0x54
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s10, s7
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s13
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s4, s7
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
|
|
; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop
|
|
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
|
|
; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a0, v13 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a1, v12 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a2, v11 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a3, v10 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v62, s[8:11], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v62, s[8:11], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v62, s[8:11], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v62, s[8:11], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v62, s[8:11], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v62, s[8:11], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v62, s[8:11], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v62, s[8:11], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v62, s[8:11], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v62, s[8:11], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: s_nop 0
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(2)
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a4, v13 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a5, v12 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a6, v11 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a7, v10 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v63, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v63, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v63, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v63, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v63, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v63, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a4 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a5 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a6 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a7 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: s_cbranch_vccnz .LBB0_1
|
|
; GISEL-GFX942-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-GFX942-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX1100-LABEL: memcpy_known:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_clause 0x3
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s17, 0
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s12, 0
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, s17
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s6, s17
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s12
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s14, s17
|
|
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s1
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s2, s17
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s3
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s3, s10
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[16:17], s[6:7]
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s9
|
|
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[12:13], s[16:17], s[2:3]
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s11
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[14:15]
|
|
; GISEL-GFX1100-NEXT: .LBB0_1: ; %load-store-loop
|
|
; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v65, s8, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v0, 0x100, v0
|
|
; GISEL-GFX1100-NEXT: s_clause 0xf
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[1:4], v61, s[4:7], 0 offen
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[5:8], v61, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[9:12], v61, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[13:16], v61, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[17:20], v61, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[21:24], v61, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[25:28], v61, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[29:32], v61, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[33:36], v61, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[37:40], v61, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[41:44], v61, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[45:48], v61, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[49:52], v61, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[53:56], v61, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[57:60], v61, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[61:64], v61, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[1:4], v65, s[12:15], 0 offen
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[5:8], v65, s[12:15], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[9:12], v65, s[12:15], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[13:16], v65, s[12:15], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[17:20], v65, s[12:15], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[21:24], v65, s[12:15], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[25:28], v65, s[12:15], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[29:32], v65, s[12:15], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[33:36], v65, s[12:15], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[37:40], v65, s[12:15], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[41:44], v65, s[12:15], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[45:48], v65, s[12:15], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[49:52], v65, s[12:15], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[53:56], v65, s[12:15], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[57:60], v65, s[12:15], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[61:64], v65, s[12:15], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x2000, v0
|
|
; GISEL-GFX1100-NEXT: s_cbranch_vccnz .LBB0_1
|
|
; GISEL-GFX1100-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-GFX1100-NEXT: s_endpgm
|
|
call void @llvm.memcpy.p7.p7.i32(ptr addrspace(7) noundef nonnull align 16 %dst, ptr addrspace(7) noundef nonnull align 16 %src, i32 8192, i1 false)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) inreg %src, ptr addrspace(7) inreg %dst) {
|
|
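; NOTE(review): the bare SDAG-prefixed lines that follow are not enabled by any RUN line (only the -GFX942 and -GFX1100 suffixed prefixes are passed to FileCheck), so they are never verified; they end in s_setpc_b64 although this function is an amdgpu_kernel, which suggests they are stale output from an earlier non-kernel version of the test.
; SDAG-LABEL: memcpy_known_medium: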
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 s7, s24
|
|
; SDAG-NEXT: s_mov_b32 s6, s23
|
|
; SDAG-NEXT: s_mov_b32 s5, s22
|
|
; SDAG-NEXT: s_mov_b32 s4, s21
|
|
; SDAG-NEXT: v_mov_b32_e32 v0, 0
|
|
; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: .LBB1_1: ; %load-store-loop
|
|
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-NEXT: v_add_u32_e32 v45, s20, v0
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[5:8], v45, s[16:19], 0 offen offset:16
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[9:12], v45, s[16:19], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[13:16], v45, s[16:19], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[17:20], v45, s[16:19], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[21:24], v45, s[16:19], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[25:28], v45, s[16:19], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[29:32], v45, s[16:19], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[33:36], v45, s[16:19], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[48:51], v45, s[16:19], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[52:55], v45, s[16:19], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[37:40], v45, s[16:19], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[41:44], v45, s[16:19], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[56:59], v45, s[16:19], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[60:63], v45, s[16:19], 0 offen offset:224
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen offset:240
|
|
; SDAG-NEXT: v_add_u32_e32 v45, s25, v0
|
|
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v0
|
|
; SDAG-NEXT: s_and_b64 vcc, exec, vcc
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[5:8], v45, s[4:7], 0 offen offset:16
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[9:12], v45, s[4:7], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[13:16], v45, s[4:7], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[17:20], v45, s[4:7], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[21:24], v45, s[4:7], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[25:28], v45, s[4:7], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[29:32], v45, s[4:7], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[33:36], v45, s[4:7], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[48:51], v45, s[4:7], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[52:55], v45, s[4:7], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[37:40], v45, s[4:7], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[41:44], v45, s[4:7], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[56:59], v45, s[4:7], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[60:63], v45, s[4:7], 0 offen offset:224
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen offset:240
|
|
; SDAG-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; SDAG-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
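; NOTE(review): the bare GISEL-prefixed lines that follow are not enabled by any RUN line (only the -GFX942 and -GFX1100 suffixed prefixes are passed to FileCheck), so they are never verified; they end in s_setpc_b64 although this function is an amdgpu_kernel, which suggests they are stale output from an earlier non-kernel version of the test.
; GISEL-LABEL: memcpy_known_medium: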
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 s8, 0
|
|
; GISEL-NEXT: s_mov_b32 s4, s21
|
|
; GISEL-NEXT: s_mov_b32 s5, s22
|
|
; GISEL-NEXT: s_mov_b32 s6, s23
|
|
; GISEL-NEXT: s_mov_b32 s7, s24
|
|
; GISEL-NEXT: v_mov_b32_e32 v0, s8
|
|
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: .LBB1_1: ; %load-store-loop
|
|
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-NEXT: v_add_u32_e32 v45, s20, v0
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[5:8], v45, s[16:19], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[9:12], v45, s[16:19], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[13:16], v45, s[16:19], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[17:20], v45, s[16:19], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[21:24], v45, s[16:19], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[25:28], v45, s[16:19], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[29:32], v45, s[16:19], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[33:36], v45, s[16:19], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[48:51], v45, s[16:19], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[52:55], v45, s[16:19], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[37:40], v45, s[16:19], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[41:44], v45, s[16:19], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[56:59], v45, s[16:19], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[60:63], v45, s[16:19], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen offset:240
|
|
; GISEL-NEXT: v_add_u32_e32 v45, s25, v0
|
|
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v0
|
|
; GISEL-NEXT: s_xor_b64 s[8:9], vcc, -1
|
|
; GISEL-NEXT: s_xor_b64 s[8:9], s[8:9], -1
|
|
; GISEL-NEXT: s_and_b64 vcc, s[8:9], exec
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[5:8], v45, s[4:7], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[9:12], v45, s[4:7], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[13:16], v45, s[4:7], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[17:20], v45, s[4:7], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[21:24], v45, s[4:7], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[25:28], v45, s[4:7], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[29:32], v45, s[4:7], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[33:36], v45, s[4:7], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[48:51], v45, s[4:7], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[52:55], v45, s[4:7], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[37:40], v45, s[4:7], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[41:44], v45, s[4:7], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[56:59], v45, s[4:7], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[60:63], v45, s[4:7], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen offset:240
|
|
; GISEL-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GISEL-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
; SDAG-GFX942-LABEL: memcpy_known_medium:
|
|
; SDAG-GFX942: ; %bb.3:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
|
|
; SDAG-GFX942-NEXT: s_load_dword s12, s[4:5], 0x10
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_branch .LBB1_0
|
|
; SDAG-GFX942-NEXT: .p2align 8
|
|
; SDAG-GFX942-NEXT: ; %bb.4:
|
|
; SDAG-GFX942-NEXT: .LBB1_0:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX942-NEXT: s_load_dword s13, s[4:5], 0x34
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX942-NEXT: s_load_dword s14, s[4:5], 0x54
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s12, 0
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s5, s12
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, 0
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s13, s2
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s13, s14
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s11
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[14:15], s[2:3], s[12:13]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s13, s10
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[12:13], s[2:3], s[12:13]
|
|
; SDAG-GFX942-NEXT: .LBB1_1: ; %load-store-loop
|
|
; SDAG-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX942-NEXT: v_add_u32_e32 v1, s0, v0
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v1, s[4:7], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v1, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: v_add_u32_e32 v62, s8, v0
|
|
; SDAG-GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v0
|
|
; SDAG-GFX942-NEXT: s_and_b64 vcc, exec, vcc
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a0, v13 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a1, v12 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a2, v11 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a3, v10 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v1, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v1, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v1, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v1, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v1, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v1, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v1, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v1, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v1, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v1, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v1, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v1, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_nop 0
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[12:15], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v62, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(2)
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a4, v13 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a5, v12 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a6, v11 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_write_b32 a7, v10 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v62, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v62, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v62, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v62, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v62, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v62, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v62, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v62, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v62, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v62, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v62, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v62, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v5, a4 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v4, a5 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v3, a6 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: v_accvgpr_read_b32 v2, a7 ; Reload Reuse
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; SDAG-GFX942-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-GFX942-NEXT: s_endpgm
|
|
;
|
|
; SDAG-GFX1100-LABEL: memcpy_known_medium:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_clause 0x3
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s13, s[4:5], 0x34
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s18, s[4:5], 0x54
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s12, 0
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, 0
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s5, s12
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s15, s12
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s12
|
|
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s14, s1
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s16, s11
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[14:15], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s18
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s10
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s3, s12
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[12:13], s[2:3], s[12:13]
|
|
; SDAG-GFX1100-NEXT: .LBB1_1: ; %load-store-loop
|
|
; SDAG-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
|
|
; SDAG-GFX1100-NEXT: v_add_nc_u32_e32 v65, s8, v0
|
|
; SDAG-GFX1100-NEXT: v_add_co_u32 v0, s1, 0x100, v0
|
|
; SDAG-GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s1
|
|
; SDAG-GFX1100-NEXT: s_clause 0xf
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[1:4], v61, s[4:7], 0 offen
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[5:8], v61, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[9:12], v61, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[13:16], v61, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[17:20], v61, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[21:24], v61, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[25:28], v61, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[29:32], v61, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[33:36], v61, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[37:40], v61, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[41:44], v61, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[45:48], v61, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[49:52], v61, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[53:56], v61, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[57:60], v61, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[61:64], v61, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[1:4], v65, s[12:15], 0 offen
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[5:8], v65, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[9:12], v65, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[13:16], v65, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[17:20], v65, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[21:24], v65, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[25:28], v65, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[29:32], v65, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[33:36], v65, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[37:40], v65, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[41:44], v65, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[45:48], v65, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[49:52], v65, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[53:56], v65, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[57:60], v65, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[61:64], v65, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; SDAG-GFX1100-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-GFX1100-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX942-LABEL: memcpy_known_medium:
|
|
; GISEL-GFX942: ; %bb.0:
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s7, 0
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s8, s7
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9]
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
|
|
; GISEL-GFX942-NEXT: s_load_dword s3, s[4:5], 0x54
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s10, s7
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s13
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s4, s7
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
|
|
; GISEL-GFX942-NEXT: .LBB1_1: ; %load-store-loop
|
|
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v1, s0, v0
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v1, s[8:11], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v1, s[8:11], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s12, v0
|
|
; GISEL-GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v0
|
|
; GISEL-GFX942-NEXT: s_xor_b64 s[2:3], vcc, -1
|
|
; GISEL-GFX942-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
|
; GISEL-GFX942-NEXT: s_and_b64 vcc, s[2:3], exec
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a0, v13 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a1, v12 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a2, v11 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a3, v10 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v1, s[8:11], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v1, s[8:11], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v1, s[8:11], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v1, s[8:11], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v1, s[8:11], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v1, s[8:11], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v1, s[8:11], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v1, s[8:11], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v1, s[8:11], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v1, s[8:11], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v1, s[8:11], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v1, s[8:11], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: s_nop 0
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v62, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(2)
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a4, v13 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a5, v12 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a6, v11 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a7, v10 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v62, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v62, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v62, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v62, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v62, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v62, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v62, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v62, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v62, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v62, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v62, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v62, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a4 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a5 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a6 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a7 ; Reload Reuse
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GISEL-GFX942-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-GFX942-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX1100-LABEL: memcpy_known_medium:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_clause 0x3
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s17, 0
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s12, 0
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, s17
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s6, s17
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s12
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s14, s17
|
|
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s1
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s2, s17
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s3
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s3, s10
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[16:17], s[6:7]
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s9
|
|
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[12:13], s[16:17], s[2:3]
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s16, s11
|
|
; GISEL-GFX1100-NEXT: s_or_b64 s[14:15], s[16:17], s[14:15]
|
|
; GISEL-GFX1100-NEXT: .LBB1_1: ; %load-store-loop
|
|
; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v65, s8, v0
|
|
; GISEL-GFX1100-NEXT: v_add_co_u32 v0, s1, 0x100, v0
|
|
; GISEL-GFX1100-NEXT: s_xor_b32 s1, s1, -1
|
|
; GISEL-GFX1100-NEXT: s_clause 0xf
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[1:4], v61, s[4:7], 0 offen
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[5:8], v61, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[9:12], v61, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[13:16], v61, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[17:20], v61, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[21:24], v61, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[25:28], v61, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[29:32], v61, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[33:36], v61, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[37:40], v61, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[41:44], v61, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[45:48], v61, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[49:52], v61, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[53:56], v61, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[57:60], v61, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[61:64], v61, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: s_xor_b32 s1, s1, -1
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[1:4], v65, s[12:15], 0 offen
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[5:8], v65, s[12:15], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[9:12], v65, s[12:15], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[13:16], v65, s[12:15], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[17:20], v65, s[12:15], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[21:24], v65, s[12:15], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[25:28], v65, s[12:15], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[29:32], v65, s[12:15], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[33:36], v65, s[12:15], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[37:40], v65, s[12:15], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[41:44], v65, s[12:15], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[45:48], v65, s[12:15], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[49:52], v65, s[12:15], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[53:56], v65, s[12:15], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[57:60], v65, s[12:15], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[61:64], v65, s[12:15], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s1
|
|
; GISEL-GFX1100-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GISEL-GFX1100-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-GFX1100-NEXT: s_endpgm
|
|
call void @llvm.memcpy.p7.p7.i32(ptr addrspace(7) noundef nonnull align 16 %dst, ptr addrspace(7) noundef nonnull align 16 %src, i32 256, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; memcpy_known_small: a constant-size, 32-byte, non-volatile memcpy between two
; buffer fat pointers (address space 7) passed as inreg kernel arguments.
; Neither pointer carries an alignment attribute on the call.
;
; For every run configuration the expected code is straight-line: one 16-byte
; buffer load/store pair at offset 0 followed by a second pair at offset 16,
; with no load-store loop, and the kernel terminates with s_endpgm.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) inreg %src, ptr addrspace(7) inreg %dst) {
; SDAG-GFX942-LABEL: memcpy_known_small:
; SDAG-GFX942: ; %bb.1:
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
; SDAG-GFX942-NEXT: s_load_dword s12, s[4:5], 0x10
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-GFX942-NEXT: s_branch .LBB2_0
; SDAG-GFX942-NEXT: .p2align 8
; SDAG-GFX942-NEXT: ; %bb.2:
; SDAG-GFX942-NEXT: .LBB2_0:
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; SDAG-GFX942-NEXT: s_load_dword s13, s[4:5], 0x34
; SDAG-GFX942-NEXT: s_mov_b32 s12, 0
; SDAG-GFX942-NEXT: s_mov_b32 s7, s12
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-GFX942-NEXT: s_mov_b32 s6, s3
; SDAG-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13]
; SDAG-GFX942-NEXT: s_mov_b32 s13, s2
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
; SDAG-GFX942-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13]
; SDAG-GFX942-NEXT: v_mov_b32_e32 v4, s0
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen
; SDAG-GFX942-NEXT: s_load_dword s13, s[4:5], 0x54
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
; SDAG-GFX942-NEXT: s_mov_b32 s5, s12
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-GFX942-NEXT: s_mov_b32 s4, s3
; SDAG-GFX942-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13]
; SDAG-GFX942-NEXT: s_mov_b32 s13, s2
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
; SDAG-GFX942-NEXT: v_mov_b32_e32 v5, s0
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen offset:16
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen offset:16
; SDAG-GFX942-NEXT: s_endpgm
;
; SDAG-GFX1100-LABEL: memcpy_known_small:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_clause 0x1
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; SDAG-GFX1100-NEXT: s_load_b32 s13, s[4:5], 0x34
; SDAG-GFX1100-NEXT: s_mov_b32 s12, 0
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: s_mov_b32 s7, s12
; SDAG-GFX1100-NEXT: s_mov_b32 s9, s12
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-GFX1100-NEXT: s_mov_b32 s6, s3
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v4, s0
; SDAG-GFX1100-NEXT: s_mov_b32 s8, s1
; SDAG-GFX1100-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13]
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen
; SDAG-GFX1100-NEXT: s_clause 0x1
; SDAG-GFX1100-NEXT: s_load_b32 s13, s[4:5], 0x54
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
; SDAG-GFX1100-NEXT: s_mov_b32 s5, s12
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v5, s0
; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1100-NEXT: s_mov_b32 s3, s12
; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13]
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2
; SDAG-GFX1100-NEXT: s_mov_b32 s2, s1
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen offset:16
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen offset:16
; SDAG-GFX1100-NEXT: s_endpgm
;
; GISEL-GFX942-LABEL: memcpy_known_small:
; GISEL-GFX942: ; %bb.0:
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
; GISEL-GFX942-NEXT: s_mov_b32 s7, 0
; GISEL-GFX942-NEXT: s_mov_b32 s8, s7
; GISEL-GFX942-NEXT: s_mov_b32 s10, s7
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
; GISEL-GFX942-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9]
; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
; GISEL-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
; GISEL-GFX942-NEXT: v_mov_b32_e32 v4, s0
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
; GISEL-GFX942-NEXT: s_load_dword s13, s[4:5], 0x54
; GISEL-GFX942-NEXT: s_mov_b32 s4, s7
; GISEL-GFX942-NEXT: s_mov_b32 s12, s7
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX942-NEXT: s_mov_b32 s6, s1
; GISEL-GFX942-NEXT: s_mov_b32 s5, s2
; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13]
; GISEL-GFX942-NEXT: v_mov_b32_e32 v5, s0
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen offset:16
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen offset:16
; GISEL-GFX942-NEXT: s_endpgm
;
; GISEL-GFX1100-LABEL: memcpy_known_small:
; GISEL-GFX1100: ; %bb.0:
; GISEL-GFX1100-NEXT: s_clause 0x1
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
; GISEL-GFX1100-NEXT: s_mov_b32 s13, 0
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX1100-NEXT: s_mov_b32 s8, s13
; GISEL-GFX1100-NEXT: s_mov_b32 s6, s13
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX1100-NEXT: s_mov_b32 s12, s1
; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, s0
; GISEL-GFX1100-NEXT: s_or_b64 s[0:1], s[12:13], s[8:9]
; GISEL-GFX1100-NEXT: s_mov_b32 s12, s3
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX1100-NEXT: s_or_b64 s[2:3], s[12:13], s[6:7]
; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen
; GISEL-GFX1100-NEXT: s_clause 0x1
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x54
; GISEL-GFX1100-NEXT: s_mov_b32 s4, s13
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s8
; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s10
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GISEL-GFX1100-NEXT: s_or_b64 s[4:5], s[12:13], s[4:5]
; GISEL-GFX1100-NEXT: s_mov_b32 s12, s11
; GISEL-GFX1100-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7]
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen
; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:16
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen offset:16
; GISEL-GFX1100-NEXT: s_endpgm
  call void @llvm.memcpy.p7.p7.i32(ptr addrspace(7) %dst, ptr addrspace(7) %src, i32 32, i1 false)
  ret void
}
|