So far, memcpy with known size, memcpy with unknown size, memmove with known size, and memmove with unknown size have individual optimized loop lowering implementations, while memset and memset.pattern use an unoptimized loop lowering. This patch extracts the parts of the memcpy lowerings (for known and unknown sizes) that generate the control flow for the loop expansion into an `insertLoopExpansion` function. The `createMemCpyLoop(Unk|K)nownSize` functions then only collect the necessary arguments for `insertLoopExpansion`, call it, and fill the generated loop basic blocks. The immediate benefit of this is that logic from the two memcpy lowerings is deduplicated. Moreover, it enables follow-up patches that will use `insertLoopExpansion` to optimize the memset and memset.pattern implementations similarly to memcpy, since they can use the exact same control flow patterns. The test changes are due to more consistent and useful basic block names in the loop expansion and an improvement in basic block ordering: previously, the basic block that determines if the residual loop is executed would be put at the end of the function, now it is put before the residual loop body. Otherwise, the generated code should be equivalent. This patch doesn't affect memmove; deduplicating its logic would also be nice, but to extract all CF generation from the memmove lowering, `insertLoopExpansion` would need to be able to also create code that iterates backwards over the argument buffers. That would make `insertLoopExpansion` a lot more complex for a code path that's only used for memmove, so it's probably not worth refactoring. For SWDEV-543208.
1252 lines
77 KiB
LLVM
1252 lines
77 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefix=SDAG-GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=SDAG-GFX1100 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel=1 -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL-GFX942 %s
|
|
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL-GFX1100 %s
|
|
|
|
; Note: if you're adding tests here, also add them to
|
|
; lower-buffer-fat-pointers-mem-transfer.ll to verify the IR produced by
|
|
; the lowering.
|
|
;
|
|
; This file is a sanity check to make sure that the code generated
|
|
; for buffer-related memcpy() calls turns into something reasonable in
|
|
; the backend, despite the wide intermediate vectors.
|
|
|
|
; Module-level target triple. Every RUN line above also passes -mtriple=amdgcn
; (together with an explicit -mcpu) on the llc command line.
target triple = "amdgcn--"
|
|
|
|
;; memcpy
|
|
|
|
; Declaration of the memcpy intrinsic overload used by the tests in this file:
; both pointer operands are in address space 7 and the length operand is i32.
; NOTE(review): address space 7 is presumably the AMDGPU buffer fat pointer
; address space, going by the header comment's reference to
; lower-buffer-fat-pointers-mem-transfer.ll -- confirm against the AMDGPU docs.
; NOTE(review): operand order is expected to be (dest, src, len, isvolatile)
; per the LLVM LangRef -- confirm for the LLVM version in use.
declare void @llvm.memcpy.p7.p7.i32(ptr addrspace(7), ptr addrspace(7), i32, i1)
|
|
|
|
define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7) %dst) {
|
|
; SDAG-LABEL: memcpy_known:
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 s7, s24
|
|
; SDAG-NEXT: s_mov_b32 s6, s23
|
|
; SDAG-NEXT: s_mov_b32 s5, s22
|
|
; SDAG-NEXT: s_mov_b32 s4, s21
|
|
; SDAG-NEXT: s_mov_b32 s8, 0
|
|
; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: .LBB0_1: ; %load-store-loop
|
|
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-NEXT: s_add_i32 s9, s20, s8
|
|
; SDAG-NEXT: v_mov_b32_e32 v60, s9
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[0:3], v60, s[16:19], 0 offen
|
|
; SDAG-NEXT: s_add_i32 s9, s25, s8
|
|
; SDAG-NEXT: s_addk_i32 s8, 0x100
|
|
; SDAG-NEXT: s_cmpk_lt_u32 s8, 0x2000
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[0:3], v60, s[16:19], 0 offen offset:16
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[8:11], v60, s[16:19], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[12:15], v60, s[16:19], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[16:19], v60, s[16:19], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[20:23], v60, s[16:19], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[24:27], v60, s[16:19], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[28:31], v60, s[16:19], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[32:35], v60, s[16:19], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[36:39], v60, s[16:19], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[48:51], v60, s[16:19], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[52:55], v60, s[16:19], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[40:43], v60, s[16:19], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[44:47], v60, s[16:19], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[56:59], v60, s[16:19], 0 offen offset:224
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[60:63], v60, s[16:19], 0 offen offset:240
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: v_mov_b32_e32 v0, s9
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v0, s[4:7], 0 offen
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v0, s[4:7], 0 offen offset:16
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[8:11], v0, s[4:7], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[12:15], v0, s[4:7], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[16:19], v0, s[4:7], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[20:23], v0, s[4:7], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[24:27], v0, s[4:7], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[28:31], v0, s[4:7], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[32:35], v0, s[4:7], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[36:39], v0, s[4:7], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[48:51], v0, s[4:7], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[52:55], v0, s[4:7], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[40:43], v0, s[4:7], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[44:47], v0, s[4:7], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[56:59], v0, s[4:7], 0 offen offset:224
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[60:63], v0, s[4:7], 0 offen offset:240
|
|
; SDAG-NEXT: s_cbranch_scc1 .LBB0_1
|
|
; SDAG-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-LABEL: memcpy_known:
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 s8, 0
|
|
; GISEL-NEXT: s_mov_b32 s4, s21
|
|
; GISEL-NEXT: s_mov_b32 s5, s22
|
|
; GISEL-NEXT: s_mov_b32 s6, s23
|
|
; GISEL-NEXT: s_mov_b32 s7, s24
|
|
; GISEL-NEXT: v_mov_b32_e32 v0, 0x2000
|
|
; GISEL-NEXT: v_mov_b32_e32 v1, s8
|
|
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: .LBB0_1: ; %load-store-loop
|
|
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-NEXT: v_add_u32_e32 v46, s20, v1
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[2:5], v46, s[16:19], 0 offen
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[6:9], v46, s[16:19], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[10:13], v46, s[16:19], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[14:17], v46, s[16:19], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[18:21], v46, s[16:19], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[22:25], v46, s[16:19], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[26:29], v46, s[16:19], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[30:33], v46, s[16:19], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[34:37], v46, s[16:19], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[48:51], v46, s[16:19], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[52:55], v46, s[16:19], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[38:41], v46, s[16:19], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[42:45], v46, s[16:19], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[56:59], v46, s[16:19], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[60:63], v46, s[16:19], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[2:5], v46, s[16:19], 0 offen offset:240
|
|
; GISEL-NEXT: v_add_u32_e32 v46, s25, v1
|
|
; GISEL-NEXT: v_add_u32_e32 v1, 0x100, v1
|
|
; GISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[2:5], v46, s[4:7], 0 offen
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[6:9], v46, s[4:7], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[10:13], v46, s[4:7], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[14:17], v46, s[4:7], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[18:21], v46, s[4:7], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[22:25], v46, s[4:7], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[26:29], v46, s[4:7], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[30:33], v46, s[4:7], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[34:37], v46, s[4:7], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[48:51], v46, s[4:7], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[52:55], v46, s[4:7], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[38:41], v46, s[4:7], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[42:45], v46, s[4:7], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[56:59], v46, s[4:7], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[60:63], v46, s[4:7], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[2:5], v46, s[4:7], 0 offen offset:240
|
|
; GISEL-NEXT: s_cbranch_vccnz .LBB0_1
|
|
; GISEL-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
; SDAG-GFX942-LABEL: memcpy_known:
|
|
; SDAG-GFX942: ; %bb.0:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX942-NEXT: s_load_dword s17, s[4:5], 0x34
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX942-NEXT: s_load_dword s12, s[4:5], 0x54
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s5, s16
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[6:7], s[4:5], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s2
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s3, s16
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s12
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s11
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[14:15], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s10
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[12:13], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: .LBB0_1: ; %static-memcpy-expansion-main-body
|
|
; SDAG-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX942-NEXT: s_add_i32 s1, s0, s16
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[4:7], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v0, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v0, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v0, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v0, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v0, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v0, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v0, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v0, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v0, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v0, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v0, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v0, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v0, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v0, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v0, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_add_i32 s1, s8, s16
|
|
; SDAG-GFX942-NEXT: s_addk_i32 s16, 0x100
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
|
; SDAG-GFX942-NEXT: s_cmpk_lt_u32 s16, 0x2000
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v0, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v0, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v0, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v0, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v0, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v0, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v0, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v0, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v0, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v0, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v0, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v0, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v0, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v0, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 a[0:3], v0, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_cbranch_scc1 .LBB0_1
|
|
; SDAG-GFX942-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; SDAG-GFX942-NEXT: s_endpgm
|
|
;
|
|
; SDAG-GFX1100-LABEL: memcpy_known:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_clause 0x3
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s17, s[4:5], 0x34
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s18, s[4:5], 0x54
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s16, 0
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s5, s16
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s16
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s15, s16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s12, s1
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s2
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s14, s11
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[12:13], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s18
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s10
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s3, s16
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[12:13], s[2:3], s[16:17]
|
|
; SDAG-GFX1100-NEXT: .LBB0_1: ; %static-memcpy-expansion-main-body
|
|
; SDAG-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX1100-NEXT: s_add_i32 s1, s0, s16
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v60, s1
|
|
; SDAG-GFX1100-NEXT: s_add_i32 s1, s8, s16
|
|
; SDAG-GFX1100-NEXT: s_addk_i32 s16, 0x100
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v64, s1
|
|
; SDAG-GFX1100-NEXT: s_cmpk_lt_u32 s16, 0x2000
|
|
; SDAG-GFX1100-NEXT: s_clause 0xf
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v60, s[4:7], 0 offen
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[4:7], v60, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[8:11], v60, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[12:15], v60, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[16:19], v60, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[20:23], v60, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[24:27], v60, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[28:31], v60, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[32:35], v60, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[36:39], v60, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[40:43], v60, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[44:47], v60, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[48:51], v60, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[52:55], v60, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[56:59], v60, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[60:63], v60, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v64, s[12:15], 0 offen
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[4:7], v64, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[8:11], v64, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[12:15], v64, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[16:19], v64, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[20:23], v64, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[24:27], v64, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[28:31], v64, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[32:35], v64, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[36:39], v64, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[40:43], v64, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[44:47], v64, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[48:51], v64, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[52:55], v64, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[56:59], v64, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[60:63], v64, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_cbranch_scc1 .LBB0_1
|
|
; SDAG-GFX1100-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; SDAG-GFX1100-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX942-LABEL: memcpy_known:
|
|
; GISEL-GFX942: ; %bb.0:
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54
|
|
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s4, s13
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
|
|
; GISEL-GFX942-NEXT: .LBB0_1: ; %static-memcpy-expansion-main-body
|
|
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v62, s[8:11], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v62, s[8:11], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v62, s[8:11], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v62, s[8:11], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v62, s[8:11], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v62, s[8:11], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v62, s[8:11], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v62, s[8:11], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v62, s[8:11], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v62, s[8:11], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v62, s[8:11], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
|
|
; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v63, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v63, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v63, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v63, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v63, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v63, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v63, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill
|
|
; GISEL-GFX942-NEXT: scratch_load_dwordx4 v[2:5], off, off ; 16-byte Folded Reload
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: s_cbranch_vccnz .LBB0_1
|
|
; GISEL-GFX942-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; GISEL-GFX942-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX1100-LABEL: memcpy_known:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_clause 0x3
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, 0
|
|
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s4
|
|
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s13, s10
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s14, s11
|
|
; GISEL-GFX1100-NEXT: .LBB0_1: ; %static-memcpy-expansion-main-body
|
|
; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v65, s8, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v0, 0x100, v0
|
|
; GISEL-GFX1100-NEXT: s_clause 0xf
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[1:4], v61, s[4:7], 0 offen
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[5:8], v61, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[9:12], v61, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[13:16], v61, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[17:20], v61, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[21:24], v61, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[25:28], v61, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[29:32], v61, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[33:36], v61, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[37:40], v61, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[41:44], v61, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[45:48], v61, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[49:52], v61, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[53:56], v61, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[57:60], v61, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[61:64], v61, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[1:4], v65, s[12:15], 0 offen
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[5:8], v65, s[12:15], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[9:12], v65, s[12:15], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[13:16], v65, s[12:15], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[17:20], v65, s[12:15], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[21:24], v65, s[12:15], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[25:28], v65, s[12:15], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[29:32], v65, s[12:15], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[33:36], v65, s[12:15], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[37:40], v65, s[12:15], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[41:44], v65, s[12:15], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[45:48], v65, s[12:15], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[49:52], v65, s[12:15], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[53:56], v65, s[12:15], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[57:60], v65, s[12:15], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[61:64], v65, s[12:15], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x2000, v0
|
|
; GISEL-GFX1100-NEXT: s_cbranch_vccnz .LBB0_1
|
|
; GISEL-GFX1100-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; GISEL-GFX1100-NEXT: s_endpgm
|
|
call void @llvm.memcpy.p7.p7.i32(ptr addrspace(7) noundef nonnull align 16 %dst, ptr addrspace(7) noundef nonnull align 16 %src, i32 8192, i1 false)
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrspace(7) %dst) {
|
|
; SDAG-LABEL: memcpy_known_medium:
|
|
; SDAG: ; %bb.0:
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; SDAG-NEXT: s_mov_b32 s7, s24
|
|
; SDAG-NEXT: s_mov_b32 s6, s23
|
|
; SDAG-NEXT: s_mov_b32 s5, s22
|
|
; SDAG-NEXT: s_mov_b32 s4, s21
|
|
; SDAG-NEXT: v_mov_b32_e32 v0, 0
|
|
; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: .LBB1_1: ; %load-store-loop
|
|
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-NEXT: v_add_u32_e32 v45, s20, v0
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[5:8], v45, s[16:19], 0 offen offset:16
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[9:12], v45, s[16:19], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[13:16], v45, s[16:19], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[17:20], v45, s[16:19], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[21:24], v45, s[16:19], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[25:28], v45, s[16:19], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[29:32], v45, s[16:19], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[33:36], v45, s[16:19], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[48:51], v45, s[16:19], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[52:55], v45, s[16:19], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[37:40], v45, s[16:19], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[41:44], v45, s[16:19], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[56:59], v45, s[16:19], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[60:63], v45, s[16:19], 0 offen offset:224
|
|
; SDAG-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen offset:240
|
|
; SDAG-NEXT: v_add_u32_e32 v45, s25, v0
|
|
; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v0
|
|
; SDAG-NEXT: s_and_b64 vcc, exec, vcc
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_nop 0
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[5:8], v45, s[4:7], 0 offen offset:16
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[9:12], v45, s[4:7], 0 offen offset:32
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[13:16], v45, s[4:7], 0 offen offset:48
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[17:20], v45, s[4:7], 0 offen offset:64
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[21:24], v45, s[4:7], 0 offen offset:80
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[25:28], v45, s[4:7], 0 offen offset:96
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[29:32], v45, s[4:7], 0 offen offset:112
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[33:36], v45, s[4:7], 0 offen offset:128
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[48:51], v45, s[4:7], 0 offen offset:144
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[52:55], v45, s[4:7], 0 offen offset:160
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[37:40], v45, s[4:7], 0 offen offset:176
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[41:44], v45, s[4:7], 0 offen offset:192
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[56:59], v45, s[4:7], 0 offen offset:208
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[60:63], v45, s[4:7], 0 offen offset:224
|
|
; SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen offset:240
|
|
; SDAG-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; SDAG-NEXT: ; %bb.2: ; %memcpy-split
|
|
; SDAG-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; SDAG-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; GISEL-LABEL: memcpy_known_medium:
|
|
; GISEL: ; %bb.0:
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GISEL-NEXT: s_mov_b32 s8, 0
|
|
; GISEL-NEXT: s_mov_b32 s4, s21
|
|
; GISEL-NEXT: s_mov_b32 s5, s22
|
|
; GISEL-NEXT: s_mov_b32 s6, s23
|
|
; GISEL-NEXT: s_mov_b32 s7, s24
|
|
; GISEL-NEXT: v_mov_b32_e32 v0, s8
|
|
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: .LBB1_1: ; %load-store-loop
|
|
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-NEXT: v_add_u32_e32 v45, s20, v0
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[5:8], v45, s[16:19], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[9:12], v45, s[16:19], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[13:16], v45, s[16:19], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[17:20], v45, s[16:19], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[21:24], v45, s[16:19], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[25:28], v45, s[16:19], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[29:32], v45, s[16:19], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[33:36], v45, s[16:19], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[48:51], v45, s[16:19], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[52:55], v45, s[16:19], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[37:40], v45, s[16:19], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[41:44], v45, s[16:19], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[56:59], v45, s[16:19], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[60:63], v45, s[16:19], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dwordx4 v[1:4], v45, s[16:19], 0 offen offset:240
|
|
; GISEL-NEXT: v_add_u32_e32 v45, s25, v0
|
|
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x100, v0
|
|
; GISEL-NEXT: s_xor_b64 s[8:9], vcc, -1
|
|
; GISEL-NEXT: s_xor_b64 s[8:9], s[8:9], -1
|
|
; GISEL-NEXT: s_and_b64 vcc, s[8:9], exec
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
|
|
; GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_nop 0
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[5:8], v45, s[4:7], 0 offen offset:16
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[9:12], v45, s[4:7], 0 offen offset:32
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[13:16], v45, s[4:7], 0 offen offset:48
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[17:20], v45, s[4:7], 0 offen offset:64
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[21:24], v45, s[4:7], 0 offen offset:80
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[25:28], v45, s[4:7], 0 offen offset:96
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[29:32], v45, s[4:7], 0 offen offset:112
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[33:36], v45, s[4:7], 0 offen offset:128
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[48:51], v45, s[4:7], 0 offen offset:144
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[52:55], v45, s[4:7], 0 offen offset:160
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[37:40], v45, s[4:7], 0 offen offset:176
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[41:44], v45, s[4:7], 0 offen offset:192
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[56:59], v45, s[4:7], 0 offen offset:208
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[60:63], v45, s[4:7], 0 offen offset:224
|
|
; GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: buffer_store_dwordx4 v[1:4], v45, s[4:7], 0 offen offset:240
|
|
; GISEL-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GISEL-NEXT: ; %bb.2: ; %memcpy-split
|
|
; GISEL-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
|
|
; GISEL-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
; SDAG-GFX942-LABEL: memcpy_known_medium:
|
|
; SDAG-GFX942: ; %bb.0:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX942-NEXT: s_load_dword s17, s[4:5], 0x34
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX942-NEXT: s_load_dword s12, s[4:5], 0x54
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s5, s16
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[6:7], s[4:5], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s2
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s3, s16
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s12
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s11
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[14:15], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s17, s10
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[12:13], s[2:3], s[16:17]
|
|
; SDAG-GFX942-NEXT: .LBB1_1: ; %static-memcpy-expansion-main-body
|
|
; SDAG-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX942-NEXT: s_add_i32 s1, s0, s16
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[4:7], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v0, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v0, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v0, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v0, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v0, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v0, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v0, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v0, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v0, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v0, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v0, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v0, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v0, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v0, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v0, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_add_i32 s1, s8, s16
|
|
; SDAG-GFX942-NEXT: s_addk_i32 s16, 0x100
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s1
|
|
; SDAG-GFX942-NEXT: s_cmpk_lt_u32 s16, 0x100
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v0, s[12:15], 0 offen
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v0, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v0, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v0, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v0, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v0, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v0, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v0, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v0, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v0, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v0, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v0, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v0, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v0, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v0, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 a[0:3], v0, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX942-NEXT: s_cbranch_scc1 .LBB1_1
|
|
; SDAG-GFX942-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; SDAG-GFX942-NEXT: s_endpgm
|
|
;
|
|
; SDAG-GFX1100-LABEL: memcpy_known_medium:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_clause 0x3
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s17, s[4:5], 0x34
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s18, s[4:5], 0x54
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s16, 0
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s5, s16
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s16
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s15, s16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s12, s1
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s2
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s14, s11
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[12:13], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s18
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s2, s9
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s17, s10
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s3, s16
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[12:13], s[2:3], s[16:17]
|
|
; SDAG-GFX1100-NEXT: .LBB1_1: ; %static-memcpy-expansion-main-body
|
|
; SDAG-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; SDAG-GFX1100-NEXT: s_add_i32 s1, s0, s16
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v60, s1
|
|
; SDAG-GFX1100-NEXT: s_add_i32 s1, s8, s16
|
|
; SDAG-GFX1100-NEXT: s_addk_i32 s16, 0x100
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v64, s1
|
|
; SDAG-GFX1100-NEXT: s_cmpk_lt_u32 s16, 0x100
|
|
; SDAG-GFX1100-NEXT: s_clause 0xf
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v60, s[4:7], 0 offen
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[4:7], v60, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[8:11], v60, s[4:7], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[12:15], v60, s[4:7], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[16:19], v60, s[4:7], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[20:23], v60, s[4:7], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[24:27], v60, s[4:7], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[28:31], v60, s[4:7], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[32:35], v60, s[4:7], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[36:39], v60, s[4:7], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[40:43], v60, s[4:7], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[44:47], v60, s[4:7], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[48:51], v60, s[4:7], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[52:55], v60, s[4:7], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[56:59], v60, s[4:7], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[60:63], v60, s[4:7], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v64, s[12:15], 0 offen
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[4:7], v64, s[12:15], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[8:11], v64, s[12:15], 0 offen offset:32
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[12:15], v64, s[12:15], 0 offen offset:48
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[16:19], v64, s[12:15], 0 offen offset:64
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[20:23], v64, s[12:15], 0 offen offset:80
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[24:27], v64, s[12:15], 0 offen offset:96
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[28:31], v64, s[12:15], 0 offen offset:112
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[32:35], v64, s[12:15], 0 offen offset:128
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[36:39], v64, s[12:15], 0 offen offset:144
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[40:43], v64, s[12:15], 0 offen offset:160
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[44:47], v64, s[12:15], 0 offen offset:176
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[48:51], v64, s[12:15], 0 offen offset:192
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[52:55], v64, s[12:15], 0 offen offset:208
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[56:59], v64, s[12:15], 0 offen offset:224
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[60:63], v64, s[12:15], 0 offen offset:240
|
|
; SDAG-GFX1100-NEXT: s_cbranch_scc1 .LBB1_1
|
|
; SDAG-GFX1100-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; SDAG-GFX1100-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX942-LABEL: memcpy_known_medium:
|
|
; GISEL-GFX942: ; %bb.0:
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54
|
|
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x100
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s4, s13
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
|
|
; GISEL-GFX942-NEXT: .LBB1_1: ; %static-memcpy-expansion-main-body
|
|
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v62, s[8:11], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v62, s[8:11], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v62, s[8:11], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v62, s[8:11], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v62, s[8:11], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v62, s[8:11], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v62, s[8:11], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v62, s[8:11], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v62, s[8:11], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v62, s[8:11], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 a[0:3], v62, s[8:11], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
|
|
; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
|
|
; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[10:13], v63, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v63, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v63, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v63, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v63, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v63, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v63, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill
|
|
; GISEL-GFX942-NEXT: scratch_load_dwordx4 v[2:5], off, off ; 16-byte Folded Reload
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX942-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GISEL-GFX942-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; GISEL-GFX942-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX1100-LABEL: memcpy_known_medium:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_clause 0x3
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[8:11], s[4:5], 0x44
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x34
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s15, s[4:5], 0x54
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, 0
|
|
; GISEL-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, s4
|
|
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s12, s9
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s13, s10
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s14, s11
|
|
; GISEL-GFX1100-NEXT: .LBB1_1: ; %static-memcpy-expansion-main-body
|
|
; GISEL-GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v61, s0, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v65, s8, v0
|
|
; GISEL-GFX1100-NEXT: v_add_nc_u32_e32 v0, 0x100, v0
|
|
; GISEL-GFX1100-NEXT: s_clause 0xf
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[1:4], v61, s[4:7], 0 offen
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[5:8], v61, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[9:12], v61, s[4:7], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[13:16], v61, s[4:7], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[17:20], v61, s[4:7], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[21:24], v61, s[4:7], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[25:28], v61, s[4:7], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[29:32], v61, s[4:7], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[33:36], v61, s[4:7], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[37:40], v61, s[4:7], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[41:44], v61, s[4:7], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[45:48], v61, s[4:7], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[49:52], v61, s[4:7], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[53:56], v61, s[4:7], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[57:60], v61, s[4:7], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[61:64], v61, s[4:7], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(15)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[1:4], v65, s[12:15], 0 offen
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(14)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[5:8], v65, s[12:15], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(13)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[9:12], v65, s[12:15], 0 offen offset:32
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(12)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[13:16], v65, s[12:15], 0 offen offset:48
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(11)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[17:20], v65, s[12:15], 0 offen offset:64
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(10)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[21:24], v65, s[12:15], 0 offen offset:80
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(9)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[25:28], v65, s[12:15], 0 offen offset:96
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(8)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[29:32], v65, s[12:15], 0 offen offset:112
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(7)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[33:36], v65, s[12:15], 0 offen offset:128
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(6)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[37:40], v65, s[12:15], 0 offen offset:144
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(5)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[41:44], v65, s[12:15], 0 offen offset:160
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(4)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[45:48], v65, s[12:15], 0 offen offset:176
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(3)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[49:52], v65, s[12:15], 0 offen offset:192
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(2)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[53:56], v65, s[12:15], 0 offen offset:208
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(1)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[57:60], v65, s[12:15], 0 offen offset:224
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[61:64], v65, s[12:15], 0 offen offset:240
|
|
; GISEL-GFX1100-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x100, v0
|
|
; GISEL-GFX1100-NEXT: s_cbranch_vccnz .LBB1_1
|
|
; GISEL-GFX1100-NEXT: ; %bb.2: ; %static-memcpy-post-expansion
|
|
; GISEL-GFX1100-NEXT: s_endpgm
|
|
call void @llvm.memcpy.p7.p7.i32(ptr addrspace(7) noundef nonnull align 16 %dst, ptr addrspace(7) noundef nonnull align 16 %src, i32 256, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; A 32-byte memcpy between buffer fat pointers (addrspace(7)) with a constant
; length. The checks below expect straight-line code for all four check
; prefixes: two 16-byte buffer load/store pairs at offsets 0 and 16, no loop.
define amdgpu_kernel void @memcpy_known_small(ptr addrspace(7) %src, ptr addrspace(7) %dst) {
|
|
; SDAG-GFX942-LABEL: memcpy_known_small:
|
|
; SDAG-GFX942: ; %bb.0:
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX942-NEXT: s_load_dword s13, s[4:5], 0x34
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s12, 0
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s7, s12
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s6, s3
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s13, s2
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13]
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v0, s0
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[8:11], 0 offen
|
|
; SDAG-GFX942-NEXT: s_load_dword s13, s[4:5], 0x54
|
|
; SDAG-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s5, s12
|
|
; SDAG-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13]
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s13, s2
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX942-NEXT: s_mov_b32 s3, s12
|
|
; SDAG-GFX942-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
|
|
; SDAG-GFX942-NEXT: v_mov_b32_e32 v1, s0
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v1, s[4:7], 0 offen
|
|
; SDAG-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v0, s[8:11], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v1, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX942-NEXT: s_endpgm
|
|
;
|
|
; SDAG-GFX1100-LABEL: memcpy_known_small:
|
|
; SDAG-GFX1100: ; %bb.0:
|
|
; SDAG-GFX1100-NEXT: s_clause 0x1
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s13, s[4:5], 0x34
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s12, 0
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s7, s12
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s9, s12
|
|
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s6, s3
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s8, s1
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v4, s0
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_clause 0x1
|
|
; SDAG-GFX1100-NEXT: s_load_b32 s13, s[4:5], 0x54
|
|
; SDAG-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s5, s12
|
|
; SDAG-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SDAG-GFX1100-NEXT: v_mov_b32_e32 v5, s0
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s4, s3
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s3, s12
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s13, s2
|
|
; SDAG-GFX1100-NEXT: s_mov_b32 s2, s1
|
|
; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; SDAG-GFX1100-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13]
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen
|
|
; SDAG-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; SDAG-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen offset:16
|
|
; SDAG-GFX1100-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX942-LABEL: memcpy_known_small:
|
|
; GISEL-GFX942: ; %bb.0:
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v4, s0
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen
|
|
; GISEL-GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
|
|
; GISEL-GFX942-NEXT: s_load_dword s7, s[4:5], 0x54
|
|
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s4, s1
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s5, s2
|
|
; GISEL-GFX942-NEXT: s_mov_b32 s6, s3
|
|
; GISEL-GFX942-NEXT: v_mov_b32_e32 v5, s0
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen
|
|
; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[0:3], v4, s[8:11], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[0:3], v5, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX942-NEXT: s_endpgm
|
|
;
|
|
; GISEL-GFX1100-LABEL: memcpy_known_small:
|
|
; GISEL-GFX1100: ; %bb.0:
|
|
; GISEL-GFX1100-NEXT: s_clause 0x1
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s11, s[4:5], 0x34
|
|
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v4, s0
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s8, s1
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s9, s2
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s10, s3
|
|
; GISEL-GFX1100-NEXT: s_clause 0x1
|
|
; GISEL-GFX1100-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
|
|
; GISEL-GFX1100-NEXT: s_load_b32 s7, s[4:5], 0x54
|
|
; GISEL-GFX1100-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GISEL-GFX1100-NEXT: v_mov_b32_e32 v5, s0
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s4, s1
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s5, s2
|
|
; GISEL-GFX1100-NEXT: s_mov_b32 s6, s3
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen
|
|
; GISEL-GFX1100-NEXT: buffer_load_b128 v[0:3], v4, s[8:11], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0)
|
|
; GISEL-GFX1100-NEXT: buffer_store_b128 v[0:3], v5, s[4:7], 0 offen offset:16
|
|
; GISEL-GFX1100-NEXT: s_endpgm
|
|
call void @llvm.memcpy.p7.p7.i32(ptr addrspace(7) %dst, ptr addrspace(7) %src, i32 32, i1 false)
|
|
ret void
|
|
}
|