llvm-project/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
Simon Pilgrim d561259a08
[DAG] visitFREEZE - replace multiple frozen/unfrozen uses of an SDValue with just the frozen node (#150017)
Similar to InstCombinerImpl::freezeOtherUses, attempt to ensure that we
merge multiple frozen/unfrozen uses of a SDValue. This fixes a number of
hasOneUse() problems when trying to push FREEZE nodes through the DAG.

Remove SimplifyMultipleUseDemandedBits handling of FREEZE nodes as we
now want to keep the common node, and not bypass for some nodes just
because of DemandedElts.

Fixes #149799
2025-08-05 09:24:09 +01:00

9713 lines
405 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-- < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; Loads a scalar i1 through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. For all four check
; prefixes the expected code reads one byte and ANDs it with 1 before the
; store.
define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a plain i1 load followed by an i1 store, no extension.
%load = load i1, ptr addrspace(4) %in
store i1 %load, ptr addrspace(1) %out
ret void
}
; Loads a <2 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. The amdgcn check prefixes
; expect a byte load feeding a byte store with no masking in between; the r600
; (EG prefix) assertions expect the loaded value to be ANDed with the literal 3.
define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v2i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v2i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v2i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v2i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <2 x i1> load followed by a store of the same vector.
%load = load <2 x i1>, ptr addrspace(4) %in
store <2 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a <3 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. Every check prefix expects
; a single byte-sized load and a byte-sized store; none of the assertions mask
; the loaded value itself (the only AND in the EG sequence operates on the
; KC0[2].Y operand, not on the loaded register).
define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v3i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v3i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v3i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T0.X, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <3 x i1> load followed by a store of the same vector.
%load = load <3 x i1>, ptr addrspace(4) %in
store <3 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a <4 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. The amdgcn check prefixes
; expect a byte load feeding a byte store; the r600 (EG prefix) assertions
; additionally expect the loaded value to be ANDed with the literal 15.
define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v4i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v4i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v4i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 15(2.101948e-44)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v4i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <4 x i1> load followed by a store of the same vector.
%load = load <4 x i1>, ptr addrspace(4) %in
store <4 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a <8 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. The amdgcn check prefixes
; expect a byte load feeding a byte store; the r600 (EG prefix) assertions
; additionally expect the loaded value to be ANDed with the literal 255.
define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v8i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v8i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v8i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v8i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <8 x i1> load followed by a store of the same vector.
%load = load <8 x i1>, ptr addrspace(4) %in
store <8 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a <16 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. The amdgcn check prefixes
; expect a 16-bit load feeding a 16-bit store; the r600 (EG prefix) assertions
; expect VTX_READ_16 with the loaded value ANDed with the literal 65535.
define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v16i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v16i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_short v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v16i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, T1.W, PV.W,
; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v16i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u16 v1, v0, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <16 x i1> load followed by a store of the same vector.
%load = load <16 x i1>, ptr addrspace(4) %in
store <16 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a <32 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. The amdgcn check prefixes
; expect a scalar 32-bit load (s_load_dword / s_load_b32) whose result is moved
; into a VGPR and written with one dword store; the r600 (EG prefix)
; assertions expect VTX_READ_32 followed by a STORE_RAW.
define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v32i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v32i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v32i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v32i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <32 x i1> load followed by a store of the same vector.
%load = load <32 x i1>, ptr addrspace(4) %in
store <32 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a <64 x i1> vector through the addrspace(4) pointer %in and stores it,
; unmodified, through the addrspace(1) pointer %out. The amdgcn check prefixes
; expect a scalar 64-bit load (s_load_dwordx2 / s_load_b64) whose two halves
; are moved into VGPRs and written with one 64-bit store; the r600 (EG prefix)
; assertions expect VTX_READ_64 followed by a STORE_RAW of T0.XY.
define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_load_v64i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_load_v64i1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_load_v64i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v64i1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: a <64 x i1> load followed by a store of the same vector.
%load = load <64 x i1>, ptr addrspace(4) %in
store <64 x i1> %load, ptr addrspace(1) %out
ret void
}
; Loads a scalar i1 through the addrspace(4) pointer %in, zero-extends it to
; i32 and stores the i32 through the addrspace(1) pointer %out. Every check
; prefix expects a byte-sized load whose result is written directly with a
; 32-bit store; no separate masking instruction appears in the assertions.
define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_i1_to_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_i1_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_i1_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_i1_to_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: i1 load, zext to i32, i32 store.
%a = load i1, ptr addrspace(4) %in
%ext = zext i1 %a to i32
store i32 %ext, ptr addrspace(1) %out
ret void
}
; Loads a scalar i1 through the addrspace(4) pointer %in, sign-extends it to
; i32 and stores the i32 through the addrspace(1) pointer %out. Every check
; prefix expects a byte-sized load followed by one signed bitfield-extract
; instruction (v_bfe_i32, BFE_INT or s_bfe_i32) before the 32-bit store.
define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_i1_to_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_i1_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_i1_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_i1_to_i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10000
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: i1 load, sext to i32, i32 store.
%a = load i1, ptr addrspace(4) %in
%ext = sext i1 %a to i32
store i32 %ext, ptr addrspace(1) %out
ret void
}
; Single-element vector variant of the scalar zero-extending case: loads
; <1 x i1> through the addrspace(4) pointer %in, zero-extends it to <1 x i32>
; and stores the result through the addrspace(1) pointer %out. The expected
; instruction sequences are identical to those asserted for
; @constant_zextload_i1_to_i32 (byte load, then a 32-bit store).
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v1i1_to_v1i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: <1 x i1> load, zext to <1 x i32>, vector store.
%load = load <1 x i1>, ptr addrspace(4) %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(1) %out
ret void
}
; Single-element vector variant of the scalar sign-extending case: loads
; <1 x i1> through the addrspace(4) pointer %in, sign-extends it to <1 x i32>
; and stores the result through the addrspace(1) pointer %out. The expected
; instruction sequences are identical to those asserted for
; @constant_sextload_i1_to_i32 (byte load, one signed bitfield extract, then a
; 32-bit store).
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v1i1_to_v1i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10000
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: <1 x i1> load, sext to <1 x i32>, vector store.
%load = load <1 x i1>, ptr addrspace(4) %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(1) %out
ret void
}
; Loads <2 x i1> through the addrspace(4) pointer %in, zero-extends each lane
; to i32 and stores the <2 x i32> through the addrspace(1) pointer %out. The
; assertions expect one byte load, then per-lane extraction: the low result is
; the loaded value ANDed with 1, and the high result comes from a logical
; right shift by 1 on the amdgcn prefixes or from BFE_UINT on the r600 prefix.
; Both results are written with a single two-dword store.
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v0
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v3, 1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v2i1_to_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T0.Y, T0.X, 1, 1,
; EG-NEXT: AND_INT T0.X, T0.X, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v2, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: <2 x i1> load, zext to <2 x i32>, vector store.
%load = load <2 x i1>, ptr addrspace(4) %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(1) %out
ret void
}
; Loads <2 x i1> through the addrspace(4) pointer %in, sign-extends each lane
; to i32 and stores the <2 x i32> through the addrspace(1) pointer %out. The
; amdgcn assertions expect one byte load followed by two signed bitfield
; extracts (one per lane) and a single two-dword store. The r600 (EG prefix)
; assertions expect BFE_INT for the first result, and an LSHR by 1 followed by
; a second BFE_INT for the other.
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v1, v0, 1, 1
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v3, v2, 1, 1
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v2i1_to_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR T0.W, T0.X, 1,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT * T1.Y, PV.W, 0.0, 1,
;
; GFX12-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: <2 x i1> load, sext to <2 x i32>, vector store.
%load = load <2 x i1>, ptr addrspace(4) %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(1) %out
ret void
}
; Zero-extending load of <3 x i1> through the addrspace(4) pointer %in; the
; result is widened to <3 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches one byte, takes element 0 with an AND by 1 and
; element 1 with an unsigned 1-bit extract at bit 1, and forms element 2 with
; a logical shift right by 2. The default amdgcn run and the cypress run emit
; two stores (the second one for the third dword at byte offset 8), while the
; tonga and gfx1200 runs emit one three-dword store
; (flat_store_dwordx3 / global_store_b96).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v2, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
; GFX6-NEXT: v_bfe_u32 v1, v2, 1, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 2, v2
; GFX6-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v1, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, 2
; GFX8-NEXT: v_mov_b32_e32 v3, s0
; GFX8-NEXT: v_mov_b32_e32 v4, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v1
; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 1
; GFX8-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v3i1_to_v3i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T3.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T1.Y, T0.X, 1, 1,
; EG-NEXT: AND_INT T1.X, T0.X, 1,
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: LSHR T0.X, T0.X, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v3, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v3, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 2, v0
; GFX12-NEXT: v_and_b32_e32 v0, 1, v1
; GFX12-NEXT: v_bfe_u32 v1, v1, 1, 1
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <3 x i1>, ptr addrspace(4) %in
  %ext = zext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Sign-extending load of <3 x i1> through the addrspace(4) pointer %in; the
; result is widened to <3 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches one byte and sign-extends bits 0, 1 and 2 of it
; into the three result dwords. The default amdgcn run and the cypress run
; emit two stores (the second one for the third dword at byte offset 8),
; while the tonga and gfx1200 runs emit one three-dword store. The gfx1200
; run fetches the byte with a scalar s_load_u8 and extracts with s_bfe_i32.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v3i1_to_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v2, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v1, v2, 1, 1
; GFX6-NEXT: v_bfe_i32 v0, v2, 0, 1
; GFX6-NEXT: v_bfe_i32 v2, v2, 2, 1
; GFX6-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v3i1_to_v3i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v3, s0
; GFX8-NEXT: v_mov_b32_e32 v4, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v0, 2, 1
; GFX8-NEXT: v_bfe_i32 v1, v0, 1, 1
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v3i1_to_v3i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T0.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT * T2.X, PV.W, 0.0, 1,
; EG-NEXT: BFE_INT T3.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR T0.W, T0.X, 1,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T0.X, PS, literal.x,
; EG-NEXT: BFE_INT * T3.Y, PV.W, 0.0, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v3i1_to_v3i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10002
; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4
; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <3 x i1>, ptr addrspace(4) %in
  %ext = sext <3 x i1> %load to <3 x i32>
  store <3 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Zero-extending load of <4 x i1> through the addrspace(4) pointer %in; the
; result is widened to <4 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches one byte and writes all four dwords with a single
; store. Element 0 is an AND by 1 and elements 1 and 2 are unsigned 1-bit
; extracts. Element 3 is formed with a logical shift right by 3 in the three
; amdgcn runs, whereas the cypress run uses BFE_UINT for it as well.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v4i1_to_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 3, v1
; GFX6-NEXT: v_and_b32_e32 v0, 1, v1
; GFX6-NEXT: v_bfe_u32 v2, v1, 2, 1
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v4i1_to_v4i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v1, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, 3
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v1
; GFX8-NEXT: v_bfe_u32 v2, v1, 2, 1
; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 1
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v4i1_to_v4i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T0.W, T0.X, literal.x, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T0.Z, T0.X, literal.x, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T0.Y, T0.X, 1, 1,
; EG-NEXT: AND_INT T0.X, T0.X, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v4i1_to_v4i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v4, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v1, v4, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v3, 0xffff, v1
; GFX12-NEXT: v_and_b32_e32 v0, 1, v1
; GFX12-NEXT: v_bfe_u32 v2, v1, 2, 1
; GFX12-NEXT: v_bfe_u32 v1, v1, 1, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT: v_lshrrev_b32_e32 v3, 3, v3
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <4 x i1>, ptr addrspace(4) %in
  %ext = zext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Sign-extending load of <4 x i1> through the addrspace(4) pointer %in; the
; result is widened to <4 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches one byte, sign-extends bits 0 through 3 of it
; into four dwords and writes them with a single four-dword store. The
; gfx1200 run fetches the byte with a scalar s_load_u8 and extracts with
; s_bfe_i32 before copying the results to vector registers.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v4i1_to_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v3, v0, 3, 1
; GFX6-NEXT: v_bfe_i32 v2, v0, 2, 1
; GFX6-NEXT: v_bfe_i32 v1, v0, 1, 1
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v4i1_to_v4i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v3, v0, 3, 1
; GFX8-NEXT: v_bfe_i32 v2, v0, 2, 1
; GFX8-NEXT: v_bfe_i32 v1, v0, 1, 1
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v4i1_to_v4i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T1.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T1.Z, PS, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, 1,
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
; EG-NEXT: BFE_INT * T1.Y, PV.W, 0.0, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v4i1_to_v4i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <4 x i1>, ptr addrspace(4) %in
  %ext = sext <4 x i1> %load to <4 x i32>
  store <4 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Zero-extending load of <8 x i1> through the addrspace(4) pointer %in; the
; result is widened to <8 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches one byte and writes the eight dwords as two
; four-dword stores, at byte offsets 0 and 16 from %out. In the tonga and
; gfx1200 runs the loaded byte is copied to a scalar register with
; v_readfirstlane_b32 and elements 0 through 5 are extracted with scalar
; instructions, while elements 6 and 7 are still computed with vector
; instructions (v_bfe_u32 at bit 6 and a shift right by 7).
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v8i1_to_v8i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_u32 v3, v4, 3, 1
; GFX6-NEXT: v_bfe_u32 v1, v4, 1, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 7, v4
; GFX6-NEXT: v_bfe_u32 v5, v4, 5, 1
; GFX6-NEXT: v_and_b32_e32 v0, 1, v4
; GFX6-NEXT: v_bfe_u32 v2, v4, 2, 1
; GFX6-NEXT: v_bfe_u32 v6, v4, 6, 1
; GFX6-NEXT: v_bfe_u32 v4, v4, 4, 1
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v8i1_to_v8i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v9, s1
; GFX8-NEXT: v_mov_b32_e32 v8, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10001
; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10005
; GFX8-NEXT: s_and_b32 s6, s2, 1
; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10002
; GFX8-NEXT: s_bfe_u32 s2, s2, 0x10004
; GFX8-NEXT: s_add_u32 s0, s0, 16
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT: v_mov_b32_e32 v11, s1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 7, v0
; GFX8-NEXT: v_bfe_u32 v2, v0, 6, 1
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v10, s0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v5, s4
; GFX8-NEXT: v_mov_b32_e32 v6, s7
; GFX8-NEXT: v_mov_b32_e32 v7, s3
; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v8i1_to_v8i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T6.W, T5.X, literal.x, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T6.Z, T5.X, literal.x, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T6.Y, T5.X, 1, 1,
; EG-NEXT: BFE_UINT * T5.W, T5.X, literal.x, 1,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T6.X, T5.X, 1,
; EG-NEXT: BFE_UINT T5.Z, T5.X, literal.x, 1,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y,
; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45)
; EG-NEXT: BFE_UINT * T5.Y, T5.X, literal.x, 1,
; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T5.X, T5.X, literal.x, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 4(5.605194e-45), 16(2.242078e-44)
; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v8i1_to_v8i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v8, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v8, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v5, s4 :: v_dual_and_b32 v0, 0xffff, v0
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10005
; GFX12-NEXT: s_and_b32 s6, s2, 1
; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10002
; GFX12-NEXT: s_bfe_u32 s2, s2, 0x10004
; GFX12-NEXT: v_lshrrev_b32_e32 v3, 7, v0
; GFX12-NEXT: v_bfe_u32 v2, v0, 6, 1
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v7, s3
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v1, s5 :: v_dual_mov_b32 v4, s6
; GFX12-NEXT: v_mov_b32_e32 v6, s7
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <8 x i1>, ptr addrspace(4) %in
  %ext = zext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Sign-extending load of <8 x i1> through the addrspace(4) pointer %in; the
; result is widened to <8 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches one byte, sign-extends bits 0 through 7 of it
; into eight dwords and writes them as two four-dword stores, at byte offsets
; 0 and 16 from %out. The gfx1200 run fetches the byte with a scalar
; s_load_u8 and does every extract with s_bfe_i32 before copying the results
; to vector registers.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v8i1_to_v8i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v3, v4, 3, 1
; GFX6-NEXT: v_bfe_i32 v2, v4, 2, 1
; GFX6-NEXT: v_bfe_i32 v1, v4, 1, 1
; GFX6-NEXT: v_bfe_i32 v0, v4, 0, 1
; GFX6-NEXT: v_bfe_i32 v7, v4, 7, 1
; GFX6-NEXT: v_bfe_i32 v6, v4, 6, 1
; GFX6-NEXT: v_bfe_i32 v5, v4, 5, 1
; GFX6-NEXT: v_bfe_i32 v4, v4, 4, 1
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v8i1_to_v8i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v4, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v11, s3
; GFX8-NEXT: v_mov_b32_e32 v9, s1
; GFX8-NEXT: v_mov_b32_e32 v10, s2
; GFX8-NEXT: v_mov_b32_e32 v8, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v3, v4, 3, 1
; GFX8-NEXT: v_bfe_i32 v2, v4, 2, 1
; GFX8-NEXT: v_bfe_i32 v1, v4, 1, 1
; GFX8-NEXT: v_bfe_i32 v0, v4, 0, 1
; GFX8-NEXT: v_bfe_i32 v7, v4, 7, 1
; GFX8-NEXT: v_bfe_i32 v6, v4, 6, 1
; GFX8-NEXT: v_bfe_i32 v5, v4, 5, 1
; GFX8-NEXT: v_bfe_i32 v4, v4, 4, 1
; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v8i1_to_v8i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 23, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T7.X, T5.X, 0.0, 1,
; EG-NEXT: BFE_INT T6.Z, PS, 0.0, 1,
; EG-NEXT: LSHR T0.W, T5.X, literal.x,
; EG-NEXT: LSHR * T1.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 5(7.006492e-45)
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
; EG-NEXT: BFE_INT T6.Y, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T5.X, literal.x,
; EG-NEXT: BFE_INT T7.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T5.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 4(5.605194e-45)
; EG-NEXT: BFE_INT T6.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T7.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T5.X, 1,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T5.X, PS, literal.x,
; EG-NEXT: BFE_INT * T7.Y, PV.W, 0.0, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v8i1_to_v8i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001
; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007
; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006
; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10004
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10005
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: v_dual_mov_b32 v0, s9 :: v_dual_mov_b32 v3, s7
; GFX12-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v5, s5
; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v7, s3
; GFX12-NEXT: v_mov_b32_e32 v6, s4
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <8 x i1>, ptr addrspace(4) %in
  %ext = sext <8 x i1> %load to <8 x i32>
  store <8 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Zero-extending load of <16 x i1> through the addrspace(4) pointer %in; the
; result is widened to <16 x i32> and stored through the addrspace(1) pointer
; %out.
; The expected code fetches a single 16-bit value and writes the sixteen
; dwords as four four-dword stores, at byte offsets 0, 16, 32 and 48 from
; %out. Element 15 is formed with a logical shift right by 15 on every
; target. In the tonga and gfx1200 runs the loaded value is copied to a
; scalar register with v_readfirstlane_b32 and all extracts are scalar; some
; of them read a copy that was first masked with 0xffff.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v16i1_to_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s10, s2
; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s6
; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ushort v12, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_u32 v3, v12, 3, 1
; GFX6-NEXT: v_bfe_u32 v1, v12, 1, 1
; GFX6-NEXT: v_bfe_u32 v7, v12, 7, 1
; GFX6-NEXT: v_bfe_u32 v5, v12, 5, 1
; GFX6-NEXT: v_bfe_u32 v11, v12, 11, 1
; GFX6-NEXT: v_bfe_u32 v9, v12, 9, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v15, 15, v12
; GFX6-NEXT: v_bfe_u32 v13, v12, 13, 1
; GFX6-NEXT: v_and_b32_e32 v0, 1, v12
; GFX6-NEXT: v_bfe_u32 v2, v12, 2, 1
; GFX6-NEXT: v_bfe_u32 v6, v12, 6, 1
; GFX6-NEXT: v_bfe_u32 v4, v12, 4, 1
; GFX6-NEXT: v_bfe_u32 v10, v12, 10, 1
; GFX6-NEXT: v_bfe_u32 v8, v12, 8, 1
; GFX6-NEXT: v_bfe_u32 v14, v12, 14, 1
; GFX6-NEXT: v_bfe_u32 v12, v12, 12, 1
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v16i1_to_v16i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v17, s1
; GFX8-NEXT: v_mov_b32_e32 v16, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-NEXT: s_and_b32 s6, 0xffff, s2
; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10001
; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007
; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10009
; GFX8-NEXT: s_bfe_u32 s8, s2, 0x1000d
; GFX8-NEXT: s_and_b32 s9, s2, 1
; GFX8-NEXT: s_bfe_u32 s10, s2, 0x1000a
; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000c
; GFX8-NEXT: s_bfe_u32 s11, s6, 0x10005
; GFX8-NEXT: s_bfe_u32 s12, s6, 0x1000b
; GFX8-NEXT: s_lshr_b32 s13, s6, 15
; GFX8-NEXT: s_bfe_u32 s14, s6, 0x10002
; GFX8-NEXT: s_bfe_u32 s15, s6, 0x10006
; GFX8-NEXT: s_bfe_u32 s16, s6, 0x10004
; GFX8-NEXT: s_bfe_u32 s17, s6, 0x10008
; GFX8-NEXT: s_bfe_u32 s6, s6, 0x1000e
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v19, s3
; GFX8-NEXT: v_mov_b32_e32 v1, s8
; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: v_mov_b32_e32 v3, s13
; GFX8-NEXT: v_mov_b32_e32 v18, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: v_mov_b32_e32 v6, s10
; GFX8-NEXT: v_mov_b32_e32 v4, s17
; GFX8-NEXT: v_mov_b32_e32 v7, s12
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_add_u32 s0, s0, 16
; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v11, s5
; GFX8-NEXT: v_mov_b32_e32 v8, s16
; GFX8-NEXT: v_mov_b32_e32 v9, s11
; GFX8-NEXT: v_mov_b32_e32 v10, s15
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v12, s9
; GFX8-NEXT: v_mov_b32_e32 v13, s4
; GFX8-NEXT: v_mov_b32_e32 v14, s14
; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v16i1_to_v16i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 36, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: BFE_UINT * T8.W, T7.X, literal.x, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T8.Z, T7.X, literal.x, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T8.Y, T7.X, 1, 1,
; EG-NEXT: BFE_UINT * T9.W, T7.X, literal.x, 1,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T8.X, T7.X, 1,
; EG-NEXT: BFE_UINT T9.Z, T7.X, literal.x, 1,
; EG-NEXT: LSHR * T10.X, KC0[2].Y, literal.y,
; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45)
; EG-NEXT: BFE_UINT T9.Y, T7.X, literal.x, 1,
; EG-NEXT: BFE_UINT * T11.W, T7.X, literal.y, 1,
; EG-NEXT: 5(7.006492e-45), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T9.X, T7.X, literal.x, 1,
; EG-NEXT: BFE_UINT T11.Z, T7.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T12.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T11.Y, T7.X, literal.y, 1,
; EG-NEXT: LSHR * T7.W, T7.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T11.X, T7.X, literal.x, 1,
; EG-NEXT: BFE_UINT T7.Z, T7.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T13.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT * T7.Y, T7.X, literal.y, 1,
; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44)
; EG-NEXT: BFE_UINT T7.X, T7.X, literal.x, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 12(1.681558e-44), 48(6.726233e-44)
; EG-NEXT: LSHR * T14.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v16i1_to_v16i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v16, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u16 v0, v16, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
; GFX12-NEXT: s_and_b32 s6, 0xffff, s2
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001
; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10007
; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10009
; GFX12-NEXT: s_bfe_u32 s8, s2, 0x1000d
; GFX12-NEXT: s_and_b32 s9, s2, 1
; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000a
; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1000c
; GFX12-NEXT: s_bfe_u32 s11, s6, 0x10005
; GFX12-NEXT: s_bfe_u32 s12, s6, 0x1000b
; GFX12-NEXT: s_lshr_b32 s13, s6, 15
; GFX12-NEXT: s_bfe_u32 s14, s6, 0x10002
; GFX12-NEXT: s_bfe_u32 s15, s6, 0x10006
; GFX12-NEXT: s_bfe_u32 s16, s6, 0x10004
; GFX12-NEXT: s_bfe_u32 s17, s6, 0x10008
; GFX12-NEXT: s_bfe_u32 s6, s6, 0x1000e
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s7
; GFX12-NEXT: v_mov_b32_e32 v1, s8
; GFX12-NEXT: v_dual_mov_b32 v15, s3 :: v_dual_mov_b32 v2, s6
; GFX12-NEXT: v_dual_mov_b32 v3, s13 :: v_dual_mov_b32 v4, s17
; GFX12-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v11, s5
; GFX12-NEXT: v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v8, s16
; GFX12-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v10, s15
; GFX12-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v13, s4
; GFX12-NEXT: v_mov_b32_e32 v14, s14
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
; GFX12-NEXT: s_endpgm
  %load = load <16 x i1>, ptr addrspace(4) %in
  %ext = zext <16 x i1> %load to <16 x i32>
  store <16 x i32> %ext, ptr addrspace(1) %out
  ret void
}
; Sign-extending load of a <16 x i1> vector from addrspace(4) (%in); the
; result is widened to <16 x i32> and stored through addrspace(1) (%out).
;
; The assertion lines inside the body are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of this
; file). Regenerate them with that script instead of editing them by hand.
;
; What the current expectations show for each RUN line:
;   - amdgcn default and tonga - a single 16-bit load (buffer_load_ushort /
;     flat_load_ushort), sixteen v_bfe_i32 (bit offsets 0..15, width 1) and
;     four dwordx4 stores covering byte offsets 0, 16, 32 and 48.
;   - r600 cypress - VTX_READ_16, sixteen BFE_INT (fed by LSHR for the
;     non-zero bit positions) and four MEM_RAT_CACHELESS stores.
;   - gfx1200 - s_load_u16, sixteen s_bfe_i32 and four global_store_b128
;     grouped under one s_clause.
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v16i1_to_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s10, s2
; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s6
; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ushort v12, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v3, v12, 3, 1
; GFX6-NEXT: v_bfe_i32 v2, v12, 2, 1
; GFX6-NEXT: v_bfe_i32 v1, v12, 1, 1
; GFX6-NEXT: v_bfe_i32 v0, v12, 0, 1
; GFX6-NEXT: v_bfe_i32 v7, v12, 7, 1
; GFX6-NEXT: v_bfe_i32 v6, v12, 6, 1
; GFX6-NEXT: v_bfe_i32 v5, v12, 5, 1
; GFX6-NEXT: v_bfe_i32 v4, v12, 4, 1
; GFX6-NEXT: v_bfe_i32 v11, v12, 11, 1
; GFX6-NEXT: v_bfe_i32 v10, v12, 10, 1
; GFX6-NEXT: v_bfe_i32 v9, v12, 9, 1
; GFX6-NEXT: v_bfe_i32 v8, v12, 8, 1
; GFX6-NEXT: v_bfe_i32 v15, v12, 15, 1
; GFX6-NEXT: v_bfe_i32 v14, v12, 14, 1
; GFX6-NEXT: v_bfe_i32 v13, v12, 13, 1
; GFX6-NEXT: v_bfe_i32 v12, v12, 12, 1
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v16i1_to_v16i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v18, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v9, s3
; GFX8-NEXT: v_mov_b32_e32 v8, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v13, s1
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v12, s0
; GFX8-NEXT: s_add_u32 s0, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v14, s2
; GFX8-NEXT: v_mov_b32_e32 v17, s1
; GFX8-NEXT: v_mov_b32_e32 v16, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v7, v18, 15, 1
; GFX8-NEXT: v_bfe_i32 v6, v18, 14, 1
; GFX8-NEXT: v_bfe_i32 v5, v18, 13, 1
; GFX8-NEXT: v_bfe_i32 v4, v18, 12, 1
; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
; GFX8-NEXT: v_bfe_i32 v11, v18, 11, 1
; GFX8-NEXT: v_bfe_i32 v10, v18, 10, 1
; GFX8-NEXT: v_bfe_i32 v9, v18, 9, 1
; GFX8-NEXT: v_bfe_i32 v8, v18, 8, 1
; GFX8-NEXT: v_bfe_i32 v3, v18, 3, 1
; GFX8-NEXT: v_bfe_i32 v2, v18, 2, 1
; GFX8-NEXT: v_bfe_i32 v1, v18, 1, 1
; GFX8-NEXT: v_bfe_i32 v0, v18, 0, 1
; GFX8-NEXT: v_bfe_i32 v7, v18, 7, 1
; GFX8-NEXT: v_bfe_i32 v6, v18, 6, 1
; GFX8-NEXT: v_bfe_i32 v5, v18, 5, 1
; GFX8-NEXT: v_bfe_i32 v4, v18, 4, 1
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[4:7]
; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v16i1_to_v16i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 51, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T7.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T9.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T8.Z, PS, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.x,
; EG-NEXT: LSHR * T1.W, T7.X, literal.y,
; EG-NEXT: 11(1.541428e-44), 5(7.006492e-45)
; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x,
; EG-NEXT: BFE_INT T8.Y, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T7.X, literal.y,
; EG-NEXT: BFE_INT T10.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T7.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44)
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T8.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T10.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 9(1.261169e-44), 16(2.242078e-44)
; EG-NEXT: LSHR T11.X, PS, literal.x,
; EG-NEXT: BFE_INT T10.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.y,
; EG-NEXT: LSHR * T1.W, T7.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 15(2.101948e-44)
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T10.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T12.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 14(1.961818e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T13.X, T7.X, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T7.X, literal.x,
; EG-NEXT: BFE_INT T12.Z, PS, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 3(4.203895e-45), 13(1.821688e-44)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T14.X, PS, literal.x,
; EG-NEXT: BFE_INT T12.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T7.X, literal.x,
; EG-NEXT: BFE_INT T13.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T7.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 12(1.681558e-44)
; EG-NEXT: BFE_INT T12.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T13.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, 1,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T7.X, PS, literal.x,
; EG-NEXT: BFE_INT * T13.Y, PV.W, 0.0, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v16i1_to_v16i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u16 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001
; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007
; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006
; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10005
; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10004
; GFX12-NEXT: s_bfe_i32 s11, s2, 0x1000b
; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000a
; GFX12-NEXT: s_bfe_i32 s13, s2, 0x10009
; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10008
; GFX12-NEXT: s_bfe_i32 s15, s2, 0x1000f
; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000e
; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000c
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1000d
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: v_dual_mov_b32 v0, s17 :: v_dual_mov_b32 v3, s15
; GFX12-NEXT: v_dual_mov_b32 v2, s16 :: v_dual_mov_b32 v5, s13
; GFX12-NEXT: v_dual_mov_b32 v4, s14 :: v_dual_mov_b32 v7, s11
; GFX12-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v9, s9
; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v11, s7
; GFX12-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v13, s5
; GFX12-NEXT: v_dual_mov_b32 v12, s6 :: v_dual_mov_b32 v15, s3
; GFX12-NEXT: v_mov_b32_e32 v14, s4
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: load sixteen i1 values, sign-extend each one to i32 (so a set
; bit becomes -1), then store the widened vector.
%load = load <16 x i1>, ptr addrspace(4) %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(1) %out
ret void
}
; Zero-extending load of a <32 x i1> vector from addrspace(4) (%in); the
; result is widened to <32 x i32> and stored through addrspace(1) (%out).
;
; The assertion lines inside the body are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of this
; file). Regenerate them with that script instead of editing them by hand.
;
; What the current expectations show for each RUN line:
;   - amdgcn default, tonga and gfx1200 - one scalar 32-bit load (s_load_dword
;     / s_load_b32), then one scalar extraction per element: s_and_b32 with 1
;     for bit 0, s_lshr_b32 by 31 for bit 31 and s_bfe_u32 for the rest,
;     followed by eight 16-byte stores covering byte offsets 0 through 112.
;     The amdgcn default output reuses v[0:3] for every store and therefore
;     has an s_waitcnt expcnt(0) before each refill.
;   - r600 cypress - VTX_READ_32, then AND_INT for bit 0, LSHR by 31 for
;     bit 31 and BFE_UINT for the rest, followed by eight MEM_RAT_CACHELESS
;     stores.
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v32i1_to_v32i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_u32 s5, s4, 0x10003
; GFX6-NEXT: s_bfe_u32 s6, s4, 0x10001
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x10007
; GFX6-NEXT: s_bfe_u32 s8, s4, 0x10005
; GFX6-NEXT: s_bfe_u32 s9, s4, 0x1000b
; GFX6-NEXT: s_bfe_u32 s10, s4, 0x10009
; GFX6-NEXT: s_bfe_u32 s11, s4, 0x1000f
; GFX6-NEXT: s_bfe_u32 s12, s4, 0x1000d
; GFX6-NEXT: s_bfe_u32 s13, s4, 0x10013
; GFX6-NEXT: s_bfe_u32 s14, s4, 0x10011
; GFX6-NEXT: s_bfe_u32 s15, s4, 0x10017
; GFX6-NEXT: s_bfe_u32 s16, s4, 0x10015
; GFX6-NEXT: s_bfe_u32 s17, s4, 0x1001b
; GFX6-NEXT: s_bfe_u32 s18, s4, 0x10019
; GFX6-NEXT: s_lshr_b32 s19, s4, 31
; GFX6-NEXT: s_bfe_u32 s20, s4, 0x1001d
; GFX6-NEXT: s_and_b32 s21, s4, 1
; GFX6-NEXT: s_bfe_u32 s22, s4, 0x10002
; GFX6-NEXT: s_bfe_u32 s23, s4, 0x10006
; GFX6-NEXT: s_bfe_u32 s24, s4, 0x10004
; GFX6-NEXT: s_bfe_u32 s25, s4, 0x1000a
; GFX6-NEXT: s_bfe_u32 s26, s4, 0x10008
; GFX6-NEXT: s_bfe_u32 s27, s4, 0x1000e
; GFX6-NEXT: s_bfe_u32 s28, s4, 0x1000c
; GFX6-NEXT: s_bfe_u32 s29, s4, 0x10012
; GFX6-NEXT: s_bfe_u32 s30, s4, 0x10010
; GFX6-NEXT: s_bfe_u32 s31, s4, 0x10016
; GFX6-NEXT: s_bfe_u32 s33, s4, 0x10014
; GFX6-NEXT: s_bfe_u32 s34, s4, 0x1001a
; GFX6-NEXT: s_bfe_u32 s35, s4, 0x1001e
; GFX6-NEXT: s_bfe_u32 s36, s4, 0x1001c
; GFX6-NEXT: s_bfe_u32 s4, s4, 0x10018
; GFX6-NEXT: v_mov_b32_e32 v0, s36
; GFX6-NEXT: v_mov_b32_e32 v1, s20
; GFX6-NEXT: v_mov_b32_e32 v2, s35
; GFX6-NEXT: v_mov_b32_e32 v3, s19
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s18
; GFX6-NEXT: v_mov_b32_e32 v2, s34
; GFX6-NEXT: v_mov_b32_e32 v3, s17
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s33
; GFX6-NEXT: v_mov_b32_e32 v1, s16
; GFX6-NEXT: v_mov_b32_e32 v2, s31
; GFX6-NEXT: v_mov_b32_e32 v3, s15
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s30
; GFX6-NEXT: v_mov_b32_e32 v1, s14
; GFX6-NEXT: v_mov_b32_e32 v2, s29
; GFX6-NEXT: v_mov_b32_e32 v3, s13
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s28
; GFX6-NEXT: v_mov_b32_e32 v1, s12
; GFX6-NEXT: v_mov_b32_e32 v2, s27
; GFX6-NEXT: v_mov_b32_e32 v3, s11
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s26
; GFX6-NEXT: v_mov_b32_e32 v1, s10
; GFX6-NEXT: v_mov_b32_e32 v2, s25
; GFX6-NEXT: v_mov_b32_e32 v3, s9
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s24
; GFX6-NEXT: v_mov_b32_e32 v1, s8
; GFX6-NEXT: v_mov_b32_e32 v2, s23
; GFX6-NEXT: v_mov_b32_e32 v3, s7
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s21
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s22
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v32i1_to_v32i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10003
; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10001
; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10007
; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10005
; GFX8-NEXT: s_bfe_u32 s8, s2, 0x1000b
; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10009
; GFX8-NEXT: s_bfe_u32 s10, s2, 0x1000f
; GFX8-NEXT: s_bfe_u32 s11, s2, 0x1000d
; GFX8-NEXT: s_bfe_u32 s12, s2, 0x10013
; GFX8-NEXT: s_bfe_u32 s13, s2, 0x10011
; GFX8-NEXT: s_bfe_u32 s14, s2, 0x10017
; GFX8-NEXT: s_bfe_u32 s15, s2, 0x1001b
; GFX8-NEXT: s_bfe_u32 s16, s2, 0x10019
; GFX8-NEXT: s_lshr_b32 s3, s2, 31
; GFX8-NEXT: s_bfe_u32 s17, s2, 0x1001d
; GFX8-NEXT: s_and_b32 s18, s2, 1
; GFX8-NEXT: s_bfe_u32 s19, s2, 0x10002
; GFX8-NEXT: s_bfe_u32 s20, s2, 0x10006
; GFX8-NEXT: s_bfe_u32 s21, s2, 0x10004
; GFX8-NEXT: s_bfe_u32 s22, s2, 0x1000a
; GFX8-NEXT: s_bfe_u32 s23, s2, 0x10008
; GFX8-NEXT: s_bfe_u32 s24, s2, 0x1000e
; GFX8-NEXT: s_bfe_u32 s25, s2, 0x1000c
; GFX8-NEXT: s_bfe_u32 s26, s2, 0x10012
; GFX8-NEXT: s_bfe_u32 s27, s2, 0x10010
; GFX8-NEXT: s_bfe_u32 s28, s2, 0x10016
; GFX8-NEXT: s_bfe_u32 s29, s2, 0x10015
; GFX8-NEXT: s_bfe_u32 s30, s2, 0x10014
; GFX8-NEXT: s_bfe_u32 s31, s2, 0x1001a
; GFX8-NEXT: s_bfe_u32 s33, s2, 0x10018
; GFX8-NEXT: s_bfe_u32 s34, s2, 0x1001e
; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1001c
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
; GFX8-NEXT: v_mov_b32_e32 v1, s17
; GFX8-NEXT: v_mov_b32_e32 v2, s34
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
; GFX8-NEXT: v_mov_b32_e32 v0, s33
; GFX8-NEXT: v_mov_b32_e32 v1, s16
; GFX8-NEXT: v_mov_b32_e32 v2, s31
; GFX8-NEXT: v_mov_b32_e32 v3, s15
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 64
; GFX8-NEXT: v_mov_b32_e32 v0, s30
; GFX8-NEXT: v_mov_b32_e32 v1, s29
; GFX8-NEXT: v_mov_b32_e32 v2, s28
; GFX8-NEXT: v_mov_b32_e32 v3, s14
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v0, s27
; GFX8-NEXT: v_mov_b32_e32 v1, s13
; GFX8-NEXT: v_mov_b32_e32 v2, s26
; GFX8-NEXT: v_mov_b32_e32 v3, s12
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v0, s25
; GFX8-NEXT: v_mov_b32_e32 v1, s11
; GFX8-NEXT: v_mov_b32_e32 v2, s24
; GFX8-NEXT: v_mov_b32_e32 v3, s10
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s23
; GFX8-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: v_mov_b32_e32 v2, s22
; GFX8-NEXT: v_mov_b32_e32 v3, s8
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v0, s21
; GFX8-NEXT: v_mov_b32_e32 v1, s7
; GFX8-NEXT: v_mov_b32_e32 v2, s20
; GFX8-NEXT: v_mov_b32_e32 v3, s6
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s18
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v2, s19
; GFX8-NEXT: v_mov_b32_e32 v3, s4
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v32i1_to_v32i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @12
; EG-NEXT: ALU 76, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T26.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T24.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T20.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T16.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 12:
; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: MOV * T11.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 15:
; EG-NEXT: BFE_UINT * T12.W, T11.X, literal.x, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T12.Z, T11.X, literal.x, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T12.Y, T11.X, 1, 1,
; EG-NEXT: BFE_UINT * T13.W, T11.X, literal.x, 1,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T12.X, T11.X, 1,
; EG-NEXT: BFE_UINT T13.Z, T11.X, literal.x, 1,
; EG-NEXT: LSHR * T14.X, KC0[2].Y, literal.y,
; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45)
; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT * T15.W, T11.X, literal.y, 1,
; EG-NEXT: 5(7.006492e-45), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T13.X, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT T15.Z, T11.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T16.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T15.Y, T11.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T17.W, T11.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T15.X, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT T17.Z, T11.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T18.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T17.Y, T11.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T19.W, T11.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44)
; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T17.X, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT T19.Z, T11.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 12(1.681558e-44), 18(2.522337e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T19.Y, T11.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T21.W, T11.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 17(2.382207e-44)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T19.X, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT T21.Z, T11.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 16(2.242078e-44), 22(3.082857e-44)
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T22.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T21.Y, T11.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T23.W, T11.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44)
; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T21.X, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT T23.Z, T11.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 20(2.802597e-44), 26(3.643376e-44)
; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T24.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T23.Y, T11.X, literal.y, 1,
; EG-NEXT: LSHR * T11.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T23.X, T11.X, literal.x, 1,
; EG-NEXT: BFE_UINT T11.Z, T11.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 24(3.363116e-44), 30(4.203895e-44)
; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T25.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT * T11.Y, T11.X, literal.y, 1,
; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44)
; EG-NEXT: BFE_UINT T11.X, T11.X, literal.x, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 28(3.923636e-44), 112(1.569454e-43)
; EG-NEXT: LSHR * T26.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v32i1_to_v32i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001
; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10007
; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10005
; GFX12-NEXT: s_bfe_u32 s7, s2, 0x1000b
; GFX12-NEXT: s_bfe_u32 s8, s2, 0x10009
; GFX12-NEXT: s_bfe_u32 s9, s2, 0x1000f
; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000d
; GFX12-NEXT: s_bfe_u32 s11, s2, 0x10013
; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10011
; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10017
; GFX12-NEXT: s_bfe_u32 s14, s2, 0x1001b
; GFX12-NEXT: s_bfe_u32 s15, s2, 0x10019
; GFX12-NEXT: s_lshr_b32 s16, s2, 31
; GFX12-NEXT: s_bfe_u32 s17, s2, 0x1001d
; GFX12-NEXT: s_and_b32 s18, s2, 1
; GFX12-NEXT: s_bfe_u32 s19, s2, 0x10002
; GFX12-NEXT: s_bfe_u32 s20, s2, 0x10006
; GFX12-NEXT: s_bfe_u32 s21, s2, 0x10004
; GFX12-NEXT: s_bfe_u32 s22, s2, 0x1000a
; GFX12-NEXT: s_bfe_u32 s23, s2, 0x10008
; GFX12-NEXT: s_bfe_u32 s24, s2, 0x1000e
; GFX12-NEXT: s_bfe_u32 s25, s2, 0x1000c
; GFX12-NEXT: s_bfe_u32 s26, s2, 0x10012
; GFX12-NEXT: s_bfe_u32 s27, s2, 0x10010
; GFX12-NEXT: s_bfe_u32 s28, s2, 0x10016
; GFX12-NEXT: s_bfe_u32 s29, s2, 0x10015
; GFX12-NEXT: s_bfe_u32 s30, s2, 0x10014
; GFX12-NEXT: s_bfe_u32 s31, s2, 0x1001a
; GFX12-NEXT: s_bfe_u32 s33, s2, 0x10018
; GFX12-NEXT: s_bfe_u32 s34, s2, 0x1001c
; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1001e
; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s17
; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s16
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s15
; GFX12-NEXT: v_dual_mov_b32 v4, s33 :: v_dual_mov_b32 v7, s14
; GFX12-NEXT: v_dual_mov_b32 v6, s31 :: v_dual_mov_b32 v9, s29
; GFX12-NEXT: v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s13
; GFX12-NEXT: v_mov_b32_e32 v10, s28
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96
; GFX12-NEXT: v_dual_mov_b32 v0, s27 :: v_dual_mov_b32 v3, s11
; GFX12-NEXT: v_dual_mov_b32 v1, s12 :: v_dual_mov_b32 v2, s26
; GFX12-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s25
; GFX12-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v6, s24
; GFX12-NEXT: v_dual_mov_b32 v13, s8 :: v_dual_mov_b32 v12, s23
; GFX12-NEXT: v_dual_mov_b32 v15, s7 :: v_dual_mov_b32 v14, s22
; GFX12-NEXT: v_dual_mov_b32 v17, s6 :: v_dual_mov_b32 v16, s21
; GFX12-NEXT: v_dual_mov_b32 v19, s5 :: v_dual_mov_b32 v18, s20
; GFX12-NEXT: v_dual_mov_b32 v21, s4 :: v_dual_mov_b32 v20, s18
; GFX12-NEXT: v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s19
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: load thirty-two i1 values, zero-extend each one to i32 (so a
; set bit becomes 1), then store the widened vector.
%load = load <32 x i1>, ptr addrspace(4) %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v32i1_to_v32i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_i32 s5, s4, 0x10003
; GFX6-NEXT: s_bfe_i32 s6, s4, 0x10002
; GFX6-NEXT: s_bfe_i32 s7, s4, 0x10001
; GFX6-NEXT: s_bfe_i32 s8, s4, 0x10000
; GFX6-NEXT: s_bfe_i32 s9, s4, 0x10007
; GFX6-NEXT: s_bfe_i32 s10, s4, 0x10006
; GFX6-NEXT: s_bfe_i32 s11, s4, 0x10005
; GFX6-NEXT: s_bfe_i32 s12, s4, 0x10004
; GFX6-NEXT: s_bfe_i32 s13, s4, 0x1000b
; GFX6-NEXT: s_bfe_i32 s14, s4, 0x1000a
; GFX6-NEXT: s_bfe_i32 s15, s4, 0x10009
; GFX6-NEXT: s_bfe_i32 s16, s4, 0x10008
; GFX6-NEXT: s_bfe_i32 s17, s4, 0x1000f
; GFX6-NEXT: s_bfe_i32 s18, s4, 0x1000e
; GFX6-NEXT: s_bfe_i32 s19, s4, 0x1000d
; GFX6-NEXT: s_bfe_i32 s20, s4, 0x1000c
; GFX6-NEXT: s_bfe_i32 s21, s4, 0x10013
; GFX6-NEXT: s_bfe_i32 s22, s4, 0x10012
; GFX6-NEXT: s_bfe_i32 s23, s4, 0x10011
; GFX6-NEXT: s_bfe_i32 s24, s4, 0x10010
; GFX6-NEXT: s_bfe_i32 s25, s4, 0x10017
; GFX6-NEXT: s_bfe_i32 s26, s4, 0x10016
; GFX6-NEXT: s_bfe_i32 s27, s4, 0x10015
; GFX6-NEXT: s_bfe_i32 s28, s4, 0x10014
; GFX6-NEXT: s_bfe_i32 s29, s4, 0x1001b
; GFX6-NEXT: s_bfe_i32 s30, s4, 0x1001a
; GFX6-NEXT: s_bfe_i32 s31, s4, 0x10019
; GFX6-NEXT: s_ashr_i32 s33, s4, 31
; GFX6-NEXT: s_bfe_i32 s34, s4, 0x1001e
; GFX6-NEXT: s_bfe_i32 s35, s4, 0x1001d
; GFX6-NEXT: s_bfe_i32 s36, s4, 0x1001c
; GFX6-NEXT: s_bfe_i32 s4, s4, 0x10018
; GFX6-NEXT: v_mov_b32_e32 v0, s36
; GFX6-NEXT: v_mov_b32_e32 v1, s35
; GFX6-NEXT: v_mov_b32_e32 v2, s34
; GFX6-NEXT: v_mov_b32_e32 v3, s33
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v1, s31
; GFX6-NEXT: v_mov_b32_e32 v2, s30
; GFX6-NEXT: v_mov_b32_e32 v3, s29
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s28
; GFX6-NEXT: v_mov_b32_e32 v1, s27
; GFX6-NEXT: v_mov_b32_e32 v2, s26
; GFX6-NEXT: v_mov_b32_e32 v3, s25
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s24
; GFX6-NEXT: v_mov_b32_e32 v1, s23
; GFX6-NEXT: v_mov_b32_e32 v2, s22
; GFX6-NEXT: v_mov_b32_e32 v3, s21
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s20
; GFX6-NEXT: v_mov_b32_e32 v1, s19
; GFX6-NEXT: v_mov_b32_e32 v2, s18
; GFX6-NEXT: v_mov_b32_e32 v3, s17
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s16
; GFX6-NEXT: v_mov_b32_e32 v1, s15
; GFX6-NEXT: v_mov_b32_e32 v2, s14
; GFX6-NEXT: v_mov_b32_e32 v3, s13
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s12
; GFX6-NEXT: v_mov_b32_e32 v1, s11
; GFX6-NEXT: v_mov_b32_e32 v2, s10
; GFX6-NEXT: v_mov_b32_e32 v3, s9
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s8
; GFX6-NEXT: v_mov_b32_e32 v1, s7
; GFX6-NEXT: v_mov_b32_e32 v2, s6
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v32i1_to_v32i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_bfe_i32 s4, s2, 0x10003
; GFX8-NEXT: s_bfe_i32 s5, s2, 0x10002
; GFX8-NEXT: s_bfe_i32 s6, s2, 0x10001
; GFX8-NEXT: s_bfe_i32 s7, s2, 0x10000
; GFX8-NEXT: s_bfe_i32 s8, s2, 0x10007
; GFX8-NEXT: s_bfe_i32 s9, s2, 0x10006
; GFX8-NEXT: s_bfe_i32 s10, s2, 0x10005
; GFX8-NEXT: s_bfe_i32 s11, s2, 0x10004
; GFX8-NEXT: s_bfe_i32 s12, s2, 0x1000b
; GFX8-NEXT: s_bfe_i32 s13, s2, 0x1000a
; GFX8-NEXT: s_bfe_i32 s14, s2, 0x10009
; GFX8-NEXT: s_bfe_i32 s15, s2, 0x10008
; GFX8-NEXT: s_bfe_i32 s16, s2, 0x1000f
; GFX8-NEXT: s_bfe_i32 s17, s2, 0x1000e
; GFX8-NEXT: s_bfe_i32 s18, s2, 0x1000d
; GFX8-NEXT: s_bfe_i32 s19, s2, 0x1000c
; GFX8-NEXT: s_bfe_i32 s20, s2, 0x10013
; GFX8-NEXT: s_bfe_i32 s21, s2, 0x10012
; GFX8-NEXT: s_bfe_i32 s22, s2, 0x10011
; GFX8-NEXT: s_bfe_i32 s23, s2, 0x10010
; GFX8-NEXT: s_bfe_i32 s24, s2, 0x10017
; GFX8-NEXT: s_bfe_i32 s25, s2, 0x10016
; GFX8-NEXT: s_bfe_i32 s26, s2, 0x10015
; GFX8-NEXT: s_bfe_i32 s27, s2, 0x10014
; GFX8-NEXT: s_bfe_i32 s28, s2, 0x1001b
; GFX8-NEXT: s_bfe_i32 s29, s2, 0x1001a
; GFX8-NEXT: s_bfe_i32 s30, s2, 0x10019
; GFX8-NEXT: s_bfe_i32 s31, s2, 0x10018
; GFX8-NEXT: s_ashr_i32 s3, s2, 31
; GFX8-NEXT: s_bfe_i32 s33, s2, 0x1001e
; GFX8-NEXT: s_bfe_i32 s34, s2, 0x1001d
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x1001c
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
; GFX8-NEXT: v_mov_b32_e32 v1, s34
; GFX8-NEXT: v_mov_b32_e32 v2, s33
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
; GFX8-NEXT: v_mov_b32_e32 v0, s31
; GFX8-NEXT: v_mov_b32_e32 v1, s30
; GFX8-NEXT: v_mov_b32_e32 v2, s29
; GFX8-NEXT: v_mov_b32_e32 v3, s28
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 64
; GFX8-NEXT: v_mov_b32_e32 v0, s27
; GFX8-NEXT: v_mov_b32_e32 v1, s26
; GFX8-NEXT: v_mov_b32_e32 v2, s25
; GFX8-NEXT: v_mov_b32_e32 v3, s24
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v0, s23
; GFX8-NEXT: v_mov_b32_e32 v1, s22
; GFX8-NEXT: v_mov_b32_e32 v2, s21
; GFX8-NEXT: v_mov_b32_e32 v3, s20
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v0, s19
; GFX8-NEXT: v_mov_b32_e32 v1, s18
; GFX8-NEXT: v_mov_b32_e32 v2, s17
; GFX8-NEXT: v_mov_b32_e32 v3, s16
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s15
; GFX8-NEXT: v_mov_b32_e32 v1, s14
; GFX8-NEXT: v_mov_b32_e32 v2, s13
; GFX8-NEXT: v_mov_b32_e32 v3, s12
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v0, s11
; GFX8-NEXT: v_mov_b32_e32 v1, s10
; GFX8-NEXT: v_mov_b32_e32 v2, s9
; GFX8-NEXT: v_mov_b32_e32 v3, s8
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s7
; GFX8-NEXT: v_mov_b32_e32 v1, s6
; GFX8-NEXT: v_mov_b32_e32 v2, s5
; GFX8-NEXT: v_mov_b32_e32 v3, s4
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v32i1_to_v32i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @14
; EG-NEXT: ALU 99, @17, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 5, @117, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T26.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T21.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T19.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T17.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T15.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T13.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 14:
; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 16:
; EG-NEXT: MOV * T11.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 17:
; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T12.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T12.Z, PS, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.x,
; EG-NEXT: LSHR * T1.W, T11.X, literal.y,
; EG-NEXT: 11(1.541428e-44), 5(7.006492e-45)
; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
; EG-NEXT: BFE_INT T12.Y, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T11.X, literal.y,
; EG-NEXT: BFE_INT T14.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44)
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T12.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T11.X, literal.x,
; EG-NEXT: BFE_INT T14.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 15(2.101948e-44), 9(1.261169e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T15.X, PS, literal.x,
; EG-NEXT: BFE_INT T14.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T11.X, literal.y,
; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 14(1.961818e-44)
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T14.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T11.X, literal.x,
; EG-NEXT: BFE_INT T16.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 19(2.662467e-44), 13(1.821688e-44)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T17.X, PS, literal.x,
; EG-NEXT: BFE_INT T16.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T11.X, literal.y,
; EG-NEXT: BFE_INT T18.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 18(2.522337e-44)
; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T16.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T11.X, literal.x,
; EG-NEXT: BFE_INT T18.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 23(3.222986e-44), 17(2.382207e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T19.X, PS, literal.x,
; EG-NEXT: BFE_INT T18.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T11.X, literal.y,
; EG-NEXT: BFE_INT T20.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 22(3.082857e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T18.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T11.X, literal.x,
; EG-NEXT: BFE_INT T20.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 27(3.783506e-44), 21(2.942727e-44)
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T21.X, PS, literal.x,
; EG-NEXT: BFE_INT T20.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T11.X, literal.y,
; EG-NEXT: BFE_INT T22.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 26(3.643376e-44)
; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T20.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T22.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 25(3.503246e-44), 80(1.121039e-43)
; EG-NEXT: LSHR T23.X, PS, literal.x,
; EG-NEXT: BFE_INT T22.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44)
; EG-NEXT: BFE_INT T22.X, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.x,
; EG-NEXT: ASHR * T24.W, T11.X, literal.y,
; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T25.X, T11.X, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T11.X, literal.x,
; EG-NEXT: BFE_INT T24.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T11.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 3(4.203895e-45), 29(4.063766e-44)
; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T26.X, PS, literal.x,
; EG-NEXT: BFE_INT T24.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T11.X, literal.x,
; EG-NEXT: BFE_INT T25.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T11.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 28(3.923636e-44)
; EG-NEXT: BFE_INT T24.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT * T25.Z, PV.Z, 0.0, 1,
; EG-NEXT: ALU clause starting at 117:
; EG-NEXT: LSHR T0.W, T11.X, 1,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T11.X, PS, literal.x,
; EG-NEXT: BFE_INT * T25.Y, PV.W, 0.0, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v32i1_to_v32i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001
; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007
; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006
; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10005
; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10004
; GFX12-NEXT: s_bfe_i32 s11, s2, 0x1000b
; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000a
; GFX12-NEXT: s_bfe_i32 s13, s2, 0x10009
; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10008
; GFX12-NEXT: s_bfe_i32 s15, s2, 0x1000f
; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000e
; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000d
; GFX12-NEXT: s_bfe_i32 s18, s2, 0x1000c
; GFX12-NEXT: s_bfe_i32 s19, s2, 0x10013
; GFX12-NEXT: s_bfe_i32 s20, s2, 0x10012
; GFX12-NEXT: s_bfe_i32 s21, s2, 0x10011
; GFX12-NEXT: s_bfe_i32 s22, s2, 0x10010
; GFX12-NEXT: s_bfe_i32 s23, s2, 0x10017
; GFX12-NEXT: s_bfe_i32 s24, s2, 0x10016
; GFX12-NEXT: s_bfe_i32 s25, s2, 0x10015
; GFX12-NEXT: s_bfe_i32 s26, s2, 0x10014
; GFX12-NEXT: s_bfe_i32 s27, s2, 0x1001b
; GFX12-NEXT: s_bfe_i32 s28, s2, 0x1001a
; GFX12-NEXT: s_bfe_i32 s29, s2, 0x10019
; GFX12-NEXT: s_bfe_i32 s30, s2, 0x10018
; GFX12-NEXT: s_ashr_i32 s31, s2, 31
; GFX12-NEXT: s_bfe_i32 s33, s2, 0x1001e
; GFX12-NEXT: s_bfe_i32 s34, s2, 0x1001c
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1001d
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s31
; GFX12-NEXT: v_dual_mov_b32 v2, s33 :: v_dual_mov_b32 v5, s29
; GFX12-NEXT: v_dual_mov_b32 v4, s30 :: v_dual_mov_b32 v7, s27
; GFX12-NEXT: v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v9, s25
; GFX12-NEXT: v_dual_mov_b32 v8, s26 :: v_dual_mov_b32 v11, s23
; GFX12-NEXT: v_mov_b32_e32 v10, s24
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96
; GFX12-NEXT: v_dual_mov_b32 v0, s22 :: v_dual_mov_b32 v3, s19
; GFX12-NEXT: v_dual_mov_b32 v1, s21 :: v_dual_mov_b32 v2, s20
; GFX12-NEXT: v_dual_mov_b32 v5, s17 :: v_dual_mov_b32 v4, s18
; GFX12-NEXT: v_dual_mov_b32 v7, s15 :: v_dual_mov_b32 v6, s16
; GFX12-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s14
; GFX12-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v14, s12
; GFX12-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v16, s10
; GFX12-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s8
; GFX12-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v20, s6
; GFX12-NEXT: v_dual_mov_b32 v23, s3 :: v_dual_mov_b32 v22, s4
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <32 x i1>, ptr addrspace(4) %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v64i1_to_v64i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_u32 s4, s2, 0x10003
; GFX6-NEXT: s_bfe_u32 s5, s2, 0x10001
; GFX6-NEXT: s_bfe_u32 s6, s2, 0x10007
; GFX6-NEXT: s_bfe_u32 s7, s2, 0x10005
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x1000b
; GFX6-NEXT: s_bfe_u32 s9, s2, 0x10009
; GFX6-NEXT: s_bfe_u32 s10, s2, 0x1000f
; GFX6-NEXT: s_bfe_u32 s13, s2, 0x1000d
; GFX6-NEXT: s_bfe_u32 s14, s2, 0x10013
; GFX6-NEXT: s_bfe_u32 s15, s2, 0x10011
; GFX6-NEXT: s_bfe_u32 s16, s2, 0x10017
; GFX6-NEXT: s_bfe_u32 s17, s2, 0x10015
; GFX6-NEXT: s_bfe_u32 s18, s2, 0x1001b
; GFX6-NEXT: s_bfe_u32 s19, s2, 0x10019
; GFX6-NEXT: s_lshr_b32 s20, s2, 31
; GFX6-NEXT: s_bfe_u32 s21, s2, 0x1001d
; GFX6-NEXT: s_bfe_u32 s22, s3, 0x10003
; GFX6-NEXT: s_bfe_u32 s23, s3, 0x10001
; GFX6-NEXT: s_bfe_u32 s24, s3, 0x10007
; GFX6-NEXT: s_bfe_u32 s25, s3, 0x10005
; GFX6-NEXT: s_bfe_u32 s26, s3, 0x1000b
; GFX6-NEXT: s_bfe_u32 s27, s3, 0x10009
; GFX6-NEXT: s_bfe_u32 s28, s3, 0x1000f
; GFX6-NEXT: s_bfe_u32 s29, s3, 0x1000d
; GFX6-NEXT: s_bfe_u32 s30, s3, 0x10013
; GFX6-NEXT: s_bfe_u32 s31, s3, 0x10011
; GFX6-NEXT: s_bfe_u32 s33, s3, 0x10017
; GFX6-NEXT: s_bfe_u32 s34, s3, 0x10015
; GFX6-NEXT: s_bfe_u32 s35, s3, 0x1001b
; GFX6-NEXT: s_bfe_u32 s36, s3, 0x10019
; GFX6-NEXT: s_lshr_b32 s37, s3, 31
; GFX6-NEXT: s_bfe_u32 s38, s3, 0x1001d
; GFX6-NEXT: s_and_b32 s12, s2, 1
; GFX6-NEXT: s_bfe_u32 s11, s2, 0x10002
; GFX6-NEXT: s_bfe_u32 s39, s2, 0x10006
; GFX6-NEXT: s_bfe_u32 s40, s2, 0x10004
; GFX6-NEXT: s_bfe_u32 s41, s2, 0x1000a
; GFX6-NEXT: s_bfe_u32 s42, s2, 0x10008
; GFX6-NEXT: s_bfe_u32 s43, s2, 0x1000e
; GFX6-NEXT: s_bfe_u32 s44, s2, 0x1000c
; GFX6-NEXT: s_bfe_u32 s45, s2, 0x10012
; GFX6-NEXT: s_bfe_u32 s46, s2, 0x10010
; GFX6-NEXT: s_bfe_u32 s47, s2, 0x10016
; GFX6-NEXT: s_bfe_u32 s48, s2, 0x10014
; GFX6-NEXT: s_bfe_u32 s49, s2, 0x1001a
; GFX6-NEXT: s_bfe_u32 s50, s2, 0x10018
; GFX6-NEXT: s_bfe_u32 s51, s2, 0x1001e
; GFX6-NEXT: s_bfe_u32 s52, s2, 0x1001c
; GFX6-NEXT: s_and_b32 s53, s3, 1
; GFX6-NEXT: s_bfe_u32 s54, s3, 0x10002
; GFX6-NEXT: s_bfe_u32 s55, s3, 0x10006
; GFX6-NEXT: s_bfe_u32 s56, s3, 0x10004
; GFX6-NEXT: s_bfe_u32 s57, s3, 0x10008
; GFX6-NEXT: s_bfe_u32 s58, s3, 0x1000e
; GFX6-NEXT: s_bfe_u32 s59, s3, 0x1000c
; GFX6-NEXT: s_bfe_u32 s60, s3, 0x10012
; GFX6-NEXT: s_bfe_u32 s61, s3, 0x10010
; GFX6-NEXT: s_bfe_u32 s62, s3, 0x10016
; GFX6-NEXT: s_bfe_u32 s63, s3, 0x10014
; GFX6-NEXT: s_bfe_u32 s64, s3, 0x1001a
; GFX6-NEXT: s_bfe_u32 s65, s3, 0x10018
; GFX6-NEXT: s_bfe_u32 s66, s3, 0x1001e
; GFX6-NEXT: s_bfe_u32 s67, s3, 0x1001c
; GFX6-NEXT: s_bfe_u32 s68, s3, 0x1000a
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s67
; GFX6-NEXT: v_mov_b32_e32 v1, s38
; GFX6-NEXT: v_mov_b32_e32 v2, s66
; GFX6-NEXT: v_mov_b32_e32 v3, s37
; GFX6-NEXT: v_mov_b32_e32 v4, s65
; GFX6-NEXT: v_mov_b32_e32 v5, s36
; GFX6-NEXT: v_mov_b32_e32 v6, s64
; GFX6-NEXT: v_mov_b32_e32 v7, s35
; GFX6-NEXT: v_mov_b32_e32 v8, s63
; GFX6-NEXT: v_mov_b32_e32 v9, s34
; GFX6-NEXT: v_mov_b32_e32 v10, s62
; GFX6-NEXT: v_mov_b32_e32 v11, s33
; GFX6-NEXT: v_mov_b32_e32 v12, s61
; GFX6-NEXT: v_mov_b32_e32 v13, s31
; GFX6-NEXT: v_mov_b32_e32 v14, s60
; GFX6-NEXT: v_mov_b32_e32 v15, s30
; GFX6-NEXT: v_mov_b32_e32 v16, s59
; GFX6-NEXT: v_mov_b32_e32 v17, s29
; GFX6-NEXT: v_mov_b32_e32 v18, s58
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s57
; GFX6-NEXT: v_mov_b32_e32 v19, s28
; GFX6-NEXT: v_mov_b32_e32 v1, s27
; GFX6-NEXT: v_mov_b32_e32 v2, s68
; GFX6-NEXT: v_mov_b32_e32 v3, s26
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s56
; GFX6-NEXT: v_mov_b32_e32 v1, s25
; GFX6-NEXT: v_mov_b32_e32 v2, s55
; GFX6-NEXT: v_mov_b32_e32 v3, s24
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s53
; GFX6-NEXT: v_mov_b32_e32 v1, s23
; GFX6-NEXT: v_mov_b32_e32 v2, s54
; GFX6-NEXT: v_mov_b32_e32 v3, s22
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s52
; GFX6-NEXT: v_mov_b32_e32 v1, s21
; GFX6-NEXT: v_mov_b32_e32 v2, s51
; GFX6-NEXT: v_mov_b32_e32 v3, s20
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s50
; GFX6-NEXT: v_mov_b32_e32 v1, s19
; GFX6-NEXT: v_mov_b32_e32 v2, s49
; GFX6-NEXT: v_mov_b32_e32 v3, s18
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s48
; GFX6-NEXT: v_mov_b32_e32 v1, s17
; GFX6-NEXT: v_mov_b32_e32 v2, s47
; GFX6-NEXT: v_mov_b32_e32 v3, s16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s46
; GFX6-NEXT: v_mov_b32_e32 v1, s15
; GFX6-NEXT: v_mov_b32_e32 v2, s45
; GFX6-NEXT: v_mov_b32_e32 v3, s14
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s44
; GFX6-NEXT: v_mov_b32_e32 v1, s13
; GFX6-NEXT: v_mov_b32_e32 v2, s43
; GFX6-NEXT: v_mov_b32_e32 v3, s10
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s42
; GFX6-NEXT: v_mov_b32_e32 v1, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s41
; GFX6-NEXT: v_mov_b32_e32 v3, s8
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s40
; GFX6-NEXT: v_mov_b32_e32 v1, s7
; GFX6-NEXT: v_mov_b32_e32 v2, s39
; GFX6-NEXT: v_mov_b32_e32 v3, s6
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s12
; GFX6-NEXT: v_mov_b32_e32 v1, s5
; GFX6-NEXT: v_mov_b32_e32 v2, s11
; GFX6-NEXT: v_mov_b32_e32 v3, s4
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v64i1_to_v64i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_bfe_u32 s2, s26, 0x10003
; GFX8-NEXT: s_bfe_u32 s3, s26, 0x10001
; GFX8-NEXT: s_bfe_u32 s4, s26, 0x10007
; GFX8-NEXT: s_bfe_u32 s5, s26, 0x10005
; GFX8-NEXT: s_bfe_u32 s6, s26, 0x1000b
; GFX8-NEXT: s_bfe_u32 s9, s26, 0x10009
; GFX8-NEXT: s_bfe_u32 s11, s26, 0x1000f
; GFX8-NEXT: s_bfe_u32 s13, s26, 0x1000d
; GFX8-NEXT: s_bfe_u32 s15, s26, 0x10013
; GFX8-NEXT: s_bfe_u32 s17, s26, 0x10011
; GFX8-NEXT: s_bfe_u32 s19, s26, 0x10017
; GFX8-NEXT: s_bfe_u32 s21, s26, 0x1001b
; GFX8-NEXT: s_bfe_u32 s23, s26, 0x10019
; GFX8-NEXT: s_lshr_b32 s25, s26, 31
; GFX8-NEXT: s_bfe_u32 s28, s26, 0x1001d
; GFX8-NEXT: s_bfe_u32 s29, s27, 0x10003
; GFX8-NEXT: s_bfe_u32 s30, s27, 0x10001
; GFX8-NEXT: s_bfe_u32 s31, s27, 0x10007
; GFX8-NEXT: s_bfe_u32 s33, s27, 0x10005
; GFX8-NEXT: s_bfe_u32 s34, s27, 0x1000b
; GFX8-NEXT: s_bfe_u32 s35, s27, 0x10009
; GFX8-NEXT: s_bfe_u32 s36, s27, 0x1000f
; GFX8-NEXT: s_bfe_u32 s37, s27, 0x1000d
; GFX8-NEXT: s_bfe_u32 s38, s27, 0x10013
; GFX8-NEXT: s_bfe_u32 s39, s27, 0x10011
; GFX8-NEXT: s_bfe_u32 s40, s27, 0x10017
; GFX8-NEXT: s_bfe_u32 s41, s27, 0x1001b
; GFX8-NEXT: s_bfe_u32 s42, s27, 0x10019
; GFX8-NEXT: s_lshr_b32 s43, s27, 31
; GFX8-NEXT: s_bfe_u32 s44, s27, 0x1001d
; GFX8-NEXT: s_and_b32 s8, s26, 1
; GFX8-NEXT: s_bfe_u32 s7, s26, 0x10002
; GFX8-NEXT: s_bfe_u32 s10, s26, 0x10006
; GFX8-NEXT: s_bfe_u32 s12, s26, 0x10004
; GFX8-NEXT: s_bfe_u32 s14, s26, 0x1000a
; GFX8-NEXT: s_bfe_u32 s16, s26, 0x10008
; GFX8-NEXT: s_bfe_u32 s18, s26, 0x1000e
; GFX8-NEXT: s_bfe_u32 s20, s26, 0x1000c
; GFX8-NEXT: s_bfe_u32 s22, s26, 0x10012
; GFX8-NEXT: s_bfe_u32 s24, s26, 0x10010
; GFX8-NEXT: s_bfe_u32 s45, s26, 0x10016
; GFX8-NEXT: s_bfe_u32 s46, s26, 0x10015
; GFX8-NEXT: s_bfe_u32 s47, s26, 0x10014
; GFX8-NEXT: s_bfe_u32 s48, s26, 0x1001a
; GFX8-NEXT: s_bfe_u32 s49, s26, 0x10018
; GFX8-NEXT: s_bfe_u32 s50, s26, 0x1001e
; GFX8-NEXT: s_bfe_u32 s51, s26, 0x1001c
; GFX8-NEXT: s_and_b32 s52, s27, 1
; GFX8-NEXT: s_bfe_u32 s53, s27, 0x10002
; GFX8-NEXT: s_bfe_u32 s54, s27, 0x10006
; GFX8-NEXT: s_bfe_u32 s55, s27, 0x10004
; GFX8-NEXT: s_bfe_u32 s56, s27, 0x1000a
; GFX8-NEXT: s_bfe_u32 s57, s27, 0x10008
; GFX8-NEXT: s_bfe_u32 s58, s27, 0x1000e
; GFX8-NEXT: s_bfe_u32 s59, s27, 0x1000c
; GFX8-NEXT: s_bfe_u32 s60, s27, 0x10012
; GFX8-NEXT: s_bfe_u32 s61, s27, 0x10010
; GFX8-NEXT: s_bfe_u32 s62, s27, 0x10016
; GFX8-NEXT: s_bfe_u32 s63, s27, 0x10015
; GFX8-NEXT: s_bfe_u32 s64, s27, 0x10014
; GFX8-NEXT: s_bfe_u32 s65, s27, 0x1001a
; GFX8-NEXT: s_bfe_u32 s66, s27, 0x10018
; GFX8-NEXT: s_bfe_u32 s26, s27, 0x1001e
; GFX8-NEXT: s_bfe_u32 s27, s27, 0x1001c
; GFX8-NEXT: v_mov_b32_e32 v2, s26
; GFX8-NEXT: s_add_u32 s26, s0, 0xf0
; GFX8-NEXT: v_mov_b32_e32 v0, s27
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v1, s44
; GFX8-NEXT: v_mov_b32_e32 v3, s43
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xe0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s66
; GFX8-NEXT: v_mov_b32_e32 v1, s42
; GFX8-NEXT: v_mov_b32_e32 v2, s65
; GFX8-NEXT: v_mov_b32_e32 v3, s41
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xd0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s64
; GFX8-NEXT: v_mov_b32_e32 v1, s63
; GFX8-NEXT: v_mov_b32_e32 v2, s62
; GFX8-NEXT: v_mov_b32_e32 v3, s40
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xc0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s61
; GFX8-NEXT: v_mov_b32_e32 v1, s39
; GFX8-NEXT: v_mov_b32_e32 v2, s60
; GFX8-NEXT: v_mov_b32_e32 v3, s38
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xb0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s59
; GFX8-NEXT: v_mov_b32_e32 v1, s37
; GFX8-NEXT: v_mov_b32_e32 v2, s58
; GFX8-NEXT: v_mov_b32_e32 v3, s36
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xa0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s57
; GFX8-NEXT: v_mov_b32_e32 v1, s35
; GFX8-NEXT: v_mov_b32_e32 v2, s56
; GFX8-NEXT: v_mov_b32_e32 v3, s34
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x90
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s55
; GFX8-NEXT: v_mov_b32_e32 v1, s33
; GFX8-NEXT: v_mov_b32_e32 v2, s54
; GFX8-NEXT: v_mov_b32_e32 v3, s31
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x80
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s52
; GFX8-NEXT: v_mov_b32_e32 v1, s30
; GFX8-NEXT: v_mov_b32_e32 v2, s53
; GFX8-NEXT: v_mov_b32_e32 v3, s29
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x70
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s51
; GFX8-NEXT: v_mov_b32_e32 v1, s28
; GFX8-NEXT: v_mov_b32_e32 v2, s50
; GFX8-NEXT: v_mov_b32_e32 v3, s25
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x60
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s49
; GFX8-NEXT: v_mov_b32_e32 v1, s23
; GFX8-NEXT: v_mov_b32_e32 v2, s48
; GFX8-NEXT: v_mov_b32_e32 v3, s21
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x50
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s47
; GFX8-NEXT: v_mov_b32_e32 v1, s46
; GFX8-NEXT: v_mov_b32_e32 v2, s45
; GFX8-NEXT: v_mov_b32_e32 v3, s19
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s22
; GFX8-NEXT: s_add_u32 s22, s0, 64
; GFX8-NEXT: s_addc_u32 s23, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s22
; GFX8-NEXT: v_mov_b32_e32 v0, s24
; GFX8-NEXT: v_mov_b32_e32 v1, s17
; GFX8-NEXT: v_mov_b32_e32 v3, s15
; GFX8-NEXT: v_mov_b32_e32 v5, s23
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s18
; GFX8-NEXT: s_add_u32 s18, s0, 48
; GFX8-NEXT: s_addc_u32 s19, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s18
; GFX8-NEXT: v_mov_b32_e32 v0, s20
; GFX8-NEXT: v_mov_b32_e32 v1, s13
; GFX8-NEXT: v_mov_b32_e32 v3, s11
; GFX8-NEXT: v_mov_b32_e32 v5, s19
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s14
; GFX8-NEXT: s_add_u32 s14, s0, 32
; GFX8-NEXT: s_addc_u32 s15, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s14
; GFX8-NEXT: v_mov_b32_e32 v0, s16
; GFX8-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: v_mov_b32_e32 v3, s6
; GFX8-NEXT: v_mov_b32_e32 v5, s15
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v3, s4
; GFX8-NEXT: s_add_u32 s4, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: s_addc_u32 s5, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s4
; GFX8-NEXT: v_mov_b32_e32 v0, s12
; GFX8-NEXT: v_mov_b32_e32 v2, s10
; GFX8-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s7
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v64i1_to_v64i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @22
; EG-NEXT: ALU 96, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 57, @122, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T49.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T47.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T45.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T43.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T41.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T39.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T21.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T34.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T32.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T30.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T28.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T26.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T24.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 22:
; EG-NEXT: VTX_READ_64 T21.XY, T19.X, 0, #1
; EG-NEXT: ALU clause starting at 24:
; EG-NEXT: MOV * T19.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 25:
; EG-NEXT: BFE_UINT * T19.W, T21.X, literal.x, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T19.Z, T21.X, literal.x, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T19.Y, T21.X, 1, 1,
; EG-NEXT: BFE_UINT * T20.W, T21.X, literal.x, 1,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T19.X, T21.X, 1,
; EG-NEXT: BFE_UINT T20.Z, T21.X, literal.x, 1,
; EG-NEXT: LSHR * T22.X, KC0[2].Y, literal.y,
; EG-NEXT: 6(8.407791e-45), 2(2.802597e-45)
; EG-NEXT: BFE_UINT T20.Y, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT * T23.W, T21.X, literal.y, 1,
; EG-NEXT: 5(7.006492e-45), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T20.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T23.Z, T21.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T24.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T23.Y, T21.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T25.W, T21.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T23.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T25.Z, T21.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T26.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T25.Y, T21.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T27.W, T21.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44)
; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T25.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T27.Z, T21.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 12(1.681558e-44), 18(2.522337e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T28.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T27.Y, T21.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T29.W, T21.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 17(2.382207e-44)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T27.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T29.Z, T21.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 16(2.242078e-44), 22(3.082857e-44)
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T30.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T29.Y, T21.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T31.W, T21.X, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44)
; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T29.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T31.Z, T21.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 20(2.802597e-44), 26(3.643376e-44)
; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T32.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T31.Y, T21.X, literal.y, 1,
; EG-NEXT: LSHR * T33.W, T21.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T31.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T33.Z, T21.X, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 24(3.363116e-44), 30(4.203895e-44)
; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T34.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T33.Y, T21.X, literal.y, 1,
; EG-NEXT: BFE_UINT * T35.W, T21.Y, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44)
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T33.X, T21.X, literal.x, 1,
; EG-NEXT: BFE_UINT T35.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 28(3.923636e-44), 2(2.802597e-45)
; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T35.Y, T21.Y, 1, 1,
; EG-NEXT: BFE_UINT T36.W, T21.Y, literal.y, 1,
; EG-NEXT: AND_INT * T35.X, T21.Y, 1,
; EG-NEXT: 2(2.802597e-45), 7(9.809089e-45)
; EG-NEXT: BFE_UINT T36.Z, T21.Y, literal.x, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 6(8.407791e-45), 128(1.793662e-43)
; EG-NEXT: LSHR T37.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T36.Y, T21.Y, literal.y, 1,
; EG-NEXT: BFE_UINT * T38.W, T21.Y, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 5(7.006492e-45)
; EG-NEXT: 11(1.541428e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T36.X, T21.Y, literal.x, 1,
; EG-NEXT: BFE_UINT T38.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 4(5.605194e-45), 10(1.401298e-44)
; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 122:
; EG-NEXT: LSHR T39.X, T0.W, literal.x,
; EG-NEXT: BFE_UINT T38.Y, T21.Y, literal.y, 1,
; EG-NEXT: BFE_UINT * T40.W, T21.Y, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T38.X, T21.Y, literal.x, 1,
; EG-NEXT: BFE_UINT T40.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 8(1.121039e-44), 14(1.961818e-44)
; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T41.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T40.Y, T21.Y, literal.y, 1,
; EG-NEXT: BFE_UINT * T42.W, T21.Y, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 13(1.821688e-44)
; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T40.X, T21.Y, literal.x, 1,
; EG-NEXT: BFE_UINT T42.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 12(1.681558e-44), 18(2.522337e-44)
; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T43.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T42.Y, T21.Y, literal.y, 1,
; EG-NEXT: BFE_UINT * T44.W, T21.Y, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 17(2.382207e-44)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T42.X, T21.Y, literal.x, 1,
; EG-NEXT: BFE_UINT T44.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 16(2.242078e-44), 22(3.082857e-44)
; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T45.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T44.Y, T21.Y, literal.y, 1,
; EG-NEXT: BFE_UINT * T46.W, T21.Y, literal.z, 1,
; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44)
; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T44.X, T21.Y, literal.x, 1,
; EG-NEXT: BFE_UINT T46.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 20(2.802597e-44), 26(3.643376e-44)
; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T47.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT T46.Y, T21.Y, literal.y, 1,
; EG-NEXT: LSHR * T48.W, T21.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T46.X, T21.Y, literal.x, 1,
; EG-NEXT: BFE_UINT T48.Z, T21.Y, literal.y, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 24(3.363116e-44), 30(4.203895e-44)
; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T49.X, PV.W, literal.x,
; EG-NEXT: BFE_UINT * T48.Y, T21.Y, literal.y, 1,
; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44)
; EG-NEXT: BFE_UINT T48.X, T21.Y, literal.x, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 28(3.923636e-44), 240(3.363116e-43)
; EG-NEXT: LSHR * T50.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v64i1_to_v64i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_lshr_b32 s33, s3, 31
; GFX12-NEXT: s_bfe_u32 s34, s3, 0x1001d
; GFX12-NEXT: s_bfe_u32 s65, s3, 0x1001c
; GFX12-NEXT: s_bfe_u32 s66, s3, 0x1001e
; GFX12-NEXT: s_bfe_u32 s30, s3, 0x1001b
; GFX12-NEXT: s_bfe_u32 s31, s3, 0x10019
; GFX12-NEXT: s_bfe_u32 s63, s3, 0x1001a
; GFX12-NEXT: s_bfe_u32 s64, s3, 0x10018
; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s34
; GFX12-NEXT: s_bfe_u32 s29, s3, 0x10017
; GFX12-NEXT: s_bfe_u32 s60, s3, 0x10016
; GFX12-NEXT: s_bfe_u32 s61, s3, 0x10015
; GFX12-NEXT: s_bfe_u32 s62, s3, 0x10014
; GFX12-NEXT: v_dual_mov_b32 v0, s65 :: v_dual_mov_b32 v3, s33
; GFX12-NEXT: v_dual_mov_b32 v2, s66 :: v_dual_mov_b32 v5, s31
; GFX12-NEXT: s_bfe_u32 s27, s3, 0x10013
; GFX12-NEXT: s_bfe_u32 s28, s3, 0x10011
; GFX12-NEXT: s_bfe_u32 s58, s3, 0x10012
; GFX12-NEXT: s_bfe_u32 s59, s3, 0x10010
; GFX12-NEXT: v_dual_mov_b32 v4, s64 :: v_dual_mov_b32 v7, s30
; GFX12-NEXT: v_dual_mov_b32 v6, s63 :: v_dual_mov_b32 v9, s61
; GFX12-NEXT: v_dual_mov_b32 v8, s62 :: v_dual_mov_b32 v11, s29
; GFX12-NEXT: v_dual_mov_b32 v10, s60 :: v_dual_mov_b32 v13, s28
; GFX12-NEXT: s_bfe_u32 s19, s3, 0x10003
; GFX12-NEXT: s_bfe_u32 s20, s3, 0x10001
; GFX12-NEXT: s_bfe_u32 s21, s3, 0x10007
; GFX12-NEXT: s_bfe_u32 s22, s3, 0x10005
; GFX12-NEXT: s_bfe_u32 s23, s3, 0x1000b
; GFX12-NEXT: s_bfe_u32 s24, s3, 0x10009
; GFX12-NEXT: s_bfe_u32 s25, s3, 0x1000f
; GFX12-NEXT: s_bfe_u32 s26, s3, 0x1000d
; GFX12-NEXT: s_and_b32 s51, s3, 1
; GFX12-NEXT: s_bfe_u32 s52, s3, 0x10002
; GFX12-NEXT: s_bfe_u32 s53, s3, 0x10006
; GFX12-NEXT: s_bfe_u32 s54, s3, 0x10004
; GFX12-NEXT: s_bfe_u32 s55, s3, 0x1000a
; GFX12-NEXT: s_bfe_u32 s56, s3, 0x10008
; GFX12-NEXT: s_bfe_u32 s57, s3, 0x1000e
; GFX12-NEXT: v_dual_mov_b32 v12, s59 :: v_dual_mov_b32 v15, s27
; GFX12-NEXT: v_mov_b32_e32 v14, s58
; GFX12-NEXT: s_bfe_u32 s3, s3, 0x1000c
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
; GFX12-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v3, s25
; GFX12-NEXT: v_dual_mov_b32 v1, s26 :: v_dual_mov_b32 v2, s57
; GFX12-NEXT: v_dual_mov_b32 v5, s24 :: v_dual_mov_b32 v4, s56
; GFX12-NEXT: v_dual_mov_b32 v7, s23 :: v_dual_mov_b32 v6, s55
; GFX12-NEXT: v_mov_b32_e32 v9, s22
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003
; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001
; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10007
; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10005
; GFX12-NEXT: s_bfe_u32 s8, s2, 0x1000b
; GFX12-NEXT: s_bfe_u32 s9, s2, 0x10009
; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000f
; GFX12-NEXT: s_bfe_u32 s11, s2, 0x1000d
; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10013
; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10011
; GFX12-NEXT: s_bfe_u32 s14, s2, 0x10017
; GFX12-NEXT: s_bfe_u32 s15, s2, 0x1001b
; GFX12-NEXT: s_bfe_u32 s16, s2, 0x10019
; GFX12-NEXT: s_lshr_b32 s17, s2, 31
; GFX12-NEXT: s_bfe_u32 s18, s2, 0x1001d
; GFX12-NEXT: s_and_b32 s35, s2, 1
; GFX12-NEXT: s_bfe_u32 s36, s2, 0x10002
; GFX12-NEXT: s_bfe_u32 s37, s2, 0x10006
; GFX12-NEXT: s_bfe_u32 s38, s2, 0x10004
; GFX12-NEXT: s_bfe_u32 s39, s2, 0x1000a
; GFX12-NEXT: s_bfe_u32 s40, s2, 0x10008
; GFX12-NEXT: s_bfe_u32 s41, s2, 0x1000e
; GFX12-NEXT: s_bfe_u32 s42, s2, 0x1000c
; GFX12-NEXT: s_bfe_u32 s43, s2, 0x10012
; GFX12-NEXT: s_bfe_u32 s44, s2, 0x10010
; GFX12-NEXT: s_bfe_u32 s45, s2, 0x10016
; GFX12-NEXT: s_bfe_u32 s46, s2, 0x10015
; GFX12-NEXT: s_bfe_u32 s47, s2, 0x10014
; GFX12-NEXT: s_bfe_u32 s48, s2, 0x1001a
; GFX12-NEXT: s_bfe_u32 s49, s2, 0x10018
; GFX12-NEXT: s_bfe_u32 s50, s2, 0x1001e
; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1001c
; GFX12-NEXT: v_dual_mov_b32 v8, s54 :: v_dual_mov_b32 v11, s21
; GFX12-NEXT: v_dual_mov_b32 v10, s53 :: v_dual_mov_b32 v13, s20
; GFX12-NEXT: v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v15, s19
; GFX12-NEXT: v_dual_mov_b32 v14, s52 :: v_dual_mov_b32 v17, s18
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s17
; GFX12-NEXT: v_dual_mov_b32 v18, s50 :: v_dual_mov_b32 v21, s16
; GFX12-NEXT: v_dual_mov_b32 v20, s49 :: v_dual_mov_b32 v23, s15
; GFX12-NEXT: v_mov_b32_e32 v22, s48
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
; GFX12-NEXT: v_dual_mov_b32 v0, s47 :: v_dual_mov_b32 v3, s14
; GFX12-NEXT: v_dual_mov_b32 v1, s46 :: v_dual_mov_b32 v2, s45
; GFX12-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v4, s44
; GFX12-NEXT: v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v6, s43
; GFX12-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v8, s42
; GFX12-NEXT: v_dual_mov_b32 v11, s10 :: v_dual_mov_b32 v10, s41
; GFX12-NEXT: v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v12, s40
; GFX12-NEXT: v_dual_mov_b32 v15, s8 :: v_dual_mov_b32 v14, s39
; GFX12-NEXT: v_dual_mov_b32 v17, s7 :: v_dual_mov_b32 v16, s38
; GFX12-NEXT: v_dual_mov_b32 v19, s6 :: v_dual_mov_b32 v18, s37
; GFX12-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v20, s35
; GFX12-NEXT: v_dual_mov_b32 v23, s4 :: v_dual_mov_b32 v22, s36
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <64 x i1>, ptr addrspace(4) %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v64i1_to_v64i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_i32 s4, s2, 0x10003
; GFX6-NEXT: s_bfe_i32 s5, s2, 0x10002
; GFX6-NEXT: s_bfe_i32 s6, s2, 0x10001
; GFX6-NEXT: s_bfe_i32 s7, s2, 0x10000
; GFX6-NEXT: s_bfe_i32 s8, s2, 0x10007
; GFX6-NEXT: s_bfe_i32 s9, s2, 0x10006
; GFX6-NEXT: s_bfe_i32 s10, s2, 0x10005
; GFX6-NEXT: s_bfe_i32 s11, s2, 0x10004
; GFX6-NEXT: s_bfe_i32 s12, s2, 0x1000b
; GFX6-NEXT: s_bfe_i32 s13, s2, 0x1000a
; GFX6-NEXT: s_bfe_i32 s14, s2, 0x10009
; GFX6-NEXT: s_bfe_i32 s15, s2, 0x10008
; GFX6-NEXT: s_bfe_i32 s16, s2, 0x1000f
; GFX6-NEXT: s_bfe_i32 s17, s2, 0x1000e
; GFX6-NEXT: s_bfe_i32 s18, s2, 0x1000d
; GFX6-NEXT: s_bfe_i32 s19, s2, 0x1000c
; GFX6-NEXT: s_bfe_i32 s20, s2, 0x10013
; GFX6-NEXT: s_bfe_i32 s21, s2, 0x10012
; GFX6-NEXT: s_bfe_i32 s22, s2, 0x10011
; GFX6-NEXT: s_bfe_i32 s23, s2, 0x10010
; GFX6-NEXT: s_bfe_i32 s24, s2, 0x10017
; GFX6-NEXT: s_bfe_i32 s25, s2, 0x10016
; GFX6-NEXT: s_bfe_i32 s26, s2, 0x10015
; GFX6-NEXT: s_bfe_i32 s27, s2, 0x10014
; GFX6-NEXT: s_bfe_i32 s28, s2, 0x1001b
; GFX6-NEXT: s_bfe_i32 s29, s2, 0x1001a
; GFX6-NEXT: s_bfe_i32 s30, s2, 0x10019
; GFX6-NEXT: s_bfe_i32 s31, s2, 0x10018
; GFX6-NEXT: s_ashr_i32 s33, s2, 31
; GFX6-NEXT: s_bfe_i32 s34, s2, 0x1001e
; GFX6-NEXT: s_bfe_i32 s35, s2, 0x1001d
; GFX6-NEXT: s_bfe_i32 s36, s2, 0x1001c
; GFX6-NEXT: s_bfe_i32 s37, s3, 0x10003
; GFX6-NEXT: s_bfe_i32 s38, s3, 0x10002
; GFX6-NEXT: s_bfe_i32 s39, s3, 0x10001
; GFX6-NEXT: s_bfe_i32 s40, s3, 0x10000
; GFX6-NEXT: s_bfe_i32 s41, s3, 0x10007
; GFX6-NEXT: s_bfe_i32 s42, s3, 0x10006
; GFX6-NEXT: s_bfe_i32 s43, s3, 0x10005
; GFX6-NEXT: s_bfe_i32 s44, s3, 0x10004
; GFX6-NEXT: s_bfe_i32 s45, s3, 0x1000b
; GFX6-NEXT: s_bfe_i32 s46, s3, 0x1000a
; GFX6-NEXT: s_bfe_i32 s47, s3, 0x10009
; GFX6-NEXT: s_bfe_i32 s48, s3, 0x10008
; GFX6-NEXT: s_bfe_i32 s49, s3, 0x1000e
; GFX6-NEXT: s_bfe_i32 s50, s3, 0x1000d
; GFX6-NEXT: s_bfe_i32 s51, s3, 0x1000c
; GFX6-NEXT: s_bfe_i32 s52, s3, 0x10013
; GFX6-NEXT: s_bfe_i32 s53, s3, 0x10012
; GFX6-NEXT: s_bfe_i32 s54, s3, 0x10011
; GFX6-NEXT: s_bfe_i32 s55, s3, 0x10010
; GFX6-NEXT: s_bfe_i32 s56, s3, 0x10017
; GFX6-NEXT: s_bfe_i32 s57, s3, 0x10016
; GFX6-NEXT: s_bfe_i32 s58, s3, 0x10015
; GFX6-NEXT: s_bfe_i32 s59, s3, 0x10014
; GFX6-NEXT: s_bfe_i32 s60, s3, 0x1001b
; GFX6-NEXT: s_bfe_i32 s61, s3, 0x1001a
; GFX6-NEXT: s_bfe_i32 s62, s3, 0x10019
; GFX6-NEXT: s_bfe_i32 s63, s3, 0x10018
; GFX6-NEXT: s_ashr_i32 s64, s3, 31
; GFX6-NEXT: s_bfe_i32 s65, s3, 0x1001e
; GFX6-NEXT: s_bfe_i32 s66, s3, 0x1001d
; GFX6-NEXT: s_bfe_i32 s67, s3, 0x1001c
; GFX6-NEXT: s_bfe_i32 s68, s3, 0x1000f
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s67
; GFX6-NEXT: v_mov_b32_e32 v1, s66
; GFX6-NEXT: v_mov_b32_e32 v2, s65
; GFX6-NEXT: v_mov_b32_e32 v3, s64
; GFX6-NEXT: v_mov_b32_e32 v4, s63
; GFX6-NEXT: v_mov_b32_e32 v5, s62
; GFX6-NEXT: v_mov_b32_e32 v6, s61
; GFX6-NEXT: v_mov_b32_e32 v7, s60
; GFX6-NEXT: v_mov_b32_e32 v8, s59
; GFX6-NEXT: v_mov_b32_e32 v9, s58
; GFX6-NEXT: v_mov_b32_e32 v10, s57
; GFX6-NEXT: v_mov_b32_e32 v11, s56
; GFX6-NEXT: v_mov_b32_e32 v12, s55
; GFX6-NEXT: v_mov_b32_e32 v13, s54
; GFX6-NEXT: v_mov_b32_e32 v14, s53
; GFX6-NEXT: v_mov_b32_e32 v15, s52
; GFX6-NEXT: v_mov_b32_e32 v16, s51
; GFX6-NEXT: v_mov_b32_e32 v17, s50
; GFX6-NEXT: v_mov_b32_e32 v18, s49
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s48
; GFX6-NEXT: v_mov_b32_e32 v19, s68
; GFX6-NEXT: v_mov_b32_e32 v1, s47
; GFX6-NEXT: v_mov_b32_e32 v2, s46
; GFX6-NEXT: v_mov_b32_e32 v3, s45
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s44
; GFX6-NEXT: v_mov_b32_e32 v1, s43
; GFX6-NEXT: v_mov_b32_e32 v2, s42
; GFX6-NEXT: v_mov_b32_e32 v3, s41
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s40
; GFX6-NEXT: v_mov_b32_e32 v1, s39
; GFX6-NEXT: v_mov_b32_e32 v2, s38
; GFX6-NEXT: v_mov_b32_e32 v3, s37
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s36
; GFX6-NEXT: v_mov_b32_e32 v1, s35
; GFX6-NEXT: v_mov_b32_e32 v2, s34
; GFX6-NEXT: v_mov_b32_e32 v3, s33
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s31
; GFX6-NEXT: v_mov_b32_e32 v1, s30
; GFX6-NEXT: v_mov_b32_e32 v2, s29
; GFX6-NEXT: v_mov_b32_e32 v3, s28
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s27
; GFX6-NEXT: v_mov_b32_e32 v1, s26
; GFX6-NEXT: v_mov_b32_e32 v2, s25
; GFX6-NEXT: v_mov_b32_e32 v3, s24
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s23
; GFX6-NEXT: v_mov_b32_e32 v1, s22
; GFX6-NEXT: v_mov_b32_e32 v2, s21
; GFX6-NEXT: v_mov_b32_e32 v3, s20
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s19
; GFX6-NEXT: v_mov_b32_e32 v1, s18
; GFX6-NEXT: v_mov_b32_e32 v2, s17
; GFX6-NEXT: v_mov_b32_e32 v3, s16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s15
; GFX6-NEXT: v_mov_b32_e32 v1, s14
; GFX6-NEXT: v_mov_b32_e32 v2, s13
; GFX6-NEXT: v_mov_b32_e32 v3, s12
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s11
; GFX6-NEXT: v_mov_b32_e32 v1, s10
; GFX6-NEXT: v_mov_b32_e32 v2, s9
; GFX6-NEXT: v_mov_b32_e32 v3, s8
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s7
; GFX6-NEXT: v_mov_b32_e32 v1, s6
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: v_mov_b32_e32 v3, s4
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v64i1_to_v64i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_bfe_i32 s2, s26, 0x10003
; GFX8-NEXT: s_bfe_i32 s3, s26, 0x10002
; GFX8-NEXT: s_bfe_i32 s4, s26, 0x10001
; GFX8-NEXT: s_bfe_i32 s5, s26, 0x10000
; GFX8-NEXT: s_bfe_i32 s6, s26, 0x10007
; GFX8-NEXT: s_bfe_i32 s7, s26, 0x10006
; GFX8-NEXT: s_bfe_i32 s8, s26, 0x10005
; GFX8-NEXT: s_bfe_i32 s9, s26, 0x10004
; GFX8-NEXT: s_bfe_i32 s10, s26, 0x1000b
; GFX8-NEXT: s_bfe_i32 s11, s26, 0x1000a
; GFX8-NEXT: s_bfe_i32 s12, s26, 0x10009
; GFX8-NEXT: s_bfe_i32 s13, s26, 0x10008
; GFX8-NEXT: s_bfe_i32 s14, s26, 0x1000f
; GFX8-NEXT: s_bfe_i32 s15, s26, 0x1000e
; GFX8-NEXT: s_bfe_i32 s16, s26, 0x1000d
; GFX8-NEXT: s_bfe_i32 s17, s26, 0x1000c
; GFX8-NEXT: s_bfe_i32 s18, s26, 0x10013
; GFX8-NEXT: s_bfe_i32 s19, s26, 0x10012
; GFX8-NEXT: s_bfe_i32 s20, s26, 0x10011
; GFX8-NEXT: s_bfe_i32 s21, s26, 0x10010
; GFX8-NEXT: s_bfe_i32 s22, s26, 0x10017
; GFX8-NEXT: s_bfe_i32 s23, s26, 0x10016
; GFX8-NEXT: s_bfe_i32 s24, s26, 0x10015
; GFX8-NEXT: s_bfe_i32 s25, s26, 0x10014
; GFX8-NEXT: s_bfe_i32 s28, s26, 0x1001b
; GFX8-NEXT: s_bfe_i32 s29, s26, 0x1001a
; GFX8-NEXT: s_bfe_i32 s30, s26, 0x10019
; GFX8-NEXT: s_bfe_i32 s31, s26, 0x10018
; GFX8-NEXT: s_ashr_i32 s33, s26, 31
; GFX8-NEXT: s_bfe_i32 s34, s26, 0x1001e
; GFX8-NEXT: s_bfe_i32 s35, s26, 0x1001d
; GFX8-NEXT: s_bfe_i32 s36, s26, 0x1001c
; GFX8-NEXT: s_bfe_i32 s37, s27, 0x10003
; GFX8-NEXT: s_bfe_i32 s38, s27, 0x10002
; GFX8-NEXT: s_bfe_i32 s39, s27, 0x10001
; GFX8-NEXT: s_bfe_i32 s40, s27, 0x10000
; GFX8-NEXT: s_bfe_i32 s41, s27, 0x10007
; GFX8-NEXT: s_bfe_i32 s42, s27, 0x10006
; GFX8-NEXT: s_bfe_i32 s43, s27, 0x10005
; GFX8-NEXT: s_bfe_i32 s44, s27, 0x10004
; GFX8-NEXT: s_bfe_i32 s45, s27, 0x1000b
; GFX8-NEXT: s_bfe_i32 s46, s27, 0x1000a
; GFX8-NEXT: s_bfe_i32 s47, s27, 0x10009
; GFX8-NEXT: s_bfe_i32 s48, s27, 0x10008
; GFX8-NEXT: s_bfe_i32 s49, s27, 0x1000f
; GFX8-NEXT: s_bfe_i32 s50, s27, 0x1000e
; GFX8-NEXT: s_bfe_i32 s51, s27, 0x1000d
; GFX8-NEXT: s_bfe_i32 s52, s27, 0x1000c
; GFX8-NEXT: s_bfe_i32 s53, s27, 0x10013
; GFX8-NEXT: s_bfe_i32 s54, s27, 0x10012
; GFX8-NEXT: s_bfe_i32 s55, s27, 0x10011
; GFX8-NEXT: s_bfe_i32 s56, s27, 0x10010
; GFX8-NEXT: s_bfe_i32 s57, s27, 0x10017
; GFX8-NEXT: s_bfe_i32 s58, s27, 0x10016
; GFX8-NEXT: s_bfe_i32 s59, s27, 0x10015
; GFX8-NEXT: s_bfe_i32 s60, s27, 0x10014
; GFX8-NEXT: s_bfe_i32 s61, s27, 0x1001b
; GFX8-NEXT: s_bfe_i32 s62, s27, 0x1001a
; GFX8-NEXT: s_bfe_i32 s63, s27, 0x10019
; GFX8-NEXT: s_bfe_i32 s64, s27, 0x10018
; GFX8-NEXT: s_ashr_i32 s26, s27, 31
; GFX8-NEXT: s_bfe_i32 s65, s27, 0x1001e
; GFX8-NEXT: s_bfe_i32 s66, s27, 0x1001d
; GFX8-NEXT: s_bfe_i32 s27, s27, 0x1001c
; GFX8-NEXT: v_mov_b32_e32 v3, s26
; GFX8-NEXT: s_add_u32 s26, s0, 0xf0
; GFX8-NEXT: v_mov_b32_e32 v0, s27
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v1, s66
; GFX8-NEXT: v_mov_b32_e32 v2, s65
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xe0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s64
; GFX8-NEXT: v_mov_b32_e32 v1, s63
; GFX8-NEXT: v_mov_b32_e32 v2, s62
; GFX8-NEXT: v_mov_b32_e32 v3, s61
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xd0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s60
; GFX8-NEXT: v_mov_b32_e32 v1, s59
; GFX8-NEXT: v_mov_b32_e32 v2, s58
; GFX8-NEXT: v_mov_b32_e32 v3, s57
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xc0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s56
; GFX8-NEXT: v_mov_b32_e32 v1, s55
; GFX8-NEXT: v_mov_b32_e32 v2, s54
; GFX8-NEXT: v_mov_b32_e32 v3, s53
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xb0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s52
; GFX8-NEXT: v_mov_b32_e32 v1, s51
; GFX8-NEXT: v_mov_b32_e32 v2, s50
; GFX8-NEXT: v_mov_b32_e32 v3, s49
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0xa0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s48
; GFX8-NEXT: v_mov_b32_e32 v1, s47
; GFX8-NEXT: v_mov_b32_e32 v2, s46
; GFX8-NEXT: v_mov_b32_e32 v3, s45
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x90
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s44
; GFX8-NEXT: v_mov_b32_e32 v1, s43
; GFX8-NEXT: v_mov_b32_e32 v2, s42
; GFX8-NEXT: v_mov_b32_e32 v3, s41
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x80
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s40
; GFX8-NEXT: v_mov_b32_e32 v1, s39
; GFX8-NEXT: v_mov_b32_e32 v2, s38
; GFX8-NEXT: v_mov_b32_e32 v3, s37
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x70
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s36
; GFX8-NEXT: v_mov_b32_e32 v1, s35
; GFX8-NEXT: v_mov_b32_e32 v2, s34
; GFX8-NEXT: v_mov_b32_e32 v3, s33
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s26, s0, 0x60
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s31
; GFX8-NEXT: v_mov_b32_e32 v1, s30
; GFX8-NEXT: v_mov_b32_e32 v2, s29
; GFX8-NEXT: v_mov_b32_e32 v3, s28
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v3, s22
; GFX8-NEXT: s_add_u32 s22, s0, 0x50
; GFX8-NEXT: v_mov_b32_e32 v2, s23
; GFX8-NEXT: s_addc_u32 s23, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s22
; GFX8-NEXT: v_mov_b32_e32 v0, s25
; GFX8-NEXT: v_mov_b32_e32 v1, s24
; GFX8-NEXT: v_mov_b32_e32 v5, s23
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v3, s18
; GFX8-NEXT: s_add_u32 s18, s0, 64
; GFX8-NEXT: v_mov_b32_e32 v2, s19
; GFX8-NEXT: s_addc_u32 s19, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s18
; GFX8-NEXT: v_mov_b32_e32 v0, s21
; GFX8-NEXT: v_mov_b32_e32 v1, s20
; GFX8-NEXT: v_mov_b32_e32 v5, s19
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v3, s14
; GFX8-NEXT: s_add_u32 s14, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v2, s15
; GFX8-NEXT: s_addc_u32 s15, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s14
; GFX8-NEXT: v_mov_b32_e32 v0, s17
; GFX8-NEXT: v_mov_b32_e32 v1, s16
; GFX8-NEXT: v_mov_b32_e32 v5, s15
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v3, s10
; GFX8-NEXT: s_add_u32 s10, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v2, s11
; GFX8-NEXT: s_addc_u32 s11, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s10
; GFX8-NEXT: v_mov_b32_e32 v0, s13
; GFX8-NEXT: v_mov_b32_e32 v1, s12
; GFX8-NEXT: v_mov_b32_e32 v5, s11
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v3, s6
; GFX8-NEXT: s_add_u32 s6, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s7
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s9
; GFX8-NEXT: v_mov_b32_e32 v1, s8
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s5
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v64i1_to_v64i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @22
; EG-NEXT: ALU 99, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 98, @125, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 13, @224, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T49.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T46.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T44.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T42.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T40.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T38.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T36.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T35.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T33.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T31.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T29.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T27.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T25.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T21.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 22:
; EG-NEXT: VTX_READ_64 T19.XY, T19.X, 0, #1
; EG-NEXT: ALU clause starting at 24:
; EG-NEXT: MOV * T19.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 25:
; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T20.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T20.Z, PS, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.x,
; EG-NEXT: LSHR * T1.W, T19.X, literal.y,
; EG-NEXT: 11(1.541428e-44), 5(7.006492e-45)
; EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
; EG-NEXT: BFE_INT T20.Y, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.X, literal.y,
; EG-NEXT: BFE_INT T22.W, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44)
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T20.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.X, literal.x,
; EG-NEXT: BFE_INT T22.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 15(2.101948e-44), 9(1.261169e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T23.X, PS, literal.x,
; EG-NEXT: BFE_INT T22.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.X, literal.y,
; EG-NEXT: BFE_INT T24.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 14(1.961818e-44)
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T22.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.X, literal.x,
; EG-NEXT: BFE_INT T24.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 19(2.662467e-44), 13(1.821688e-44)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T25.X, PS, literal.x,
; EG-NEXT: BFE_INT T24.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.X, literal.y,
; EG-NEXT: BFE_INT T26.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 18(2.522337e-44)
; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T24.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.X, literal.x,
; EG-NEXT: BFE_INT T26.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 23(3.222986e-44), 17(2.382207e-44)
; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T27.X, PS, literal.x,
; EG-NEXT: BFE_INT T26.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.X, literal.y,
; EG-NEXT: BFE_INT T28.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 22(3.082857e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T26.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.X, literal.x,
; EG-NEXT: BFE_INT T28.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 27(3.783506e-44), 21(2.942727e-44)
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T29.X, PS, literal.x,
; EG-NEXT: BFE_INT T28.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.X, literal.y,
; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 26(3.643376e-44)
; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T28.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T30.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 25(3.503246e-44), 80(1.121039e-43)
; EG-NEXT: LSHR T31.X, PS, literal.x,
; EG-NEXT: BFE_INT T30.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.X, literal.y,
; EG-NEXT: LSHR T0.W, T19.X, literal.z,
; EG-NEXT: ASHR * T32.W, T19.X, literal.w,
; EG-NEXT: 2(2.802597e-45), 30(4.203895e-44)
; EG-NEXT: 24(3.363116e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T30.X, PV.W, 0.0, 1,
; EG-NEXT: BFE_INT T32.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.X, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 29(4.063766e-44), 96(1.345247e-43)
; EG-NEXT: LSHR T33.X, PS, literal.x,
; EG-NEXT: BFE_INT T32.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.y,
; EG-NEXT: LSHR * T1.W, T19.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 7(9.809089e-45)
; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T32.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.x,
; EG-NEXT: BFE_INT T34.W, PV.W, 0.0, 1,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 6(8.407791e-45), 112(1.569454e-43)
; EG-NEXT: ALU clause starting at 125:
; EG-NEXT: LSHR T35.X, T0.W, literal.x,
; EG-NEXT: LSHR T0.Y, T19.Y, literal.y,
; EG-NEXT: BFE_INT T34.Z, T0.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.z,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 11(1.541428e-44)
; EG-NEXT: 5(7.006492e-45), 128(1.793662e-43)
; EG-NEXT: LSHR T36.X, PS, literal.x,
; EG-NEXT: BFE_INT T34.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.y,
; EG-NEXT: BFE_INT T37.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 10(1.401298e-44)
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T34.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.Y, literal.x,
; EG-NEXT: BFE_INT T37.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 15(2.101948e-44), 9(1.261169e-44)
; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T38.X, PS, literal.x,
; EG-NEXT: BFE_INT T37.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.y,
; EG-NEXT: BFE_INT T39.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 14(1.961818e-44)
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T37.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.Y, literal.x,
; EG-NEXT: BFE_INT T39.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 19(2.662467e-44), 13(1.821688e-44)
; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T40.X, PS, literal.x,
; EG-NEXT: BFE_INT T39.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.y,
; EG-NEXT: BFE_INT T41.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 18(2.522337e-44)
; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T39.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.Y, literal.x,
; EG-NEXT: BFE_INT T41.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 23(3.222986e-44), 17(2.382207e-44)
; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T42.X, PS, literal.x,
; EG-NEXT: BFE_INT T41.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.y,
; EG-NEXT: BFE_INT T43.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 22(3.082857e-44)
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T41.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.Y, literal.x,
; EG-NEXT: BFE_INT T43.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.y,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
; EG-NEXT: 27(3.783506e-44), 21(2.942727e-44)
; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T44.X, PS, literal.x,
; EG-NEXT: BFE_INT T43.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.y,
; EG-NEXT: BFE_INT T45.W, PV.Y, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 26(3.643376e-44)
; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T43.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T45.Z, PV.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 25(3.503246e-44), 208(2.914701e-43)
; EG-NEXT: LSHR T46.X, PS, literal.x,
; EG-NEXT: BFE_INT T45.Y, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T19.Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44)
; EG-NEXT: BFE_INT T45.X, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.Z, T19.Y, literal.x,
; EG-NEXT: LSHR T0.W, T19.X, 1,
; EG-NEXT: LSHR * T1.W, T19.Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 3(4.203895e-45)
; EG-NEXT: BFE_INT T47.X, T19.X, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T19.X, literal.x,
; EG-NEXT: LSHR T1.Z, T19.X, literal.y,
; EG-NEXT: LSHR T2.W, T19.Y, literal.z,
; EG-NEXT: ASHR * T48.W, T19.Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 3(4.203895e-45)
; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
; EG-NEXT: BFE_INT T19.X, T19.Y, 0.0, 1,
; EG-NEXT: LSHR T1.Y, T19.Y, literal.x,
; EG-NEXT: BFE_INT T48.Z, PV.W, 0.0, 1,
; EG-NEXT: BFE_INT T47.W, PV.Z, 0.0, 1,
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
; EG-NEXT: 29(4.063766e-44), 224(3.138909e-43)
; EG-NEXT: LSHR * T49.X, PS, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 224:
; EG-NEXT: BFE_INT T48.Y, T1.Y, 0.0, 1,
; EG-NEXT: BFE_INT T47.Z, T0.Y, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: BFE_INT T19.W, T1.W, 0.0, 1,
; EG-NEXT: LSHR * T1.W, T19.Y, literal.x,
; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T48.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T47.Y, T0.W, 0.0, 1,
; EG-NEXT: BFE_INT T19.Z, T0.Z, 0.0, 1,
; EG-NEXT: LSHR T0.W, T19.Y, 1,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T50.X, PS, literal.x,
; EG-NEXT: BFE_INT * T19.Y, PV.W, 0.0, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v64i1_to_v64i32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_ashr_i32 s63, s3, 31
; GFX12-NEXT: s_bfe_i32 s64, s3, 0x1001e
; GFX12-NEXT: s_bfe_i32 s65, s3, 0x1001c
; GFX12-NEXT: s_bfe_i32 s66, s3, 0x1001d
; GFX12-NEXT: s_bfe_i32 s59, s3, 0x1001b
; GFX12-NEXT: s_bfe_i32 s60, s3, 0x1001a
; GFX12-NEXT: s_bfe_i32 s61, s3, 0x10019
; GFX12-NEXT: s_bfe_i32 s62, s3, 0x10018
; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66
; GFX12-NEXT: s_bfe_i32 s55, s3, 0x10017
; GFX12-NEXT: s_bfe_i32 s56, s3, 0x10016
; GFX12-NEXT: s_bfe_i32 s57, s3, 0x10015
; GFX12-NEXT: s_bfe_i32 s58, s3, 0x10014
; GFX12-NEXT: v_dual_mov_b32 v0, s65 :: v_dual_mov_b32 v3, s63
; GFX12-NEXT: v_dual_mov_b32 v2, s64 :: v_dual_mov_b32 v5, s61
; GFX12-NEXT: s_bfe_i32 s51, s3, 0x10013
; GFX12-NEXT: s_bfe_i32 s52, s3, 0x10012
; GFX12-NEXT: s_bfe_i32 s53, s3, 0x10011
; GFX12-NEXT: s_bfe_i32 s54, s3, 0x10010
; GFX12-NEXT: v_dual_mov_b32 v4, s62 :: v_dual_mov_b32 v7, s59
; GFX12-NEXT: v_dual_mov_b32 v6, s60 :: v_dual_mov_b32 v9, s57
; GFX12-NEXT: v_dual_mov_b32 v8, s58 :: v_dual_mov_b32 v11, s55
; GFX12-NEXT: v_dual_mov_b32 v10, s56 :: v_dual_mov_b32 v13, s53
; GFX12-NEXT: s_bfe_i32 s36, s3, 0x10003
; GFX12-NEXT: s_bfe_i32 s37, s3, 0x10002
; GFX12-NEXT: s_bfe_i32 s38, s3, 0x10001
; GFX12-NEXT: s_bfe_i32 s39, s3, 0x10000
; GFX12-NEXT: s_bfe_i32 s40, s3, 0x10007
; GFX12-NEXT: s_bfe_i32 s41, s3, 0x10006
; GFX12-NEXT: s_bfe_i32 s42, s3, 0x10005
; GFX12-NEXT: s_bfe_i32 s43, s3, 0x10004
; GFX12-NEXT: s_bfe_i32 s44, s3, 0x1000b
; GFX12-NEXT: s_bfe_i32 s45, s3, 0x1000a
; GFX12-NEXT: s_bfe_i32 s46, s3, 0x10009
; GFX12-NEXT: s_bfe_i32 s47, s3, 0x10008
; GFX12-NEXT: s_bfe_i32 s48, s3, 0x1000f
; GFX12-NEXT: s_bfe_i32 s49, s3, 0x1000e
; GFX12-NEXT: s_bfe_i32 s50, s3, 0x1000d
; GFX12-NEXT: v_dual_mov_b32 v12, s54 :: v_dual_mov_b32 v15, s51
; GFX12-NEXT: v_mov_b32_e32 v14, s52
; GFX12-NEXT: s_bfe_i32 s3, s3, 0x1000c
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
; GFX12-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v3, s48
; GFX12-NEXT: v_dual_mov_b32 v1, s50 :: v_dual_mov_b32 v2, s49
; GFX12-NEXT: v_dual_mov_b32 v5, s46 :: v_dual_mov_b32 v4, s47
; GFX12-NEXT: v_dual_mov_b32 v7, s44 :: v_dual_mov_b32 v6, s45
; GFX12-NEXT: v_mov_b32_e32 v9, s42
; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10003
; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10002
; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10001
; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10000
; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10007
; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10006
; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10005
; GFX12-NEXT: s_bfe_i32 s11, s2, 0x10004
; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000b
; GFX12-NEXT: s_bfe_i32 s13, s2, 0x1000a
; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10009
; GFX12-NEXT: s_bfe_i32 s15, s2, 0x10008
; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000f
; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000e
; GFX12-NEXT: s_bfe_i32 s18, s2, 0x1000d
; GFX12-NEXT: s_bfe_i32 s19, s2, 0x1000c
; GFX12-NEXT: s_bfe_i32 s20, s2, 0x10013
; GFX12-NEXT: s_bfe_i32 s21, s2, 0x10012
; GFX12-NEXT: s_bfe_i32 s22, s2, 0x10011
; GFX12-NEXT: s_bfe_i32 s23, s2, 0x10010
; GFX12-NEXT: s_bfe_i32 s24, s2, 0x10017
; GFX12-NEXT: s_bfe_i32 s25, s2, 0x10016
; GFX12-NEXT: s_bfe_i32 s26, s2, 0x10015
; GFX12-NEXT: s_bfe_i32 s27, s2, 0x10014
; GFX12-NEXT: s_bfe_i32 s28, s2, 0x1001b
; GFX12-NEXT: s_bfe_i32 s29, s2, 0x1001a
; GFX12-NEXT: s_bfe_i32 s30, s2, 0x10019
; GFX12-NEXT: s_bfe_i32 s31, s2, 0x10018
; GFX12-NEXT: s_ashr_i32 s33, s2, 31
; GFX12-NEXT: s_bfe_i32 s34, s2, 0x1001e
; GFX12-NEXT: s_bfe_i32 s35, s2, 0x1001d
; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1001c
; GFX12-NEXT: v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v11, s40
; GFX12-NEXT: v_dual_mov_b32 v10, s41 :: v_dual_mov_b32 v13, s38
; GFX12-NEXT: v_dual_mov_b32 v12, s39 :: v_dual_mov_b32 v15, s36
; GFX12-NEXT: v_dual_mov_b32 v14, s37 :: v_dual_mov_b32 v17, s35
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v16, s2 :: v_dual_mov_b32 v19, s33
; GFX12-NEXT: v_dual_mov_b32 v18, s34 :: v_dual_mov_b32 v21, s30
; GFX12-NEXT: v_dual_mov_b32 v20, s31 :: v_dual_mov_b32 v23, s28
; GFX12-NEXT: v_mov_b32_e32 v22, s29
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
; GFX12-NEXT: v_dual_mov_b32 v0, s27 :: v_dual_mov_b32 v3, s24
; GFX12-NEXT: v_dual_mov_b32 v1, s26 :: v_dual_mov_b32 v2, s25
; GFX12-NEXT: v_dual_mov_b32 v5, s22 :: v_dual_mov_b32 v4, s23
; GFX12-NEXT: v_dual_mov_b32 v7, s20 :: v_dual_mov_b32 v6, s21
; GFX12-NEXT: v_dual_mov_b32 v9, s18 :: v_dual_mov_b32 v8, s19
; GFX12-NEXT: v_dual_mov_b32 v11, s16 :: v_dual_mov_b32 v10, s17
; GFX12-NEXT: v_dual_mov_b32 v13, s14 :: v_dual_mov_b32 v12, s15
; GFX12-NEXT: v_dual_mov_b32 v15, s12 :: v_dual_mov_b32 v14, s13
; GFX12-NEXT: v_dual_mov_b32 v17, s10 :: v_dual_mov_b32 v16, s11
; GFX12-NEXT: v_dual_mov_b32 v19, s8 :: v_dual_mov_b32 v18, s9
; GFX12-NEXT: v_dual_mov_b32 v21, s6 :: v_dual_mov_b32 v20, s7
; GFX12-NEXT: v_dual_mov_b32 v23, s4 :: v_dual_mov_b32 v22, s5
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <64 x i1>, ptr addrspace(4) %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(1) %out
ret void
}
; Scalar zero-extension test: load one i1 through the addrspace(4) pointer %in,
; zext it to i64 and store the result through the addrspace(1) pointer %out.
; On the GFX6, GFX8 and GFX12 prefixes the checks expect a byte-sized load, an
; AND with 1 for the low dword and a constant 0 for the high dword, written
; back with one 64-bit store.
define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_i1_to_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_i1_to_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_i1_to_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.Y, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_i1_to_i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %a = load i1, ptr addrspace(4) %in
  %ext = zext i1 %a to i64
  store i64 %ext, ptr addrspace(1) %out
  ret void
}
; Scalar sign-extension test: load one i1 through the addrspace(4) pointer %in,
; sext it to i64 and store the result through the addrspace(1) pointer %out.
; The checks expect a 1-bit signed bitfield extract of the loaded byte. GFX6 and
; GFX8 build the high dword with an arithmetic shift right by 31, EG copies the
; extracted value into the second channel, and GFX12 uses one s_bfe_i64.
define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_i1_to_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_i1_to_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_i1_to_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV * T0.Y, PV.X,
;
; GFX12-LABEL: constant_sextload_i1_to_i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %a = load i1, ptr addrspace(4) %in
  %ext = sext i1 %a to i64
  store i64 %ext, ptr addrspace(1) %out
  ret void
}
; Single-element vector zero-extension test: load <1 x i1> through the
; addrspace(4) pointer %in, zext it to <1 x i64> and store the result through
; the addrspace(1) pointer %out.
; The instruction sequences checked here are the same as those checked for the
; scalar test constant_zextload_i1_to_i64; only the label differs.
define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v1i1_to_v1i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v1i1_to_v1i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 1, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v1i1_to_v1i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.Y, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v1i1_to_v1i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <1 x i1>, ptr addrspace(4) %in
  %ext = zext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Single-element vector sign-extension test: load <1 x i1> through the
; addrspace(4) pointer %in, sext it to <1 x i64> and store the result through
; the addrspace(1) pointer %out.
; The instruction sequences checked here are the same as those checked for the
; scalar test constant_sextload_i1_to_i64; only the label differs.
define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v1i1_to_v1i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v1i1_to_v1i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v1i1_to_v1i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV * T0.Y, PV.X,
;
; GFX12-LABEL: constant_sextload_v1i1_to_v1i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <1 x i1>, ptr addrspace(4) %in
  %ext = sext <1 x i1> %load to <1 x i64>
  store <1 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Two-element vector zero-extension test: load <2 x i1> through the
; addrspace(4) pointer %in, zext it to <2 x i64> and store the result through
; the addrspace(1) pointer %out.
; Every prefix expects one byte-sized load that feeds both elements and one
; 128-bit store of the result. Unlike the one-element tests, the GFX12 checks
; use global_load_u8 here instead of s_load_u8.
define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v2i1_to_v2i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v2i1_to_v2i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, 1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v3, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v2i1_to_v2i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T0.Z, T0.X, 1, 1,
; EG-NEXT: AND_INT T0.X, T0.X, 1,
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v2i1_to_v2i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v1, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v0
; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 1, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 1, v2
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <2 x i1>, ptr addrspace(4) %in
  %ext = zext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Two-element vector sign-extension test: load <2 x i1> through the
; addrspace(4) pointer %in, sext it to <2 x i64> and store the result through
; the addrspace(1) pointer %out.
; Every prefix expects one byte-sized load, a shift right by 1 to reach the
; second element, a 1-bit signed bitfield extract per element and one 128-bit
; store. On GFX6, GFX8 and GFX12 each high dword is produced by an arithmetic
; shift right by 31.
define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v2i1_to_v2i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v2i1_to_v2i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v2i1_to_v2i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, 1,
; EG-NEXT: BFE_INT * T1.Z, PV.W, 0.0, 1,
; EG-NEXT: MOV * T1.Y, T1.X,
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
; EG-NEXT: MOV * T1.W, T1.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v2i1_to_v2i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v4, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v4, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 1, v0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_bfe_i32 v2, v1, 0, 1
; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <2 x i1>, ptr addrspace(4) %in
  %ext = sext <2 x i1> %load to <2 x i64>
  store <2 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Three-element vector zero-extension test: load <3 x i1> through the
; addrspace(4) pointer %in, zext it to <3 x i64> and store the result through
; the addrspace(1) pointer %out.
; Every prefix expects one byte-sized load and a result split into two stores:
; a 64-bit store for the third element at byte offset 16 and a 128-bit store
; for the first two elements at offset 0.
define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v3i1_to_v3i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0
; GFX6-NEXT: v_mov_b32_e32 v5, 0
; GFX6-NEXT: v_mov_b32_e32 v1, v5
; GFX6-NEXT: v_mov_b32_e32 v3, v5
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v0, 1, v4
; GFX6-NEXT: v_bfe_u32 v2, v4, 1, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 2, v4
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v3i1_to_v3i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v10, 2
; GFX8-NEXT: v_mov_b32_e32 v5, 0
; GFX8-NEXT: v_mov_b32_e32 v3, v5
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v4, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v9, s3
; GFX8-NEXT: v_mov_b32_e32 v7, s1
; GFX8-NEXT: v_mov_b32_e32 v8, s2
; GFX8-NEXT: v_mov_b32_e32 v1, v5
; GFX8-NEXT: v_mov_b32_e32 v6, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v0, 1, v4
; GFX8-NEXT: v_bfe_u32 v2, v4, 1, 1
; GFX8-NEXT: v_lshrrev_b32_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: flat_store_dwordx2 v[8:9], v[4:5]
; GFX8-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v3i1_to_v3i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T1.Z, T0.X, 1, 1,
; EG-NEXT: AND_INT T1.X, T0.X, 1,
; EG-NEXT: MOV T1.Y, 0.0,
; EG-NEXT: LSHR * T0.X, T0.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T1.W, 0.0,
; EG-NEXT: LSHR T2.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v3i1_to_v3i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v5, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v5, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX12-NEXT: v_bfe_u32 v2, v0, 1, 1
; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_lshrrev_b32_e32 v4, 2, v1
; GFX12-NEXT: v_mov_b32_e32 v3, v5
; GFX12-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_and_b32 v2, 0xffff, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b64 v5, v[4:5], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v5, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <3 x i1>, ptr addrspace(4) %in
  %ext = zext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Three-element vector sign-extension test: load <3 x i1> through the
; addrspace(4) pointer %in, sext it to <3 x i64> and store the result through
; the addrspace(1) pointer %out.
; Every prefix expects one byte-sized load, shifts right by 1 and 2 to reach
; the second and third elements, and a 1-bit signed bitfield extract per
; element. The result is split into a 64-bit store at byte offset 16 and a
; 128-bit store at offset 0.
define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v3i1_to_v3i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX6-NEXT: v_bfe_i32 v4, v3, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX6-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v3i1_to_v3i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v7, s3
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v6, s2
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 2, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX8-NEXT: v_bfe_i32 v8, v3, 0, 1
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: flat_store_dwordx2 v[6:7], v[8:9]
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v3i1_to_v3i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1,
; EG-NEXT: LSHR T0.W, T0.X, 1,
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T1.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, 1,
; EG-NEXT: MOV T1.Y, T1.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T3.X, PV.W, literal.x,
; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: MOV * T1.W, T1.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v3i1_to_v3i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v6, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v6, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 2, v0
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_bfe_i32 v4, v1, 0, 1
; GFX12-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX12-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v6, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <3 x i1>, ptr addrspace(4) %in
  %ext = sext <3 x i1> %load to <3 x i64>
  store <3 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Four-element vector zero-extension test: load <4 x i1> through the
; addrspace(4) pointer %in, zext it to <4 x i64> and store the result through
; the addrspace(1) pointer %out.
; Every prefix expects one byte-sized load and two 128-bit stores, elements 2-3
; at byte offset 16 and elements 0-1 at offset 0. The GFX12 checks additionally
; expect a v_readfirstlane_b32 of the loaded byte, with some of the bit
; extraction then done by scalar instructions.
define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v4i1_to_v4i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: v_mov_b32_e32 v5, v1
; GFX6-NEXT: v_mov_b32_e32 v7, v1
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v4, 1, v0
; GFX6-NEXT: v_bfe_u32 v6, v0, 1, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v0
; GFX6-NEXT: v_bfe_u32 v0, v0, 2, 1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v4i1_to_v4i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, 3
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_mov_b32_e32 v11, s3
; GFX8-NEXT: v_mov_b32_e32 v3, v1
; GFX8-NEXT: v_mov_b32_e32 v9, s1
; GFX8-NEXT: v_mov_b32_e32 v10, s2
; GFX8-NEXT: v_mov_b32_e32 v5, v1
; GFX8-NEXT: v_mov_b32_e32 v7, v1
; GFX8-NEXT: v_mov_b32_e32 v8, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v4, 1, v0
; GFX8-NEXT: v_bfe_u32 v6, v0, 1, 1
; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_bfe_u32 v0, v0, 2, 1
; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[4:7]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v4i1_to_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_UINT * T1.Z, T0.X, literal.x, 1,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T1.X, T0.X, literal.x, 1,
; EG-NEXT: MOV T1.Y, 0.0,
; EG-NEXT: BFE_UINT T0.Z, T0.X, 1, 1,
; EG-NEXT: AND_INT * T0.X, T0.X, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV T1.W, 0.0,
; EG-NEXT: MOV * T0.W, 0.0,
; EG-NEXT: LSHR T2.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR * T3.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v4i1_to_v4i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v1, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10002
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 3, v0
; GFX12-NEXT: s_and_b32 s3, 0xffff, s3
; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_and_b32 s3, 0xffff, s3
; GFX12-NEXT: s_and_b32 s2, 0xffff, s2
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test (the check lines above are autogenerated assertions on its codegen).
  %load = load <4 x i1>, ptr addrspace(4) %in
  %ext = zext <4 x i1> %load to <4 x i64>
  store <4 x i64> %ext, ptr addrspace(1) %out
  ret void
}
; Test: sign-extending load of <4 x i1> to <4 x i64>.
; The kernel loads a <4 x i1> vector through the addrspace(4) pointer %in,
; sign-extends every lane to i64 and stores the <4 x i64> result through the
; addrspace(1) pointer %out.
; Expected codegen, as pinned by the assertions below: every checked target
; issues one byte-sized load, shifts each lane's bit down to bit 0, and uses a
; 1-bit signed bit-field extract per lane. The amdgcn targets form the high
; dword of each lane with an arithmetic shift right by 31. The 32-byte result
; is written with two 16-byte stores (byte offsets 0 and 16 on amdgcn).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v4i1_to_v4i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s10, s6
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s2
; GFX6-NEXT: s_mov_b32 s9, s3
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 3, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX6-NEXT: v_bfe_i32 v6, v4, 0, 1
; GFX6-NEXT: v_bfe_i32 v4, v3, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v4i1_to_v4i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v11, s3
; GFX8-NEXT: v_mov_b32_e32 v9, s1
; GFX8-NEXT: v_mov_b32_e32 v10, s2
; GFX8-NEXT: v_mov_b32_e32 v8, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 2, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 3, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX8-NEXT: v_bfe_i32 v6, v4, 0, 1
; GFX8-NEXT: v_bfe_i32 v4, v3, 0, 1
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v4i1_to_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T3.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T1.X, T0.X, 0.0, 1,
; EG-NEXT: BFE_INT T2.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T2.X, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T0.X, 1,
; EG-NEXT: MOV T2.Y, PV.X,
; EG-NEXT: BFE_INT * T1.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
; EG-NEXT: MOV T1.Y, T1.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T3.X, PV.W, literal.x,
; EG-NEXT: MOV T1.W, T1.Z,
; EG-NEXT: MOV * T2.W, T2.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v4i1_to_v4i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v8, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v8, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_lshrrev_b32_e32 v1, 3, v0
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 2, v0
; GFX12-NEXT: v_lshrrev_b32_e32 v3, 1, v0
; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_bfe_i32 v6, v1, 0, 1
; GFX12-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_bfe_i32 v2, v3, 0, 1
; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX12-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: load <4 x i1>, sext each lane to i64, store the wide vector.
%load = load <4 x i1>, ptr addrspace(4) %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(1) %out
ret void
}
; Test: zero-extending load of <8 x i1> to <8 x i64>.
; The kernel loads an <8 x i1> vector through the addrspace(4) pointer %in,
; zero-extends every lane to i64 and stores the <8 x i64> result through the
; addrspace(1) pointer %out.
; Expected codegen, as pinned by the assertions below: every checked target
; issues one byte-sized load and isolates each lane with a 1-bit unsigned
; bit-field extract (an AND with 1 for lane 0; a plain right shift by 7 for
; lane 7 on the amdgcn targets). The high dword of every lane is a zero
; constant, and the 64-byte result is written with four 16-byte stores.
; In the GFX8 and GFX12 sequences the loaded value is additionally masked with
; 0xffff before bits 6 and 7 are extracted.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v8i1_to_v8i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s10, s2
; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s6
; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: v_mov_b32_e32 v5, v1
; GFX6-NEXT: v_mov_b32_e32 v7, v1
; GFX6-NEXT: v_mov_b32_e32 v9, v1
; GFX6-NEXT: v_mov_b32_e32 v11, v1
; GFX6-NEXT: v_mov_b32_e32 v13, v1
; GFX6-NEXT: v_mov_b32_e32 v15, v1
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_u32 v14, v0, 1, 1
; GFX6-NEXT: v_bfe_u32 v10, v0, 3, 1
; GFX6-NEXT: v_bfe_u32 v6, v0, 5, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v0
; GFX6-NEXT: v_and_b32_e32 v12, 1, v0
; GFX6-NEXT: v_bfe_u32 v8, v0, 2, 1
; GFX6-NEXT: v_bfe_u32 v4, v0, 4, 1
; GFX6-NEXT: v_bfe_u32 v0, v0, 6, 1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v8i1_to_v8i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: s_add_u32 s4, s0, 32
; GFX8-NEXT: s_addc_u32 s5, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_mov_b32_e32 v16, s5
; GFX8-NEXT: v_mov_b32_e32 v5, v1
; GFX8-NEXT: v_mov_b32_e32 v7, v1
; GFX8-NEXT: v_mov_b32_e32 v15, s4
; GFX8-NEXT: v_mov_b32_e32 v8, v1
; GFX8-NEXT: v_mov_b32_e32 v10, v1
; GFX8-NEXT: v_mov_b32_e32 v3, v1
; GFX8-NEXT: v_mov_b32_e32 v12, v1
; GFX8-NEXT: v_mov_b32_e32 v14, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_bfe_u32 v6, v0, 5, 1
; GFX8-NEXT: v_bfe_u32 v4, v0, 4, 1
; GFX8-NEXT: flat_store_dwordx4 v[15:16], v[4:7]
; GFX8-NEXT: v_mov_b32_e32 v16, s3
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s0, 16
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v18, s1
; GFX8-NEXT: v_mov_b32_e32 v17, s0
; GFX8-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX8-NEXT: v_bfe_u32 v9, v0, 3, 1
; GFX8-NEXT: v_bfe_u32 v7, v0, 2, 1
; GFX8-NEXT: v_mov_b32_e32 v15, s2
; GFX8-NEXT: v_bfe_u32 v13, v0, 1, 1
; GFX8-NEXT: v_and_b32_e32 v11, 1, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 7, v6
; GFX8-NEXT: v_bfe_u32 v0, v6, 6, 1
; GFX8-NEXT: flat_store_dwordx4 v[17:18], v[7:10]
; GFX8-NEXT: flat_store_dwordx4 v[15:16], v[0:3]
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[11:14]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v8i1_to_v8i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T9.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: BFE_UINT * T6.Z, T5.X, literal.x, 1,
; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T6.X, T5.X, literal.x, 1,
; EG-NEXT: MOV T6.Y, 0.0,
; EG-NEXT: BFE_UINT * T7.Z, T5.X, literal.y, 1,
; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45)
; EG-NEXT: BFE_UINT T7.X, T5.X, literal.x, 1,
; EG-NEXT: MOV T7.Y, 0.0,
; EG-NEXT: BFE_UINT * T8.Z, T5.X, literal.y, 1,
; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45)
; EG-NEXT: BFE_UINT T8.X, T5.X, literal.x, 1,
; EG-NEXT: MOV T8.Y, 0.0,
; EG-NEXT: BFE_UINT T5.Z, T5.X, 1, 1,
; EG-NEXT: AND_INT * T5.X, T5.X, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T5.Y, 0.0,
; EG-NEXT: MOV T6.W, 0.0,
; EG-NEXT: MOV * T7.W, 0.0,
; EG-NEXT: MOV T8.W, 0.0,
; EG-NEXT: MOV * T5.W, 0.0,
; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T10.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T11.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR * T12.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v8i1_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v12, v1, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v12
; GFX12-NEXT: v_mov_b32_e32 v5, v1
; GFX12-NEXT: v_mov_b32_e32 v7, v1
; GFX12-NEXT: v_bfe_u32 v6, v12, 5, 1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT: v_lshrrev_b32_e32 v2, 7, v0
; GFX12-NEXT: v_bfe_u32 v0, v0, 6, 1
; GFX12-NEXT: v_bfe_u32 v4, v12, 4, 1
; GFX12-NEXT: v_mov_b32_e32 v9, v1
; GFX12-NEXT: v_mov_b32_e32 v11, v1
; GFX12-NEXT: v_bfe_u32 v10, v12, 3, 1
; GFX12-NEXT: v_bfe_u32 v8, v12, 2, 1
; GFX12-NEXT: v_mov_b32_e32 v13, v1
; GFX12-NEXT: v_mov_b32_e32 v15, v1
; GFX12-NEXT: v_bfe_u32 v14, v12, 1, 1
; GFX12-NEXT: v_and_b32_e32 v12, 1, v12
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: load <8 x i1>, zext each lane to i64, store the wide vector.
%load = load <8 x i1>, ptr addrspace(4) %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(1) %out
ret void
}
; Test: sign-extending load of <8 x i1> to <8 x i64>.
; The kernel loads an <8 x i1> vector through the addrspace(4) pointer %in,
; sign-extends every lane to i64 and stores the <8 x i64> result through the
; addrspace(1) pointer %out.
; Expected codegen, as pinned by the assertions below: every checked target
; issues one byte-sized load and writes the 64-byte result with four 16-byte
; stores. The GFX6 sequence stays in vector registers (shift, 1-bit signed
; bit-field extract, arithmetic shift right by 31 for the high dword). The
; GFX8 and GFX12 sequences copy the loaded byte into a scalar register with
; v_readfirstlane_b32 and sign-extend lanes 1-7 with 64-bit scalar bit-field
; extracts, while lane 0 is still extended with vector instructions.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v8i1_to_v8i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s10, s2
; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s6
; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 6, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 7, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 5, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 3, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v0
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 1
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 1
; GFX6-NEXT: v_bfe_i32 v10, v8, 0, 1
; GFX6-NEXT: v_bfe_i32 v8, v7, 0, 1
; GFX6-NEXT: v_bfe_i32 v14, v5, 0, 1
; GFX6-NEXT: v_bfe_i32 v12, v3, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v8i1_to_v8i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v17, s1
; GFX8-NEXT: v_mov_b32_e32 v16, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s3, v0
; GFX8-NEXT: s_lshr_b32 s2, s3, 6
; GFX8-NEXT: s_lshr_b32 s4, s3, 7
; GFX8-NEXT: s_lshr_b32 s6, s3, 4
; GFX8-NEXT: s_lshr_b32 s8, s3, 5
; GFX8-NEXT: s_lshr_b32 s10, s3, 2
; GFX8-NEXT: s_lshr_b32 s12, s3, 3
; GFX8-NEXT: s_lshr_b32 s14, s3, 1
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX8-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX8-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v19, s3
; GFX8-NEXT: v_mov_b32_e32 v18, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v6, s4
; GFX8-NEXT: v_mov_b32_e32 v7, s5
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[4:7]
; GFX8-NEXT: s_add_u32 s0, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v8, s6
; GFX8-NEXT: v_mov_b32_e32 v9, s7
; GFX8-NEXT: v_mov_b32_e32 v10, s8
; GFX8-NEXT: v_mov_b32_e32 v11, s9
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[8:11]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: v_mov_b32_e32 v12, s10
; GFX8-NEXT: v_mov_b32_e32 v13, s11
; GFX8-NEXT: v_mov_b32_e32 v14, s12
; GFX8-NEXT: v_mov_b32_e32 v15, s13
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX8-NEXT: v_mov_b32_e32 v2, s14
; GFX8-NEXT: v_mov_b32_e32 v3, s15
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[12:15]
; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v8i1_to_v8i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 37, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T6.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 8:
; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
; EG-NEXT: ALU clause starting at 10:
; EG-NEXT: MOV * T5.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 11:
; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x,
; EG-NEXT: LSHR * T0.W, T5.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 7(9.809089e-45)
; EG-NEXT: BFE_INT T7.X, T5.X, 0.0, 1,
; EG-NEXT: BFE_INT T8.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T5.X, literal.x,
; EG-NEXT: LSHR * T1.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45)
; EG-NEXT: BFE_INT T8.X, PS, 0.0, 1,
; EG-NEXT: BFE_INT T9.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T5.X, 1,
; EG-NEXT: LSHR * T1.W, T5.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T9.X, PS, 0.0, 1,
; EG-NEXT: MOV T8.Y, PV.X,
; EG-NEXT: BFE_INT T7.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T5.X, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 5(7.006492e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T10.X, PS, literal.x,
; EG-NEXT: MOV T9.Y, PV.X,
; EG-NEXT: BFE_INT T5.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T0.W, T5.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 4(5.605194e-45)
; EG-NEXT: BFE_INT T5.X, PV.W, 0.0, 1,
; EG-NEXT: MOV T7.Y, T7.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T11.X, PV.W, literal.x,
; EG-NEXT: MOV T5.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
; EG-NEXT: MOV T7.W, T7.Z,
; EG-NEXT: MOV * T9.W, T9.Z,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T12.X, PV.Z, literal.x,
; EG-NEXT: MOV T5.W, T5.Z,
; EG-NEXT: MOV * T8.W, T8.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v8i1_to_v8i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v16, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v16, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s3, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v9, s3
; GFX12-NEXT: s_lshr_b32 s2, s3, 6
; GFX12-NEXT: s_lshr_b32 s4, s3, 7
; GFX12-NEXT: s_lshr_b32 s6, s3, 4
; GFX12-NEXT: s_lshr_b32 s8, s3, 5
; GFX12-NEXT: s_lshr_b32 s10, s3, 2
; GFX12-NEXT: s_lshr_b32 s12, s3, 3
; GFX12-NEXT: s_lshr_b32 s14, s3, 1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX12-NEXT: v_bfe_i32 v12, v9, 0, 1
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7
; GFX12-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9
; GFX12-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11
; GFX12-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13
; GFX12-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
; GFX12-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: load <8 x i1>, sext each lane to i64, store the wide vector.
%load = load <8 x i1>, ptr addrspace(4) %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(1) %out
ret void
}
; Test: zero-extending load of <16 x i1> to <16 x i64>.
; The kernel loads a <16 x i1> vector through the addrspace(4) pointer %in,
; zero-extends every lane to i64 and stores the <16 x i64> result through the
; addrspace(1) pointer %out.
; Expected codegen, as pinned by the assertions below: every checked target
; issues one 16-bit load and writes the 128-byte result with eight 16-byte
; stores whose high dwords are a zero constant. The GFX6 sequence extracts
; every lane with vector instructions. The GFX8 and GFX12 sequences copy the
; loaded value into a scalar register with v_readfirstlane_b32 and extract most
; lanes with scalar 1-bit bit-field extracts; bits 5, 8, 11, 14 and 15 are
; instead extracted with vector instructions from a copy masked with 0xffff.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s10, s2
; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s6
; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ushort v29, off, s[8:11], 0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: v_mov_b32_e32 v4, v1
; GFX6-NEXT: v_mov_b32_e32 v6, v1
; GFX6-NEXT: v_mov_b32_e32 v7, v1
; GFX6-NEXT: v_mov_b32_e32 v9, v1
; GFX6-NEXT: v_mov_b32_e32 v10, v1
; GFX6-NEXT: v_mov_b32_e32 v12, v1
; GFX6-NEXT: v_mov_b32_e32 v14, v1
; GFX6-NEXT: v_mov_b32_e32 v16, v1
; GFX6-NEXT: v_mov_b32_e32 v18, v1
; GFX6-NEXT: v_mov_b32_e32 v20, v1
; GFX6-NEXT: v_mov_b32_e32 v22, v1
; GFX6-NEXT: v_mov_b32_e32 v24, v1
; GFX6-NEXT: v_mov_b32_e32 v26, v1
; GFX6-NEXT: v_mov_b32_e32 v28, v1
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_u32 v2, v29, 11, 1
; GFX6-NEXT: v_bfe_u32 v0, v29, 10, 1
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: v_bfe_u32 v5, v29, 9, 1
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_bfe_u32 v3, v29, 8, 1
; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 15, v29
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_bfe_u32 v6, v29, 14, 1
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
; GFX6-NEXT: v_bfe_u32 v27, v29, 5, 1
; GFX6-NEXT: v_bfe_u32 v23, v29, 7, 1
; GFX6-NEXT: v_bfe_u32 v19, v29, 1, 1
; GFX6-NEXT: v_bfe_u32 v15, v29, 3, 1
; GFX6-NEXT: v_bfe_u32 v11, v29, 13, 1
; GFX6-NEXT: v_bfe_u32 v25, v29, 4, 1
; GFX6-NEXT: v_bfe_u32 v21, v29, 6, 1
; GFX6-NEXT: v_and_b32_e32 v17, 1, v29
; GFX6-NEXT: v_bfe_u32 v13, v29, 2, 1
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_bfe_u32 v9, v29, 12, 1
; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0
; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_mov_b32_e32 v3, v1
; GFX8-NEXT: v_mov_b32_e32 v5, v1
; GFX8-NEXT: v_mov_b32_e32 v7, v1
; GFX8-NEXT: v_mov_b32_e32 v9, v1
; GFX8-NEXT: v_mov_b32_e32 v11, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10009
; GFX8-NEXT: s_bfe_u32 s4, s2, 0x1000d
; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007
; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10003
; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10001
; GFX8-NEXT: s_and_b32 s8, s2, 1
; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10002
; GFX8-NEXT: s_bfe_u32 s10, s2, 0x10004
; GFX8-NEXT: s_bfe_u32 s11, s2, 0x10006
; GFX8-NEXT: s_bfe_u32 s12, s2, 0x1000c
; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000a
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
; GFX8-NEXT: v_mov_b32_e32 v6, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v13, s3
; GFX8-NEXT: v_mov_b32_e32 v12, s2
; GFX8-NEXT: s_add_u32 s2, s0, 64
; GFX8-NEXT: v_bfe_u32 v2, v4, 11, 1
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v13, s3
; GFX8-NEXT: v_mov_b32_e32 v12, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 15, v4
; GFX8-NEXT: v_bfe_u32 v14, v4, 5, 1
; GFX8-NEXT: v_bfe_u32 v8, v4, 14, 1
; GFX8-NEXT: v_bfe_u32 v4, v4, 8, 1
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
; GFX8-NEXT: v_mov_b32_e32 v0, s12
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[8:11]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v0, s11
; GFX8-NEXT: v_mov_b32_e32 v2, s5
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s10
; GFX8-NEXT: v_mov_b32_e32 v2, v14
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v0, s9
; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s8
; GFX8-NEXT: v_mov_b32_e32 v2, s7
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v16i1_to_v16i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @12
; EG-NEXT: ALU 62, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T22.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T21.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T20.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T18.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T17.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T15.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 12:
; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 15:
; EG-NEXT: LSHR * T8.Z, T7.X, literal.x,
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T8.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T8.Y, 0.0,
; EG-NEXT: BFE_UINT * T9.Z, T7.X, literal.y, 1,
; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44)
; EG-NEXT: BFE_UINT T9.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T9.Y, 0.0,
; EG-NEXT: BFE_UINT * T10.Z, T7.X, literal.y, 1,
; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T10.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T10.Y, 0.0,
; EG-NEXT: BFE_UINT * T11.Z, T7.X, literal.y, 1,
; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44)
; EG-NEXT: BFE_UINT T11.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T11.Y, 0.0,
; EG-NEXT: BFE_UINT * T12.Z, T7.X, literal.y, 1,
; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45)
; EG-NEXT: BFE_UINT T12.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T12.Y, 0.0,
; EG-NEXT: BFE_UINT * T13.Z, T7.X, literal.y, 1,
; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45)
; EG-NEXT: BFE_UINT T13.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T13.Y, 0.0,
; EG-NEXT: BFE_UINT * T14.Z, T7.X, literal.y, 1,
; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45)
; EG-NEXT: BFE_UINT T14.X, T7.X, literal.x, 1,
; EG-NEXT: MOV T14.Y, 0.0,
; EG-NEXT: BFE_UINT T7.Z, T7.X, 1, 1,
; EG-NEXT: AND_INT * T7.X, T7.X, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T7.Y, 0.0,
; EG-NEXT: MOV T8.W, 0.0,
; EG-NEXT: MOV * T9.W, 0.0,
; EG-NEXT: MOV T10.W, 0.0,
; EG-NEXT: MOV * T11.W, 0.0,
; EG-NEXT: MOV T12.W, 0.0,
; EG-NEXT: MOV * T13.W, 0.0,
; EG-NEXT: MOV T14.W, 0.0,
; EG-NEXT: MOV * T7.W, 0.0,
; EG-NEXT: LSHR T15.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T16.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T17.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T18.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT: LSHR T19.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
; EG-NEXT: LSHR * T22.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u16 v0, v1, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
; GFX12-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v4, 0xffff, v0
; GFX12-NEXT: v_mov_b32_e32 v11, v1
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000a
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3
; GFX12-NEXT: v_bfe_u32 v2, v4, 11, 1
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
; GFX12-NEXT: v_mov_b32_e32 v5, v1
; GFX12-NEXT: v_bfe_u32 v6, v4, 5, 1
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
; GFX12-NEXT: v_mov_b32_e32 v9, v1
; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10002
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001
; GFX12-NEXT: v_lshrrev_b32_e32 v10, 15, v4
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, v6
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: v_bfe_u32 v8, v4, 14, 1
; GFX12-NEXT: v_bfe_u32 v4, v4, 8, 1
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
; GFX12-NEXT: v_mov_b32_e32 v0, s6
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: v_mov_b32_e32 v6, s3
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: v_mov_b32_e32 v2, s5
; GFX12-NEXT: s_clause 0x2
; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
; IR under test: load <16 x i1>, zext each lane to i64, store the wide vector.
%load = load <16 x i1>, ptr addrspace(4) %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v16i1_to_v16i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_mov_b32 s10, s2
; GFX6-NEXT: s_mov_b32 s11, s3
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s8, s6
; GFX6-NEXT: s_mov_b32 s9, s7
; GFX6-NEXT: buffer_load_ushort v1, off, s[8:11], 0
; GFX6-NEXT: s_mov_b32 s0, s4
; GFX6-NEXT: s_mov_b32 s1, s5
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 15, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 12, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 13, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v11, 10, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v12, 11, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v14, 8, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v16, 9, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v15, 6, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v9, 4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v10, 5, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v13, 1, v1
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX6-NEXT: v_bfe_i32 v5, v4, 0, 1
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_bfe_i32 v6, v10, 0, 1
; GFX6-NEXT: v_bfe_i32 v4, v9, 0, 1
; GFX6-NEXT: v_bfe_i32 v9, v8, 0, 1
; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v7
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_bfe_i32 v9, v12, 0, 1
; GFX6-NEXT: v_bfe_i32 v7, v11, 0, 1
; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 1
; GFX6-NEXT: v_bfe_i32 v11, v1, 0, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 7, v1
; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v7
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80
; GFX6-NEXT: v_bfe_i32 v17, v1, 0, 1
; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 1
; GFX6-NEXT: v_bfe_i32 v21, v16, 0, 1
; GFX6-NEXT: v_bfe_i32 v19, v14, 0, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX6-NEXT: v_ashrrev_i32_e32 v18, 31, v17
; GFX6-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GFX6-NEXT: v_ashrrev_i32_e32 v22, 31, v21
; GFX6-NEXT: v_ashrrev_i32_e32 v20, 31, v19
; GFX6-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:64
; GFX6-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v16i1_to_v16i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v19, s1
; GFX8-NEXT: v_mov_b32_e32 v18, s0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s3, v0
; GFX8-NEXT: s_lshr_b32 s2, s3, 14
; GFX8-NEXT: s_lshr_b32 s4, s3, 15
; GFX8-NEXT: s_lshr_b32 s6, s3, 12
; GFX8-NEXT: s_lshr_b32 s8, s3, 13
; GFX8-NEXT: s_lshr_b32 s10, s3, 10
; GFX8-NEXT: s_lshr_b32 s12, s3, 11
; GFX8-NEXT: s_lshr_b32 s14, s3, 8
; GFX8-NEXT: s_lshr_b32 s16, s3, 9
; GFX8-NEXT: s_lshr_b32 s18, s3, 6
; GFX8-NEXT: s_lshr_b32 s20, s3, 7
; GFX8-NEXT: s_lshr_b32 s22, s3, 4
; GFX8-NEXT: s_lshr_b32 s24, s3, 5
; GFX8-NEXT: s_lshr_b32 s26, s3, 2
; GFX8-NEXT: s_lshr_b32 s28, s3, 3
; GFX8-NEXT: s_lshr_b32 s30, s3, 1
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX8-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX8-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: v_mov_b32_e32 v14, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
; GFX8-NEXT: v_mov_b32_e32 v4, s4
; GFX8-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: v_mov_b32_e32 v14, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
; GFX8-NEXT: v_mov_b32_e32 v6, s6
; GFX8-NEXT: v_mov_b32_e32 v7, s7
; GFX8-NEXT: v_mov_b32_e32 v8, s8
; GFX8-NEXT: v_mov_b32_e32 v9, s9
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[6:9]
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: v_mov_b32_e32 v14, s2
; GFX8-NEXT: s_add_u32 s2, s0, 64
; GFX8-NEXT: v_mov_b32_e32 v10, s10
; GFX8-NEXT: v_mov_b32_e32 v11, s11
; GFX8-NEXT: v_mov_b32_e32 v12, s12
; GFX8-NEXT: v_mov_b32_e32 v13, s13
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[10:13]
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: v_mov_b32_e32 v14, s2
; GFX8-NEXT: s_add_u32 s2, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v2, s14
; GFX8-NEXT: v_mov_b32_e32 v3, s15
; GFX8-NEXT: v_mov_b32_e32 v4, s16
; GFX8-NEXT: v_mov_b32_e32 v5, s17
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
; GFX8-NEXT: v_mov_b32_e32 v6, s18
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_add_u32 s2, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v7, s19
; GFX8-NEXT: v_mov_b32_e32 v8, s20
; GFX8-NEXT: v_mov_b32_e32 v9, s21
; GFX8-NEXT: s_addc_u32 s3, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[6:9]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: s_add_u32 s0, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v10, s22
; GFX8-NEXT: v_mov_b32_e32 v11, s23
; GFX8-NEXT: v_mov_b32_e32 v12, s24
; GFX8-NEXT: v_mov_b32_e32 v13, s25
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[10:13]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX8-NEXT: v_mov_b32_e32 v14, s26
; GFX8-NEXT: v_mov_b32_e32 v15, s27
; GFX8-NEXT: v_mov_b32_e32 v16, s28
; GFX8-NEXT: v_mov_b32_e32 v17, s29
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX8-NEXT: v_mov_b32_e32 v2, s30
; GFX8-NEXT: v_mov_b32_e32 v3, s31
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[14:17]
; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v16i1_to_v16i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @12
; EG-NEXT: ALU 78, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T22.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T11.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T10.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T9.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 12:
; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 15:
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T9.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T10.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T11.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT: LSHR T12.X, PV.W, literal.x,
; EG-NEXT: LSHR * T0.W, T7.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 15(2.101948e-44)
; EG-NEXT: BFE_INT T13.X, T7.X, 0.0, 1,
; EG-NEXT: BFE_INT T14.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.x,
; EG-NEXT: LSHR * T1.W, T7.X, literal.y,
; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44)
; EG-NEXT: BFE_INT T14.X, PS, 0.0, 1,
; EG-NEXT: LSHR T0.Y, T7.X, literal.x,
; EG-NEXT: BFE_INT T15.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.y,
; EG-NEXT: LSHR * T1.W, T7.X, literal.z,
; EG-NEXT: 12(1.681558e-44), 7(9.809089e-45)
; EG-NEXT: 10(1.401298e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T15.X, PS, 0.0, 1,
; EG-NEXT: MOV T14.Y, PV.X,
; EG-NEXT: BFE_INT T16.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.x,
; EG-NEXT: LSHR * T1.W, T7.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45)
; EG-NEXT: BFE_INT T16.X, PS, 0.0, 1,
; EG-NEXT: MOV T15.Y, PV.X,
; EG-NEXT: BFE_INT T17.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, 1,
; EG-NEXT: LSHR * T1.W, T7.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T17.X, PS, 0.0, 1,
; EG-NEXT: MOV T16.Y, PV.X,
; EG-NEXT: BFE_INT T13.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.x,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT: 5(7.006492e-45), 80(1.121039e-43)
; EG-NEXT: LSHR T18.X, PS, literal.x,
; EG-NEXT: MOV T17.Y, PV.X,
; EG-NEXT: BFE_INT T19.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.y,
; EG-NEXT: LSHR * T1.W, T7.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T19.X, PS, 0.0, 1,
; EG-NEXT: MOV T13.Y, T13.X,
; EG-NEXT: BFE_INT T7.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T1.W, T7.X, literal.y,
; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44)
; EG-NEXT: BFE_INT T7.X, PS, 0.0, 1,
; EG-NEXT: MOV T19.Y, PV.X,
; EG-NEXT: BFE_INT T20.Z, PV.W, 0.0, 1,
; EG-NEXT: MOV T13.W, T13.Z,
; EG-NEXT: MOV * T17.W, T17.Z,
; EG-NEXT: BFE_INT T20.X, T0.Y, 0.0, 1,
; EG-NEXT: MOV T7.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x,
; EG-NEXT: MOV T19.W, T19.Z,
; EG-NEXT: MOV * T16.W, T16.Z,
; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T21.X, PV.Z, literal.x,
; EG-NEXT: MOV T20.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
; EG-NEXT: MOV T7.W, T7.Z,
; EG-NEXT: MOV * T15.W, T15.Z,
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
; EG-NEXT: LSHR T22.X, PV.Z, literal.x,
; EG-NEXT: MOV T20.W, T20.Z,
; EG-NEXT: MOV * T14.W, T14.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v16i1_to_v16i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v32, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u16 v0, v32, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s3, v0
; GFX12-NEXT: s_lshr_b32 s4, s3, 15
; GFX12-NEXT: s_lshr_b32 s2, s3, 14
; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v28, s3 :: v_dual_mov_b32 v3, s5
; GFX12-NEXT: s_lshr_b32 s6, s3, 12
; GFX12-NEXT: s_lshr_b32 s8, s3, 13
; GFX12-NEXT: s_lshr_b32 s10, s3, 10
; GFX12-NEXT: s_lshr_b32 s12, s3, 11
; GFX12-NEXT: s_lshr_b32 s14, s3, 8
; GFX12-NEXT: s_lshr_b32 s16, s3, 9
; GFX12-NEXT: s_lshr_b32 s18, s3, 6
; GFX12-NEXT: s_lshr_b32 s20, s3, 7
; GFX12-NEXT: s_lshr_b32 s22, s3, 4
; GFX12-NEXT: s_lshr_b32 s24, s3, 5
; GFX12-NEXT: s_lshr_b32 s26, s3, 2
; GFX12-NEXT: s_lshr_b32 s28, s3, 3
; GFX12-NEXT: s_lshr_b32 s30, s3, 1
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v5, s7
; GFX12-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v2, s4
; GFX12-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v4, s6
; GFX12-NEXT: v_dual_mov_b32 v9, s11 :: v_dual_mov_b32 v6, s8
; GFX12-NEXT: v_dual_mov_b32 v11, s13 :: v_dual_mov_b32 v8, s10
; GFX12-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v10, s12
; GFX12-NEXT: v_mov_b32_e32 v15, s17
; GFX12-NEXT: v_bfe_i32 v28, v28, 0, 1
; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v17, s19
; GFX12-NEXT: v_dual_mov_b32 v14, s16 :: v_dual_mov_b32 v19, s21
; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v21, s23
; GFX12-NEXT: v_dual_mov_b32 v18, s20 :: v_dual_mov_b32 v23, s25
; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v20, s22 :: v_dual_mov_b32 v25, s27
; GFX12-NEXT: v_dual_mov_b32 v22, s24 :: v_dual_mov_b32 v27, s29
; GFX12-NEXT: v_dual_mov_b32 v24, s26 :: v_dual_mov_b32 v31, s31
; GFX12-NEXT: v_mov_b32_e32 v26, s28
; GFX12-NEXT: v_mov_b32_e32 v30, s30
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v32, v[0:3], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:96
; GFX12-NEXT: v_ashrrev_i32_e32 v29, 31, v28
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v32, v[28:31], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <16 x i1>, ptr addrspace(4) %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(1) %out
ret void
}
; Test case: load a <32 x i1> vector through the addrspace(4) pointer %in,
; zero-extend every lane to i64, and store the resulting <32 x i64> (256 bytes)
; through the addrspace(1) pointer %out.
;
; What the expected output below pins down for each check prefix:
;  * GFX6, GFX8 and GFX12 read the whole vector with a single scalar dword load
;    and isolate each lane with s_bfe_u32 (s_and_b32 for bit 0, s_lshr_b32 for
;    bit 31).
;  * EG reads it with one VTX_READ_32 and isolates lanes with BFE_UINT
;    (AND_INT for bit 0, LSHR for bit 31).
;  * Every target writes the result as sixteen 128-bit stores at byte offsets
;    0..240. Each store holds two i64 lanes: the extracted bit in the low dword
;    and a constant zero in the high dword.
;
; The check lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v32i1_to_v32i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_u32 s5, s4, 0x10001
; GFX6-NEXT: s_bfe_u32 s6, s4, 0x10003
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x10005
; GFX6-NEXT: s_bfe_u32 s8, s4, 0x10007
; GFX6-NEXT: s_bfe_u32 s9, s4, 0x10009
; GFX6-NEXT: s_bfe_u32 s10, s4, 0x1000b
; GFX6-NEXT: s_bfe_u32 s11, s4, 0x1000d
; GFX6-NEXT: s_bfe_u32 s12, s4, 0x1000f
; GFX6-NEXT: s_bfe_u32 s13, s4, 0x10011
; GFX6-NEXT: s_bfe_u32 s14, s4, 0x10013
; GFX6-NEXT: s_bfe_u32 s15, s4, 0x10015
; GFX6-NEXT: s_bfe_u32 s16, s4, 0x10017
; GFX6-NEXT: s_bfe_u32 s17, s4, 0x10019
; GFX6-NEXT: s_bfe_u32 s18, s4, 0x1001b
; GFX6-NEXT: s_bfe_u32 s19, s4, 0x1001d
; GFX6-NEXT: s_lshr_b32 s20, s4, 31
; GFX6-NEXT: s_and_b32 s21, s4, 1
; GFX6-NEXT: s_bfe_u32 s22, s4, 0x10002
; GFX6-NEXT: s_bfe_u32 s23, s4, 0x10004
; GFX6-NEXT: s_bfe_u32 s24, s4, 0x10006
; GFX6-NEXT: s_bfe_u32 s25, s4, 0x10008
; GFX6-NEXT: s_bfe_u32 s26, s4, 0x1000a
; GFX6-NEXT: s_bfe_u32 s27, s4, 0x1000c
; GFX6-NEXT: s_bfe_u32 s28, s4, 0x1000e
; GFX6-NEXT: s_bfe_u32 s29, s4, 0x10010
; GFX6-NEXT: s_bfe_u32 s30, s4, 0x10012
; GFX6-NEXT: s_bfe_u32 s31, s4, 0x10014
; GFX6-NEXT: s_bfe_u32 s33, s4, 0x10016
; GFX6-NEXT: s_bfe_u32 s34, s4, 0x10018
; GFX6-NEXT: s_bfe_u32 s35, s4, 0x1001a
; GFX6-NEXT: s_bfe_u32 s36, s4, 0x1001e
; GFX6-NEXT: s_bfe_u32 s4, s4, 0x1001c
; GFX6-NEXT: v_mov_b32_e32 v0, s36
; GFX6-NEXT: v_mov_b32_e32 v2, s20
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: v_mov_b32_e32 v2, s19
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s35
; GFX6-NEXT: v_mov_b32_e32 v2, s18
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s34
; GFX6-NEXT: v_mov_b32_e32 v2, s17
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s33
; GFX6-NEXT: v_mov_b32_e32 v2, s16
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s31
; GFX6-NEXT: v_mov_b32_e32 v2, s15
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s30
; GFX6-NEXT: v_mov_b32_e32 v2, s14
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s29
; GFX6-NEXT: v_mov_b32_e32 v2, s13
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s28
; GFX6-NEXT: v_mov_b32_e32 v2, s12
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s27
; GFX6-NEXT: v_mov_b32_e32 v2, s11
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s26
; GFX6-NEXT: v_mov_b32_e32 v2, s10
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s25
; GFX6-NEXT: v_mov_b32_e32 v2, s9
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s24
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s23
; GFX6-NEXT: v_mov_b32_e32 v2, s7
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s22
; GFX6-NEXT: v_mov_b32_e32 v2, s6
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s21
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v32i1_to_v32i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_mov_b32_e32 v3, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s6, s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s7, s6, 31
; GFX8-NEXT: s_bfe_u32 s8, s6, 0x1001d
; GFX8-NEXT: s_bfe_u32 s9, s6, 0x1001b
; GFX8-NEXT: s_bfe_u32 s10, s6, 0x10019
; GFX8-NEXT: s_bfe_u32 s11, s6, 0x10017
; GFX8-NEXT: s_bfe_u32 s12, s6, 0x10013
; GFX8-NEXT: s_bfe_u32 s13, s6, 0x10011
; GFX8-NEXT: s_bfe_u32 s14, s6, 0x1000f
; GFX8-NEXT: s_bfe_u32 s15, s6, 0x1000d
; GFX8-NEXT: s_bfe_u32 s16, s6, 0x1000b
; GFX8-NEXT: s_bfe_u32 s17, s6, 0x10009
; GFX8-NEXT: s_bfe_u32 s18, s6, 0x10007
; GFX8-NEXT: s_bfe_u32 s19, s6, 0x10005
; GFX8-NEXT: s_bfe_u32 s4, s6, 0x10003
; GFX8-NEXT: s_bfe_u32 s2, s6, 0x10001
; GFX8-NEXT: s_and_b32 s3, s6, 1
; GFX8-NEXT: s_bfe_u32 s5, s6, 0x10002
; GFX8-NEXT: s_bfe_u32 s20, s6, 0x10004
; GFX8-NEXT: s_bfe_u32 s21, s6, 0x10006
; GFX8-NEXT: s_bfe_u32 s22, s6, 0x10008
; GFX8-NEXT: s_bfe_u32 s23, s6, 0x1000a
; GFX8-NEXT: s_bfe_u32 s24, s6, 0x1000c
; GFX8-NEXT: s_bfe_u32 s25, s6, 0x1000e
; GFX8-NEXT: s_bfe_u32 s26, s6, 0x10010
; GFX8-NEXT: s_bfe_u32 s27, s6, 0x10012
; GFX8-NEXT: s_bfe_u32 s28, s6, 0x10014
; GFX8-NEXT: s_bfe_u32 s29, s6, 0x10015
; GFX8-NEXT: s_bfe_u32 s30, s6, 0x10016
; GFX8-NEXT: s_bfe_u32 s31, s6, 0x10018
; GFX8-NEXT: s_bfe_u32 s33, s6, 0x1001a
; GFX8-NEXT: s_bfe_u32 s34, s6, 0x1001c
; GFX8-NEXT: s_bfe_u32 s6, s6, 0x1001e
; GFX8-NEXT: v_mov_b32_e32 v0, s6
; GFX8-NEXT: s_add_u32 s6, s0, 0xf0
; GFX8-NEXT: v_mov_b32_e32 v2, s7
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0xe0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s34
; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0xd0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s33
; GFX8-NEXT: v_mov_b32_e32 v2, s9
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0xc0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s31
; GFX8-NEXT: v_mov_b32_e32 v2, s10
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0xb0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s30
; GFX8-NEXT: v_mov_b32_e32 v2, s11
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0xa0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s28
; GFX8-NEXT: v_mov_b32_e32 v2, s29
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0x90
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s27
; GFX8-NEXT: v_mov_b32_e32 v2, s12
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0x80
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s26
; GFX8-NEXT: v_mov_b32_e32 v2, s13
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0x70
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s25
; GFX8-NEXT: v_mov_b32_e32 v2, s14
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0x60
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s24
; GFX8-NEXT: v_mov_b32_e32 v2, s15
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 0x50
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s23
; GFX8-NEXT: v_mov_b32_e32 v2, s16
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 64
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s22
; GFX8-NEXT: v_mov_b32_e32 v2, s17
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 48
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s21
; GFX8-NEXT: v_mov_b32_e32 v2, s18
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: s_add_u32 s6, s0, 32
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s20
; GFX8-NEXT: v_mov_b32_e32 v2, s19
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_add_u32 s4, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s5
; GFX8-NEXT: s_addc_u32 s5, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s4
; GFX8-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v32i1_to_v32i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @22
; EG-NEXT: ALU 96, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 30, @122, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T42.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T41.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T40.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T39.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T38.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T37.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T36.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T35.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T34.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T33.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T31.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T30.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T29.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T28.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T27.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 22:
; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 24:
; EG-NEXT: MOV * T11.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 25:
; EG-NEXT: LSHR * T12.Z, T11.X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T12.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T12.Y, 0.0,
; EG-NEXT: BFE_UINT * T13.Z, T11.X, literal.y, 1,
; EG-NEXT: 30(4.203895e-44), 29(4.063766e-44)
; EG-NEXT: BFE_UINT T13.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T13.Y, 0.0,
; EG-NEXT: BFE_UINT * T14.Z, T11.X, literal.y, 1,
; EG-NEXT: 28(3.923636e-44), 27(3.783506e-44)
; EG-NEXT: BFE_UINT T14.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T14.Y, 0.0,
; EG-NEXT: BFE_UINT * T15.Z, T11.X, literal.y, 1,
; EG-NEXT: 26(3.643376e-44), 25(3.503246e-44)
; EG-NEXT: BFE_UINT T15.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T15.Y, 0.0,
; EG-NEXT: BFE_UINT * T16.Z, T11.X, literal.y, 1,
; EG-NEXT: 24(3.363116e-44), 23(3.222986e-44)
; EG-NEXT: BFE_UINT T16.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T16.Y, 0.0,
; EG-NEXT: BFE_UINT * T17.Z, T11.X, literal.y, 1,
; EG-NEXT: 22(3.082857e-44), 21(2.942727e-44)
; EG-NEXT: BFE_UINT T17.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T17.Y, 0.0,
; EG-NEXT: BFE_UINT * T18.Z, T11.X, literal.y, 1,
; EG-NEXT: 20(2.802597e-44), 19(2.662467e-44)
; EG-NEXT: BFE_UINT T18.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T18.Y, 0.0,
; EG-NEXT: BFE_UINT * T19.Z, T11.X, literal.y, 1,
; EG-NEXT: 18(2.522337e-44), 17(2.382207e-44)
; EG-NEXT: BFE_UINT T19.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T19.Y, 0.0,
; EG-NEXT: BFE_UINT * T20.Z, T11.X, literal.y, 1,
; EG-NEXT: 16(2.242078e-44), 15(2.101948e-44)
; EG-NEXT: BFE_UINT T20.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T20.Y, 0.0,
; EG-NEXT: BFE_UINT * T21.Z, T11.X, literal.y, 1,
; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44)
; EG-NEXT: BFE_UINT T21.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T21.Y, 0.0,
; EG-NEXT: BFE_UINT * T22.Z, T11.X, literal.y, 1,
; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T22.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T22.Y, 0.0,
; EG-NEXT: BFE_UINT * T23.Z, T11.X, literal.y, 1,
; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44)
; EG-NEXT: BFE_UINT T23.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T23.Y, 0.0,
; EG-NEXT: BFE_UINT * T24.Z, T11.X, literal.y, 1,
; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45)
; EG-NEXT: BFE_UINT T24.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T24.Y, 0.0,
; EG-NEXT: BFE_UINT * T25.Z, T11.X, literal.y, 1,
; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45)
; EG-NEXT: BFE_UINT T25.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T25.Y, 0.0,
; EG-NEXT: BFE_UINT * T26.Z, T11.X, literal.y, 1,
; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45)
; EG-NEXT: BFE_UINT T26.X, T11.X, literal.x, 1,
; EG-NEXT: MOV T26.Y, 0.0,
; EG-NEXT: BFE_UINT T11.Z, T11.X, 1, 1,
; EG-NEXT: AND_INT * T11.X, T11.X, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T11.Y, 0.0,
; EG-NEXT: MOV T12.W, 0.0,
; EG-NEXT: MOV * T13.W, 0.0,
; EG-NEXT: MOV T14.W, 0.0,
; EG-NEXT: MOV * T15.W, 0.0,
; EG-NEXT: MOV T16.W, 0.0,
; EG-NEXT: MOV * T17.W, 0.0,
; EG-NEXT: MOV T18.W, 0.0,
; EG-NEXT: MOV * T19.W, 0.0,
; EG-NEXT: MOV T20.W, 0.0,
; EG-NEXT: MOV * T21.W, 0.0,
; EG-NEXT: MOV T22.W, 0.0,
; EG-NEXT: MOV * T23.W, 0.0,
; EG-NEXT: MOV T24.W, 0.0,
; EG-NEXT: MOV * T25.W, 0.0,
; EG-NEXT: MOV T26.W, 0.0,
; EG-NEXT: MOV * T11.W, 0.0,
; EG-NEXT: LSHR T27.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T28.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T29.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T30.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT: LSHR T31.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
; EG-NEXT: LSHR * T32.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 122:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T33.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
; EG-NEXT: LSHR T34.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
; EG-NEXT: LSHR T35.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
; EG-NEXT: LSHR T36.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
; EG-NEXT: LSHR T37.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
; EG-NEXT: LSHR T38.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43)
; EG-NEXT: LSHR T39.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
; EG-NEXT: LSHR T40.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43)
; EG-NEXT: LSHR T41.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
; EG-NEXT: LSHR * T42.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v32i1_to_v32i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001e
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
; GFX12-NEXT: s_lshr_b32 s4, s2, 31
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001d
; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001c
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:240
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001b
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001a
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10019
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10018
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10017
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10016
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10015
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10013
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10012
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10011
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10010
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000f
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000e
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000b
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000a
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10008
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10005
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <32 x i1>, ptr addrspace(4) %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(1) %out
ret void
}
; Sign-extending vector load: reads a <32 x i1> from %in (addrspace(4)),
; sign-extends every lane to i64 (each lane becomes 0 or -1), and stores the
; resulting <32 x i64> to %out (addrspace(1)).
;
; The assertion lines inside this function are autogenerated (see the NOTE on
; the first line of this file), one group per llc invocation listed at the top
; of the file; rerun utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; Reading aid for the amdgcn groups: each lane is moved down to bit 0 with
; s_lshr_b32 and then widened with s_bfe_i64 using the operand 0x10000.
; NOTE(review): per the ISA operand encoding 0x10000 should mean "width 1 at
; offset 0" - confirm against the ISA manual. In every group the 16 stores
; cover byte offsets 0 through 240, i.e. two i64 lanes per 16-byte store.
define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v32i1_to_v32i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshr_b32 s38, s4, 30
; GFX6-NEXT: s_lshr_b32 s40, s4, 31
; GFX6-NEXT: s_lshr_b32 s34, s4, 28
; GFX6-NEXT: s_lshr_b32 s36, s4, 29
; GFX6-NEXT: s_lshr_b32 s28, s4, 26
; GFX6-NEXT: s_lshr_b32 s30, s4, 27
; GFX6-NEXT: s_lshr_b32 s24, s4, 24
; GFX6-NEXT: s_lshr_b32 s26, s4, 25
; GFX6-NEXT: s_lshr_b32 s20, s4, 22
; GFX6-NEXT: s_lshr_b32 s22, s4, 23
; GFX6-NEXT: s_lshr_b32 s18, s4, 20
; GFX6-NEXT: s_lshr_b32 s6, s4, 21
; GFX6-NEXT: s_lshr_b32 s8, s4, 18
; GFX6-NEXT: s_lshr_b32 s10, s4, 19
; GFX6-NEXT: s_lshr_b32 s12, s4, 16
; GFX6-NEXT: s_lshr_b32 s14, s4, 17
; GFX6-NEXT: s_lshr_b32 s16, s4, 14
; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000
; GFX6-NEXT: s_lshr_b32 s42, s4, 15
; GFX6-NEXT: v_mov_b32_e32 v0, s44
; GFX6-NEXT: v_mov_b32_e32 v1, s45
; GFX6-NEXT: s_lshr_b32 s44, s4, 12
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v2, s38
; GFX6-NEXT: v_mov_b32_e32 v3, s39
; GFX6-NEXT: s_lshr_b32 s38, s4, 13
; GFX6-NEXT: v_mov_b32_e32 v4, s40
; GFX6-NEXT: v_mov_b32_e32 v5, s41
; GFX6-NEXT: s_lshr_b32 s40, s4, 10
; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v6, s34
; GFX6-NEXT: v_mov_b32_e32 v7, s35
; GFX6-NEXT: s_lshr_b32 s34, s4, 11
; GFX6-NEXT: v_mov_b32_e32 v8, s36
; GFX6-NEXT: v_mov_b32_e32 v9, s37
; GFX6-NEXT: s_lshr_b32 s36, s4, 8
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v10, s28
; GFX6-NEXT: v_mov_b32_e32 v11, s29
; GFX6-NEXT: s_lshr_b32 s28, s4, 9
; GFX6-NEXT: v_mov_b32_e32 v12, s30
; GFX6-NEXT: v_mov_b32_e32 v13, s31
; GFX6-NEXT: s_lshr_b32 s30, s4, 6
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v14, s24
; GFX6-NEXT: v_mov_b32_e32 v15, s25
; GFX6-NEXT: s_lshr_b32 s24, s4, 7
; GFX6-NEXT: v_mov_b32_e32 v16, s26
; GFX6-NEXT: v_mov_b32_e32 v17, s27
; GFX6-NEXT: s_lshr_b32 s26, s4, 4
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s20
; GFX6-NEXT: v_mov_b32_e32 v3, s21
; GFX6-NEXT: s_lshr_b32 s20, s4, 5
; GFX6-NEXT: v_mov_b32_e32 v4, s22
; GFX6-NEXT: v_mov_b32_e32 v5, s23
; GFX6-NEXT: s_lshr_b32 s22, s4, 2
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v6, s18
; GFX6-NEXT: v_mov_b32_e32 v7, s19
; GFX6-NEXT: s_lshr_b32 s18, s4, 3
; GFX6-NEXT: s_lshr_b32 s4, s4, 1
; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:208
; GFX6-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:176
; GFX6-NEXT: v_mov_b32_e32 v8, s6
; GFX6-NEXT: v_mov_b32_e32 v9, s7
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(1)
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: v_mov_b32_e32 v3, s9
; GFX6-NEXT: v_mov_b32_e32 v4, s10
; GFX6-NEXT: v_mov_b32_e32 v5, s11
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s12
; GFX6-NEXT: v_mov_b32_e32 v3, s13
; GFX6-NEXT: v_mov_b32_e32 v4, s14
; GFX6-NEXT: v_mov_b32_e32 v5, s15
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s16
; GFX6-NEXT: v_mov_b32_e32 v3, s17
; GFX6-NEXT: v_mov_b32_e32 v4, s42
; GFX6-NEXT: v_mov_b32_e32 v5, s43
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s44
; GFX6-NEXT: v_mov_b32_e32 v3, s45
; GFX6-NEXT: v_mov_b32_e32 v4, s38
; GFX6-NEXT: v_mov_b32_e32 v5, s39
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s40
; GFX6-NEXT: v_mov_b32_e32 v3, s41
; GFX6-NEXT: v_mov_b32_e32 v4, s34
; GFX6-NEXT: v_mov_b32_e32 v5, s35
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s36
; GFX6-NEXT: v_mov_b32_e32 v3, s37
; GFX6-NEXT: v_mov_b32_e32 v4, s28
; GFX6-NEXT: v_mov_b32_e32 v5, s29
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s30
; GFX6-NEXT: v_mov_b32_e32 v3, s31
; GFX6-NEXT: v_mov_b32_e32 v4, s24
; GFX6-NEXT: v_mov_b32_e32 v5, s25
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s26
; GFX6-NEXT: v_mov_b32_e32 v3, s27
; GFX6-NEXT: v_mov_b32_e32 v4, s20
; GFX6-NEXT: v_mov_b32_e32 v5, s21
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s22
; GFX6-NEXT: v_mov_b32_e32 v3, s23
; GFX6-NEXT: v_mov_b32_e32 v4, s18
; GFX6-NEXT: v_mov_b32_e32 v5, s19
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v32i1_to_v32i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s44, s2, 30
; GFX8-NEXT: s_lshr_b32 s46, s2, 31
; GFX8-NEXT: s_lshr_b32 s48, s2, 28
; GFX8-NEXT: s_lshr_b32 s50, s2, 29
; GFX8-NEXT: s_lshr_b32 s52, s2, 26
; GFX8-NEXT: s_lshr_b32 s54, s2, 27
; GFX8-NEXT: s_lshr_b32 s56, s2, 24
; GFX8-NEXT: s_lshr_b32 s58, s2, 25
; GFX8-NEXT: s_lshr_b32 s60, s2, 22
; GFX8-NEXT: s_lshr_b32 s62, s2, 23
; GFX8-NEXT: s_lshr_b32 s64, s2, 20
; GFX8-NEXT: s_lshr_b32 s66, s2, 21
; GFX8-NEXT: s_lshr_b32 s42, s2, 18
; GFX8-NEXT: s_lshr_b32 s40, s2, 19
; GFX8-NEXT: s_lshr_b32 s38, s2, 16
; GFX8-NEXT: s_lshr_b32 s36, s2, 17
; GFX8-NEXT: s_lshr_b32 s34, s2, 14
; GFX8-NEXT: s_lshr_b32 s30, s2, 15
; GFX8-NEXT: s_lshr_b32 s28, s2, 12
; GFX8-NEXT: s_lshr_b32 s26, s2, 13
; GFX8-NEXT: s_lshr_b32 s24, s2, 10
; GFX8-NEXT: s_lshr_b32 s22, s2, 11
; GFX8-NEXT: s_lshr_b32 s20, s2, 8
; GFX8-NEXT: s_lshr_b32 s18, s2, 9
; GFX8-NEXT: s_lshr_b32 s16, s2, 6
; GFX8-NEXT: s_lshr_b32 s14, s2, 7
; GFX8-NEXT: s_lshr_b32 s12, s2, 4
; GFX8-NEXT: s_lshr_b32 s10, s2, 5
; GFX8-NEXT: s_lshr_b32 s8, s2, 2
; GFX8-NEXT: s_lshr_b32 s6, s2, 3
; GFX8-NEXT: s_lshr_b32 s68, s2, 1
; GFX8-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
; GFX8-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000
; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX8-NEXT: v_mov_b32_e32 v0, s44
; GFX8-NEXT: s_add_u32 s44, s0, 0xf0
; GFX8-NEXT: v_mov_b32_e32 v1, s45
; GFX8-NEXT: s_addc_u32 s45, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s44
; GFX8-NEXT: v_mov_b32_e32 v2, s46
; GFX8-NEXT: v_mov_b32_e32 v3, s47
; GFX8-NEXT: v_mov_b32_e32 v5, s45
; GFX8-NEXT: s_add_u32 s44, s0, 0xe0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s45, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s44
; GFX8-NEXT: v_mov_b32_e32 v0, s48
; GFX8-NEXT: v_mov_b32_e32 v1, s49
; GFX8-NEXT: v_mov_b32_e32 v2, s50
; GFX8-NEXT: v_mov_b32_e32 v3, s51
; GFX8-NEXT: v_mov_b32_e32 v5, s45
; GFX8-NEXT: s_add_u32 s44, s0, 0xd0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s45, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s44
; GFX8-NEXT: v_mov_b32_e32 v0, s52
; GFX8-NEXT: v_mov_b32_e32 v1, s53
; GFX8-NEXT: v_mov_b32_e32 v2, s54
; GFX8-NEXT: v_mov_b32_e32 v3, s55
; GFX8-NEXT: v_mov_b32_e32 v5, s45
; GFX8-NEXT: s_add_u32 s44, s0, 0xc0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s45, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s44
; GFX8-NEXT: v_mov_b32_e32 v0, s56
; GFX8-NEXT: v_mov_b32_e32 v1, s57
; GFX8-NEXT: v_mov_b32_e32 v2, s58
; GFX8-NEXT: v_mov_b32_e32 v3, s59
; GFX8-NEXT: v_mov_b32_e32 v5, s45
; GFX8-NEXT: s_add_u32 s44, s0, 0xb0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s45, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s44
; GFX8-NEXT: v_mov_b32_e32 v0, s60
; GFX8-NEXT: v_mov_b32_e32 v1, s61
; GFX8-NEXT: v_mov_b32_e32 v2, s62
; GFX8-NEXT: v_mov_b32_e32 v3, s63
; GFX8-NEXT: v_mov_b32_e32 v5, s45
; GFX8-NEXT: s_add_u32 s44, s0, 0xa0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s45, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s44
; GFX8-NEXT: v_mov_b32_e32 v0, s64
; GFX8-NEXT: v_mov_b32_e32 v1, s65
; GFX8-NEXT: v_mov_b32_e32 v2, s66
; GFX8-NEXT: v_mov_b32_e32 v3, s67
; GFX8-NEXT: v_mov_b32_e32 v5, s45
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s40
; GFX8-NEXT: s_add_u32 s40, s0, 0x90
; GFX8-NEXT: v_mov_b32_e32 v3, s41
; GFX8-NEXT: s_addc_u32 s41, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s40
; GFX8-NEXT: v_mov_b32_e32 v0, s42
; GFX8-NEXT: v_mov_b32_e32 v1, s43
; GFX8-NEXT: v_mov_b32_e32 v5, s41
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s36
; GFX8-NEXT: s_add_u32 s36, s0, 0x80
; GFX8-NEXT: v_mov_b32_e32 v3, s37
; GFX8-NEXT: s_addc_u32 s37, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s36
; GFX8-NEXT: v_mov_b32_e32 v0, s38
; GFX8-NEXT: v_mov_b32_e32 v1, s39
; GFX8-NEXT: v_mov_b32_e32 v5, s37
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s30
; GFX8-NEXT: s_add_u32 s30, s0, 0x70
; GFX8-NEXT: v_mov_b32_e32 v3, s31
; GFX8-NEXT: s_addc_u32 s31, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s30
; GFX8-NEXT: v_mov_b32_e32 v0, s34
; GFX8-NEXT: v_mov_b32_e32 v1, s35
; GFX8-NEXT: v_mov_b32_e32 v5, s31
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s26
; GFX8-NEXT: s_add_u32 s26, s0, 0x60
; GFX8-NEXT: v_mov_b32_e32 v3, s27
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v0, s28
; GFX8-NEXT: v_mov_b32_e32 v1, s29
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s22
; GFX8-NEXT: s_add_u32 s22, s0, 0x50
; GFX8-NEXT: v_mov_b32_e32 v3, s23
; GFX8-NEXT: s_addc_u32 s23, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s22
; GFX8-NEXT: v_mov_b32_e32 v0, s24
; GFX8-NEXT: v_mov_b32_e32 v1, s25
; GFX8-NEXT: v_mov_b32_e32 v5, s23
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s18
; GFX8-NEXT: s_add_u32 s18, s0, 64
; GFX8-NEXT: v_mov_b32_e32 v3, s19
; GFX8-NEXT: s_addc_u32 s19, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s18
; GFX8-NEXT: v_mov_b32_e32 v0, s20
; GFX8-NEXT: v_mov_b32_e32 v1, s21
; GFX8-NEXT: v_mov_b32_e32 v5, s19
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s14
; GFX8-NEXT: s_add_u32 s14, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v3, s15
; GFX8-NEXT: s_addc_u32 s15, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s14
; GFX8-NEXT: v_mov_b32_e32 v0, s16
; GFX8-NEXT: v_mov_b32_e32 v1, s17
; GFX8-NEXT: v_mov_b32_e32 v5, s15
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s10
; GFX8-NEXT: s_add_u32 s10, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v3, s11
; GFX8-NEXT: s_addc_u32 s11, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s10
; GFX8-NEXT: v_mov_b32_e32 v0, s12
; GFX8-NEXT: v_mov_b32_e32 v1, s13
; GFX8-NEXT: v_mov_b32_e32 v5, s11
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: s_add_u32 s6, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v3, s7
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v0, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v32i1_to_v32i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @22
; EG-NEXT: ALU 92, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 65, @118, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T42.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T34.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T24.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T23.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T22.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T21.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T20.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T19.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T18.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T17.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T16.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T15.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T14.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T13.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T12.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 22:
; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 24:
; EG-NEXT: MOV * T11.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 25:
; EG-NEXT: LSHR T12.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T13.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T14.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T15.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT: LSHR T16.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
; EG-NEXT: LSHR T17.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
; EG-NEXT: LSHR T18.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
; EG-NEXT: LSHR T19.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
; EG-NEXT: LSHR T22.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
; EG-NEXT: LSHR T23.X, PV.W, literal.x,
; EG-NEXT: LSHR T0.Y, T11.X, literal.y,
; EG-NEXT: LSHR T0.Z, T11.X, literal.z,
; EG-NEXT: LSHR * T0.W, T11.X, literal.w,
; EG-NEXT: 2(2.802597e-45), 28(3.923636e-44)
; EG-NEXT: 29(4.063766e-44), 24(3.363116e-44)
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T24.X, PV.W, literal.x,
; EG-NEXT: LSHR T1.Y, T11.X, literal.y,
; EG-NEXT: LSHR T1.Z, T11.X, literal.z,
; EG-NEXT: LSHR * T1.W, T11.X, literal.w,
; EG-NEXT: 2(2.802597e-45), 25(3.503246e-44)
; EG-NEXT: 20(2.802597e-44), 21(2.942727e-44)
; EG-NEXT: LSHR * T2.W, T11.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T25.X, T11.X, 0.0, 1,
; EG-NEXT: LSHR T2.Y, T11.X, literal.x,
; EG-NEXT: ASHR T26.Z, T11.X, literal.y,
; EG-NEXT: LSHR T3.W, T11.X, literal.z,
; EG-NEXT: LSHR * T4.W, T11.X, literal.w,
; EG-NEXT: 17(2.382207e-44), 31(4.344025e-44)
; EG-NEXT: 27(3.783506e-44), 30(4.203895e-44)
; EG-NEXT: BFE_INT T26.X, PS, 0.0, 1,
; EG-NEXT: LSHR T3.Y, T11.X, literal.x,
; EG-NEXT: BFE_INT T27.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.y,
; EG-NEXT: LSHR * T4.W, T11.X, literal.z,
; EG-NEXT: 12(1.681558e-44), 23(3.222986e-44)
; EG-NEXT: 26(3.643376e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T27.X, PS, 0.0, 1,
; EG-NEXT: MOV T26.Y, PV.X,
; EG-NEXT: BFE_INT T28.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x,
; EG-NEXT: LSHR * T4.W, T11.X, literal.y,
; EG-NEXT: 19(2.662467e-44), 22(3.082857e-44)
; EG-NEXT: BFE_INT T28.X, PS, 0.0, 1,
; EG-NEXT: MOV T27.Y, PV.X,
; EG-NEXT: BFE_INT T29.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x,
; EG-NEXT: LSHR * T4.W, T11.X, literal.y,
; EG-NEXT: 15(2.101948e-44), 18(2.522337e-44)
; EG-NEXT: BFE_INT T29.X, PS, 0.0, 1,
; EG-NEXT: MOV T28.Y, PV.X,
; EG-NEXT: BFE_INT T30.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x,
; EG-NEXT: LSHR * T4.W, T11.X, literal.y,
; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44)
; EG-NEXT: BFE_INT T30.X, PS, 0.0, 1,
; EG-NEXT: MOV T29.Y, PV.X,
; EG-NEXT: BFE_INT T31.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x,
; EG-NEXT: LSHR * T4.W, T11.X, literal.y,
; EG-NEXT: 7(9.809089e-45), 10(1.401298e-44)
; EG-NEXT: BFE_INT T31.X, PS, 0.0, 1,
; EG-NEXT: MOV T30.Y, PV.X,
; EG-NEXT: BFE_INT T32.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x,
; EG-NEXT: LSHR * T4.W, T11.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45)
; EG-NEXT: ALU clause starting at 118:
; EG-NEXT: BFE_INT T32.X, T4.W, 0.0, 1,
; EG-NEXT: MOV T31.Y, T31.X,
; EG-NEXT: BFE_INT T33.Z, T3.W, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: LSHR T3.W, T11.X, 1, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T4.W, T11.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T33.X, PS, 0.0, 1,
; EG-NEXT: MOV T32.Y, PV.X,
; EG-NEXT: BFE_INT T25.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x,
; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.y,
; EG-NEXT: 5(7.006492e-45), 208(2.914701e-43)
; EG-NEXT: LSHR T34.X, PS, literal.x,
; EG-NEXT: MOV T33.Y, PV.X,
; EG-NEXT: BFE_INT T35.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.y,
; EG-NEXT: LSHR * T4.W, T11.X, literal.z,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T35.X, PS, 0.0, 1,
; EG-NEXT: MOV T25.Y, T25.X,
; EG-NEXT: BFE_INT T11.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T3.W, T11.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T4.W, T11.X, literal.y,
; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44)
; EG-NEXT: BFE_INT T11.X, PS, 0.0, 1,
; EG-NEXT: MOV T35.Y, PV.X,
; EG-NEXT: BFE_INT T36.Z, PV.W, 0.0, 1,
; EG-NEXT: MOV T25.W, T25.Z,
; EG-NEXT: MOV * T33.W, T33.Z,
; EG-NEXT: BFE_INT T36.X, T3.Y, 0.0, 1,
; EG-NEXT: MOV T11.Y, PV.X,
; EG-NEXT: BFE_INT T37.Z, T2.Y, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: MOV T35.W, T35.Z,
; EG-NEXT: MOV * T32.W, T32.Z,
; EG-NEXT: BFE_INT T37.X, T2.W, 0.0, 1,
; EG-NEXT: MOV T36.Y, PV.X,
; EG-NEXT: BFE_INT T38.Z, T1.W, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: MOV T11.W, T11.Z,
; EG-NEXT: MOV * T31.W, T31.Z,
; EG-NEXT: BFE_INT T38.X, T1.Z, 0.0, 1,
; EG-NEXT: MOV T37.Y, PV.X,
; EG-NEXT: BFE_INT T39.Z, T1.Y, 0.0, 1,
; EG-NEXT: MOV T36.W, T36.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T30.W, T30.Z,
; EG-NEXT: BFE_INT T39.X, T0.W, 0.0, 1,
; EG-NEXT: MOV T38.Y, PV.X,
; EG-NEXT: BFE_INT T40.Z, T0.Z, 0.0, 1,
; EG-NEXT: MOV T37.W, T37.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T29.W, T29.Z,
; EG-NEXT: BFE_INT T40.X, T0.Y, 0.0, 1,
; EG-NEXT: MOV T39.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x,
; EG-NEXT: MOV T38.W, T38.Z,
; EG-NEXT: MOV * T28.W, T28.Z,
; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T41.X, PV.Z, literal.x,
; EG-NEXT: MOV T40.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
; EG-NEXT: MOV T39.W, T39.Z,
; EG-NEXT: MOV * T27.W, T27.Z,
; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
; EG-NEXT: LSHR T42.X, PV.Z, literal.x,
; EG-NEXT: MOV T40.W, T40.Z,
; EG-NEXT: MOV * T26.W, T26.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v32i1_to_v32i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_lshr_b32 s34, s2, 30
; GFX12-NEXT: s_lshr_b32 s36, s2, 31
; GFX12-NEXT: s_lshr_b32 s38, s2, 28
; GFX12-NEXT: s_lshr_b32 s40, s2, 29
; GFX12-NEXT: s_lshr_b32 s42, s2, 26
; GFX12-NEXT: s_lshr_b32 s44, s2, 27
; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX12-NEXT: s_lshr_b32 s46, s2, 24
; GFX12-NEXT: s_lshr_b32 s48, s2, 25
; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v0, s34 :: v_dual_mov_b32 v3, s37
; GFX12-NEXT: v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v5, s39
; GFX12-NEXT: s_lshr_b32 s26, s2, 22
; GFX12-NEXT: s_lshr_b32 s50, s2, 23
; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v4, s38 :: v_dual_mov_b32 v7, s41
; GFX12-NEXT: v_dual_mov_b32 v6, s40 :: v_dual_mov_b32 v9, s43
; GFX12-NEXT: s_lshr_b32 s52, s2, 20
; GFX12-NEXT: s_lshr_b32 s54, s2, 21
; GFX12-NEXT: v_dual_mov_b32 v8, s42 :: v_dual_mov_b32 v11, s45
; GFX12-NEXT: v_dual_mov_b32 v10, s44 :: v_dual_mov_b32 v13, s47
; GFX12-NEXT: s_lshr_b32 s56, s2, 18
; GFX12-NEXT: s_lshr_b32 s58, s2, 19
; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v12, s46 :: v_dual_mov_b32 v15, s49
; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX12-NEXT: v_mov_b32_e32 v14, s48
; GFX12-NEXT: s_lshr_b32 s60, s2, 16
; GFX12-NEXT: s_lshr_b32 s62, s2, 17
; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
; GFX12-NEXT: s_lshr_b32 s64, s2, 14
; GFX12-NEXT: s_lshr_b32 s66, s2, 15
; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX12-NEXT: s_clause 0x3
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
; GFX12-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v3, s51
; GFX12-NEXT: v_dual_mov_b32 v1, s27 :: v_dual_mov_b32 v2, s50
; GFX12-NEXT: v_mov_b32_e32 v5, s53
; GFX12-NEXT: s_lshr_b32 s30, s2, 12
; GFX12-NEXT: s_lshr_b32 s28, s2, 13
; GFX12-NEXT: s_lshr_b32 s24, s2, 10
; GFX12-NEXT: s_lshr_b32 s22, s2, 11
; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v4, s52 :: v_dual_mov_b32 v7, s55
; GFX12-NEXT: v_dual_mov_b32 v6, s54 :: v_dual_mov_b32 v9, s57
; GFX12-NEXT: s_lshr_b32 s20, s2, 8
; GFX12-NEXT: s_lshr_b32 s18, s2, 9
; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v8, s56 :: v_dual_mov_b32 v11, s59
; GFX12-NEXT: v_dual_mov_b32 v10, s58 :: v_dual_mov_b32 v13, s61
; GFX12-NEXT: s_lshr_b32 s16, s2, 6
; GFX12-NEXT: s_lshr_b32 s14, s2, 7
; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v12, s60 :: v_dual_mov_b32 v15, s63
; GFX12-NEXT: v_dual_mov_b32 v14, s62 :: v_dual_mov_b32 v17, s65
; GFX12-NEXT: s_lshr_b32 s12, s2, 4
; GFX12-NEXT: s_lshr_b32 s10, s2, 5
; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v16, s64 :: v_dual_mov_b32 v19, s67
; GFX12-NEXT: v_dual_mov_b32 v18, s66 :: v_dual_mov_b32 v21, s31
; GFX12-NEXT: s_lshr_b32 s8, s2, 2
; GFX12-NEXT: s_lshr_b32 s6, s2, 3
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v20, s30 :: v_dual_mov_b32 v23, s29
; GFX12-NEXT: v_mov_b32_e32 v22, s28
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
; GFX12-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v3, s23
; GFX12-NEXT: v_dual_mov_b32 v1, s25 :: v_dual_mov_b32 v2, s22
; GFX12-NEXT: v_mov_b32_e32 v5, s21
; GFX12-NEXT: s_lshr_b32 s68, s2, 1
; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v7, s19
; GFX12-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v9, s17
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v11, s15
; GFX12-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v13, s13
; GFX12-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
; GFX12-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v15, s11
; GFX12-NEXT: v_dual_mov_b32 v14, s10 :: v_dual_mov_b32 v17, s9
; GFX12-NEXT: v_dual_mov_b32 v16, s8 :: v_dual_mov_b32 v19, s7
; GFX12-NEXT: v_dual_mov_b32 v18, s6 :: v_dual_mov_b32 v21, s5
; GFX12-NEXT: v_dual_mov_b32 v20, s4 :: v_dual_mov_b32 v23, s3
; GFX12-NEXT: v_mov_b32_e32 v22, s2
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
; GFX12-NEXT: s_endpgm
; A single vector load brings in all 32 i1 lanes from %in.
%load = load <32 x i1>, ptr addrspace(4) %in
; sext replicates each lane's single bit across the whole i64 (0 or -1).
%ext = sext <32 x i1> %load to <32 x i64>
; 32 lanes x 8 bytes = 256 bytes written to %out.
store <32 x i64> %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_zextload_v64i1_to_v64i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_bfe_u32 s4, s2, 0x10003
; GFX6-NEXT: s_bfe_u32 s5, s2, 0x10005
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x10007
; GFX6-NEXT: s_bfe_u32 s11, s2, 0x10009
; GFX6-NEXT: s_bfe_u32 s13, s2, 0x1000b
; GFX6-NEXT: s_bfe_u32 s15, s2, 0x1000d
; GFX6-NEXT: s_bfe_u32 s17, s2, 0x1000f
; GFX6-NEXT: s_bfe_u32 s19, s2, 0x10011
; GFX6-NEXT: s_bfe_u32 s21, s2, 0x10013
; GFX6-NEXT: s_bfe_u32 s23, s2, 0x10015
; GFX6-NEXT: s_bfe_u32 s25, s2, 0x10017
; GFX6-NEXT: s_bfe_u32 s27, s2, 0x10019
; GFX6-NEXT: s_bfe_u32 s29, s2, 0x1001b
; GFX6-NEXT: s_bfe_u32 s31, s2, 0x1001d
; GFX6-NEXT: s_lshr_b32 s34, s2, 31
; GFX6-NEXT: s_bfe_u32 s35, s3, 0x10003
; GFX6-NEXT: s_bfe_u32 s36, s3, 0x10005
; GFX6-NEXT: s_bfe_u32 s37, s3, 0x10007
; GFX6-NEXT: s_bfe_u32 s38, s3, 0x10009
; GFX6-NEXT: s_bfe_u32 s39, s3, 0x1000b
; GFX6-NEXT: s_bfe_u32 s40, s3, 0x1000d
; GFX6-NEXT: s_bfe_u32 s41, s3, 0x1000f
; GFX6-NEXT: s_bfe_u32 s42, s3, 0x10011
; GFX6-NEXT: s_bfe_u32 s43, s3, 0x10013
; GFX6-NEXT: s_bfe_u32 s44, s3, 0x10015
; GFX6-NEXT: s_bfe_u32 s45, s3, 0x10017
; GFX6-NEXT: s_bfe_u32 s46, s3, 0x10019
; GFX6-NEXT: s_bfe_u32 s47, s3, 0x1001b
; GFX6-NEXT: s_bfe_u32 s48, s3, 0x1001d
; GFX6-NEXT: s_lshr_b32 s49, s3, 31
; GFX6-NEXT: s_bfe_u32 s9, s3, 0x10001
; GFX6-NEXT: s_bfe_u32 s6, s2, 0x10001
; GFX6-NEXT: s_and_b32 s7, s2, 1
; GFX6-NEXT: s_and_b32 s10, s3, 1
; GFX6-NEXT: s_bfe_u32 s12, s2, 0x10002
; GFX6-NEXT: s_bfe_u32 s14, s2, 0x10004
; GFX6-NEXT: s_bfe_u32 s16, s2, 0x10006
; GFX6-NEXT: s_bfe_u32 s18, s2, 0x10008
; GFX6-NEXT: s_bfe_u32 s20, s2, 0x1000a
; GFX6-NEXT: s_bfe_u32 s22, s2, 0x1000c
; GFX6-NEXT: s_bfe_u32 s24, s2, 0x1000e
; GFX6-NEXT: s_bfe_u32 s26, s2, 0x10010
; GFX6-NEXT: s_bfe_u32 s28, s2, 0x10012
; GFX6-NEXT: s_bfe_u32 s30, s2, 0x10014
; GFX6-NEXT: s_bfe_u32 s33, s2, 0x10016
; GFX6-NEXT: s_bfe_u32 s50, s2, 0x10018
; GFX6-NEXT: s_bfe_u32 s51, s2, 0x1001a
; GFX6-NEXT: s_bfe_u32 s52, s2, 0x1001c
; GFX6-NEXT: s_bfe_u32 s53, s2, 0x1001e
; GFX6-NEXT: s_bfe_u32 s54, s3, 0x10002
; GFX6-NEXT: s_bfe_u32 s55, s3, 0x10004
; GFX6-NEXT: s_bfe_u32 s56, s3, 0x10006
; GFX6-NEXT: s_bfe_u32 s57, s3, 0x10008
; GFX6-NEXT: s_bfe_u32 s58, s3, 0x1000a
; GFX6-NEXT: s_bfe_u32 s59, s3, 0x1000c
; GFX6-NEXT: s_bfe_u32 s60, s3, 0x1000e
; GFX6-NEXT: s_bfe_u32 s61, s3, 0x10010
; GFX6-NEXT: s_bfe_u32 s62, s3, 0x10012
; GFX6-NEXT: s_bfe_u32 s63, s3, 0x10014
; GFX6-NEXT: s_bfe_u32 s64, s3, 0x10016
; GFX6-NEXT: s_bfe_u32 s65, s3, 0x10018
; GFX6-NEXT: s_bfe_u32 s66, s3, 0x1001a
; GFX6-NEXT: s_bfe_u32 s67, s3, 0x1001e
; GFX6-NEXT: s_bfe_u32 s68, s3, 0x1001c
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, v1
; GFX6-NEXT: v_mov_b32_e32 v0, s67
; GFX6-NEXT: v_mov_b32_e32 v2, s49
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:496
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s68
; GFX6-NEXT: v_mov_b32_e32 v2, s48
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:480
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s66
; GFX6-NEXT: v_mov_b32_e32 v2, s47
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:464
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s65
; GFX6-NEXT: v_mov_b32_e32 v2, s46
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:448
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s64
; GFX6-NEXT: v_mov_b32_e32 v2, s45
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s63
; GFX6-NEXT: v_mov_b32_e32 v2, s44
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:416
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s62
; GFX6-NEXT: v_mov_b32_e32 v2, s43
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s61
; GFX6-NEXT: v_mov_b32_e32 v2, s42
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s60
; GFX6-NEXT: v_mov_b32_e32 v2, s41
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s59
; GFX6-NEXT: v_mov_b32_e32 v2, s40
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s58
; GFX6-NEXT: v_mov_b32_e32 v2, s39
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s57
; GFX6-NEXT: v_mov_b32_e32 v2, s38
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s56
; GFX6-NEXT: v_mov_b32_e32 v2, s37
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s55
; GFX6-NEXT: v_mov_b32_e32 v2, s36
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s54
; GFX6-NEXT: v_mov_b32_e32 v2, s35
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s53
; GFX6-NEXT: v_mov_b32_e32 v2, s34
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s52
; GFX6-NEXT: v_mov_b32_e32 v2, s31
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s51
; GFX6-NEXT: v_mov_b32_e32 v2, s29
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s50
; GFX6-NEXT: v_mov_b32_e32 v2, s27
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s33
; GFX6-NEXT: v_mov_b32_e32 v2, s25
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s30
; GFX6-NEXT: v_mov_b32_e32 v2, s23
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s28
; GFX6-NEXT: v_mov_b32_e32 v2, s21
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s26
; GFX6-NEXT: v_mov_b32_e32 v2, s19
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s24
; GFX6-NEXT: v_mov_b32_e32 v2, s17
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s22
; GFX6-NEXT: v_mov_b32_e32 v2, s15
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s20
; GFX6-NEXT: v_mov_b32_e32 v2, s13
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s18
; GFX6-NEXT: v_mov_b32_e32 v2, s11
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s16
; GFX6-NEXT: v_mov_b32_e32 v2, s8
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s14
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s12
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s10
; GFX6-NEXT: v_mov_b32_e32 v2, s9
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s7
; GFX6-NEXT: v_mov_b32_e32 v2, s6
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_zextload_v64i1_to_v64i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_mov_b32_e32 v3, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s44, s43, 31
; GFX8-NEXT: s_bfe_u32 s45, s43, 0x1001d
; GFX8-NEXT: s_bfe_u32 s46, s43, 0x1001b
; GFX8-NEXT: s_bfe_u32 s47, s43, 0x10019
; GFX8-NEXT: s_bfe_u32 s48, s43, 0x10017
; GFX8-NEXT: s_bfe_u32 s49, s43, 0x10013
; GFX8-NEXT: s_bfe_u32 s50, s43, 0x10011
; GFX8-NEXT: s_bfe_u32 s51, s43, 0x1000f
; GFX8-NEXT: s_bfe_u32 s52, s43, 0x1000d
; GFX8-NEXT: s_bfe_u32 s53, s43, 0x1000b
; GFX8-NEXT: s_bfe_u32 s40, s43, 0x10009
; GFX8-NEXT: s_bfe_u32 s38, s43, 0x10007
; GFX8-NEXT: s_bfe_u32 s37, s43, 0x10005
; GFX8-NEXT: s_bfe_u32 s35, s43, 0x10003
; GFX8-NEXT: s_bfe_u32 s33, s43, 0x10001
; GFX8-NEXT: s_lshr_b32 s30, s42, 31
; GFX8-NEXT: s_bfe_u32 s28, s42, 0x1001d
; GFX8-NEXT: s_bfe_u32 s26, s42, 0x1001b
; GFX8-NEXT: s_bfe_u32 s25, s42, 0x10019
; GFX8-NEXT: s_bfe_u32 s22, s42, 0x10017
; GFX8-NEXT: s_bfe_u32 s19, s42, 0x10013
; GFX8-NEXT: s_bfe_u32 s17, s42, 0x10011
; GFX8-NEXT: s_bfe_u32 s15, s42, 0x1000f
; GFX8-NEXT: s_bfe_u32 s13, s42, 0x1000d
; GFX8-NEXT: s_bfe_u32 s12, s42, 0x1000b
; GFX8-NEXT: s_bfe_u32 s10, s42, 0x10009
; GFX8-NEXT: s_bfe_u32 s8, s42, 0x10007
; GFX8-NEXT: s_bfe_u32 s6, s42, 0x10005
; GFX8-NEXT: s_bfe_u32 s4, s42, 0x10003
; GFX8-NEXT: s_bfe_u32 s2, s42, 0x10001
; GFX8-NEXT: s_and_b32 s3, s42, 1
; GFX8-NEXT: s_bfe_u32 s5, s42, 0x10002
; GFX8-NEXT: s_bfe_u32 s7, s42, 0x10004
; GFX8-NEXT: s_bfe_u32 s9, s42, 0x10006
; GFX8-NEXT: s_bfe_u32 s11, s42, 0x10008
; GFX8-NEXT: s_bfe_u32 s14, s42, 0x1000a
; GFX8-NEXT: s_bfe_u32 s16, s42, 0x1000c
; GFX8-NEXT: s_bfe_u32 s18, s42, 0x1000e
; GFX8-NEXT: s_bfe_u32 s20, s42, 0x10010
; GFX8-NEXT: s_bfe_u32 s21, s42, 0x10012
; GFX8-NEXT: s_bfe_u32 s23, s42, 0x10014
; GFX8-NEXT: s_bfe_u32 s24, s42, 0x10015
; GFX8-NEXT: s_bfe_u32 s27, s42, 0x10016
; GFX8-NEXT: s_bfe_u32 s29, s42, 0x10018
; GFX8-NEXT: s_bfe_u32 s31, s42, 0x1001a
; GFX8-NEXT: s_bfe_u32 s34, s42, 0x1001c
; GFX8-NEXT: s_bfe_u32 s36, s42, 0x1001e
; GFX8-NEXT: s_and_b32 s39, s43, 1
; GFX8-NEXT: s_bfe_u32 s41, s43, 0x10002
; GFX8-NEXT: s_bfe_u32 s54, s43, 0x10004
; GFX8-NEXT: s_bfe_u32 s55, s43, 0x10006
; GFX8-NEXT: s_bfe_u32 s56, s43, 0x10008
; GFX8-NEXT: s_bfe_u32 s57, s43, 0x1000a
; GFX8-NEXT: s_bfe_u32 s58, s43, 0x1000c
; GFX8-NEXT: s_bfe_u32 s59, s43, 0x1000e
; GFX8-NEXT: s_bfe_u32 s60, s43, 0x10010
; GFX8-NEXT: s_bfe_u32 s61, s43, 0x10012
; GFX8-NEXT: s_bfe_u32 s62, s43, 0x10016
; GFX8-NEXT: s_bfe_u32 s63, s43, 0x10018
; GFX8-NEXT: s_bfe_u32 s64, s43, 0x1001a
; GFX8-NEXT: s_bfe_u32 s65, s43, 0x1001c
; GFX8-NEXT: s_bfe_u32 s66, s43, 0x1001e
; GFX8-NEXT: s_bfe_u32 s42, s43, 0x10015
; GFX8-NEXT: s_bfe_u32 s43, s43, 0x10014
; GFX8-NEXT: v_mov_b32_e32 v2, s42
; GFX8-NEXT: s_add_u32 s42, s0, 0x1a0
; GFX8-NEXT: v_mov_b32_e32 v0, s43
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x1f0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s66
; GFX8-NEXT: v_mov_b32_e32 v2, s44
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x1e0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s65
; GFX8-NEXT: v_mov_b32_e32 v2, s45
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x1d0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s64
; GFX8-NEXT: v_mov_b32_e32 v2, s46
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x1c0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s63
; GFX8-NEXT: v_mov_b32_e32 v2, s47
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x1b0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s62
; GFX8-NEXT: v_mov_b32_e32 v2, s48
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x190
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s61
; GFX8-NEXT: v_mov_b32_e32 v2, s49
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x180
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s60
; GFX8-NEXT: v_mov_b32_e32 v2, s50
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x170
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s59
; GFX8-NEXT: v_mov_b32_e32 v2, s51
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x160
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s58
; GFX8-NEXT: v_mov_b32_e32 v2, s52
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x150
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s57
; GFX8-NEXT: v_mov_b32_e32 v2, s53
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x140
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s56
; GFX8-NEXT: v_mov_b32_e32 v2, s40
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x130
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s55
; GFX8-NEXT: v_mov_b32_e32 v2, s38
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s42, s0, 0x120
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s43, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s42
; GFX8-NEXT: v_mov_b32_e32 v0, s54
; GFX8-NEXT: v_mov_b32_e32 v2, s37
; GFX8-NEXT: v_mov_b32_e32 v5, s43
; GFX8-NEXT: s_add_u32 s40, s0, 0x110
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s41
; GFX8-NEXT: s_addc_u32 s41, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s40
; GFX8-NEXT: v_mov_b32_e32 v2, s35
; GFX8-NEXT: v_mov_b32_e32 v5, s41
; GFX8-NEXT: s_add_u32 s38, s0, 0x100
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s39
; GFX8-NEXT: s_addc_u32 s39, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s38
; GFX8-NEXT: v_mov_b32_e32 v2, s33
; GFX8-NEXT: v_mov_b32_e32 v5, s39
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s36
; GFX8-NEXT: s_add_u32 s36, s0, 0xf0
; GFX8-NEXT: s_addc_u32 s37, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s36
; GFX8-NEXT: v_mov_b32_e32 v2, s30
; GFX8-NEXT: v_mov_b32_e32 v5, s37
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s34
; GFX8-NEXT: s_add_u32 s34, s0, 0xe0
; GFX8-NEXT: s_addc_u32 s35, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s34
; GFX8-NEXT: v_mov_b32_e32 v2, s28
; GFX8-NEXT: v_mov_b32_e32 v5, s35
; GFX8-NEXT: s_add_u32 s30, s0, 0xd0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s31
; GFX8-NEXT: s_addc_u32 s31, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s30
; GFX8-NEXT: v_mov_b32_e32 v2, s26
; GFX8-NEXT: v_mov_b32_e32 v5, s31
; GFX8-NEXT: s_add_u32 s28, s0, 0xc0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s29
; GFX8-NEXT: s_addc_u32 s29, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s28
; GFX8-NEXT: v_mov_b32_e32 v2, s25
; GFX8-NEXT: v_mov_b32_e32 v5, s29
; GFX8-NEXT: s_add_u32 s26, s0, 0xb0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s27
; GFX8-NEXT: s_addc_u32 s27, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s26
; GFX8-NEXT: v_mov_b32_e32 v2, s22
; GFX8-NEXT: v_mov_b32_e32 v5, s27
; GFX8-NEXT: s_add_u32 s22, s0, 0xa0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s23
; GFX8-NEXT: s_addc_u32 s23, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s22
; GFX8-NEXT: v_mov_b32_e32 v2, s24
; GFX8-NEXT: v_mov_b32_e32 v5, s23
; GFX8-NEXT: s_add_u32 s22, s0, 0x90
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_addc_u32 s23, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s22
; GFX8-NEXT: v_mov_b32_e32 v0, s21
; GFX8-NEXT: v_mov_b32_e32 v2, s19
; GFX8-NEXT: v_mov_b32_e32 v5, s23
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s20
; GFX8-NEXT: s_add_u32 s20, s0, 0x80
; GFX8-NEXT: s_addc_u32 s21, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s20
; GFX8-NEXT: v_mov_b32_e32 v2, s17
; GFX8-NEXT: v_mov_b32_e32 v5, s21
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s18
; GFX8-NEXT: s_add_u32 s18, s0, 0x70
; GFX8-NEXT: s_addc_u32 s19, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s18
; GFX8-NEXT: v_mov_b32_e32 v2, s15
; GFX8-NEXT: v_mov_b32_e32 v5, s19
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v0, s16
; GFX8-NEXT: s_add_u32 s16, s0, 0x60
; GFX8-NEXT: s_addc_u32 s17, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s16
; GFX8-NEXT: v_mov_b32_e32 v2, s13
; GFX8-NEXT: v_mov_b32_e32 v5, s17
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s12
; GFX8-NEXT: s_add_u32 s12, s0, 0x50
; GFX8-NEXT: s_addc_u32 s13, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s12
; GFX8-NEXT: v_mov_b32_e32 v0, s14
; GFX8-NEXT: v_mov_b32_e32 v5, s13
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s10
; GFX8-NEXT: s_add_u32 s10, s0, 64
; GFX8-NEXT: v_mov_b32_e32 v0, s11
; GFX8-NEXT: s_addc_u32 s11, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s10
; GFX8-NEXT: v_mov_b32_e32 v5, s11
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: s_add_u32 s8, s0, 48
; GFX8-NEXT: v_mov_b32_e32 v0, s9
; GFX8-NEXT: s_addc_u32 s9, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s8
; GFX8-NEXT: v_mov_b32_e32 v5, s9
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: s_add_u32 s6, s0, 32
; GFX8-NEXT: v_mov_b32_e32 v0, s7
; GFX8-NEXT: s_addc_u32 s7, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s6
; GFX8-NEXT: v_mov_b32_e32 v5, s7
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_add_u32 s4, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s5
; GFX8-NEXT: s_addc_u32 s5, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, s4
; GFX8-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v64i1_to_v64i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @38
; EG-NEXT: ALU 95, @41, KC0[], KC1[]
; EG-NEXT: ALU 99, @137, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 60, @237, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T82.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T81.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T80.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T79.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T78.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T77.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T76.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T75.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T74.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T73.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T72.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T71.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T70.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T69.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T68.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T67.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T66.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T65.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T64.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T63.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T62.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T61.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T60.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T59.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T58.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T57.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T56.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T55.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T54.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T53.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T52.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T51.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 38:
; EG-NEXT: VTX_READ_64 T25.XY, T19.X, 0, #1
; EG-NEXT: ALU clause starting at 40:
; EG-NEXT: MOV * T19.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 41:
; EG-NEXT: LSHR * T19.Z, T25.Y, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T19.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T19.Y, 0.0,
; EG-NEXT: BFE_UINT * T20.Z, T25.Y, literal.y, 1,
; EG-NEXT: 30(4.203895e-44), 29(4.063766e-44)
; EG-NEXT: BFE_UINT T20.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T20.Y, 0.0,
; EG-NEXT: BFE_UINT * T21.Z, T25.Y, literal.y, 1,
; EG-NEXT: 28(3.923636e-44), 27(3.783506e-44)
; EG-NEXT: BFE_UINT T21.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T21.Y, 0.0,
; EG-NEXT: BFE_UINT * T22.Z, T25.Y, literal.y, 1,
; EG-NEXT: 26(3.643376e-44), 25(3.503246e-44)
; EG-NEXT: BFE_UINT T22.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T22.Y, 0.0,
; EG-NEXT: BFE_UINT * T23.Z, T25.Y, literal.y, 1,
; EG-NEXT: 24(3.363116e-44), 23(3.222986e-44)
; EG-NEXT: BFE_UINT T23.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T23.Y, 0.0,
; EG-NEXT: BFE_UINT * T24.Z, T25.Y, literal.y, 1,
; EG-NEXT: 22(3.082857e-44), 21(2.942727e-44)
; EG-NEXT: BFE_UINT T24.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T24.Y, 0.0,
; EG-NEXT: BFE_UINT * T26.Z, T25.Y, literal.y, 1,
; EG-NEXT: 20(2.802597e-44), 19(2.662467e-44)
; EG-NEXT: BFE_UINT T26.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T26.Y, 0.0,
; EG-NEXT: BFE_UINT * T27.Z, T25.Y, literal.y, 1,
; EG-NEXT: 18(2.522337e-44), 17(2.382207e-44)
; EG-NEXT: BFE_UINT T27.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T27.Y, 0.0,
; EG-NEXT: BFE_UINT * T28.Z, T25.Y, literal.y, 1,
; EG-NEXT: 16(2.242078e-44), 15(2.101948e-44)
; EG-NEXT: BFE_UINT T28.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T28.Y, 0.0,
; EG-NEXT: BFE_UINT * T29.Z, T25.Y, literal.y, 1,
; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44)
; EG-NEXT: BFE_UINT T29.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T29.Y, 0.0,
; EG-NEXT: BFE_UINT * T30.Z, T25.Y, literal.y, 1,
; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T30.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T30.Y, 0.0,
; EG-NEXT: BFE_UINT * T31.Z, T25.Y, literal.y, 1,
; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44)
; EG-NEXT: BFE_UINT T31.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T31.Y, 0.0,
; EG-NEXT: BFE_UINT * T32.Z, T25.Y, literal.y, 1,
; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45)
; EG-NEXT: BFE_UINT T32.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T32.Y, 0.0,
; EG-NEXT: BFE_UINT * T33.Z, T25.Y, literal.y, 1,
; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45)
; EG-NEXT: BFE_UINT T33.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T33.Y, 0.0,
; EG-NEXT: BFE_UINT * T34.Z, T25.Y, literal.y, 1,
; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45)
; EG-NEXT: BFE_UINT T34.X, T25.Y, literal.x, 1,
; EG-NEXT: MOV T34.Y, 0.0,
; EG-NEXT: BFE_UINT T35.Z, T25.Y, 1, 1,
; EG-NEXT: AND_INT * T35.X, T25.Y, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T35.Y, 0.0,
; EG-NEXT: LSHR * T36.Z, T25.X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T36.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T36.Y, 0.0,
; EG-NEXT: BFE_UINT * T37.Z, T25.X, literal.y, 1,
; EG-NEXT: 30(4.203895e-44), 29(4.063766e-44)
; EG-NEXT: BFE_UINT T37.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T37.Y, 0.0,
; EG-NEXT: BFE_UINT * T38.Z, T25.X, literal.y, 1,
; EG-NEXT: 28(3.923636e-44), 27(3.783506e-44)
; EG-NEXT: BFE_UINT T38.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T38.Y, 0.0,
; EG-NEXT: BFE_UINT * T39.Z, T25.X, literal.y, 1,
; EG-NEXT: 26(3.643376e-44), 25(3.503246e-44)
; EG-NEXT: BFE_UINT T39.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T39.Y, 0.0,
; EG-NEXT: BFE_UINT * T40.Z, T25.X, literal.y, 1,
; EG-NEXT: 24(3.363116e-44), 23(3.222986e-44)
; EG-NEXT: BFE_UINT T40.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T40.Y, 0.0,
; EG-NEXT: BFE_UINT * T41.Z, T25.X, literal.y, 1,
; EG-NEXT: 22(3.082857e-44), 21(2.942727e-44)
; EG-NEXT: BFE_UINT T41.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T41.Y, 0.0,
; EG-NEXT: BFE_UINT * T42.Z, T25.X, literal.y, 1,
; EG-NEXT: 20(2.802597e-44), 19(2.662467e-44)
; EG-NEXT: BFE_UINT T42.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T42.Y, 0.0,
; EG-NEXT: BFE_UINT * T43.Z, T25.X, literal.y, 1,
; EG-NEXT: 18(2.522337e-44), 17(2.382207e-44)
; EG-NEXT: BFE_UINT * T43.X, T25.X, literal.x, 1,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 137:
; EG-NEXT: MOV T43.Y, 0.0,
; EG-NEXT: BFE_UINT * T44.Z, T25.X, literal.x, 1,
; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T44.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T44.Y, 0.0,
; EG-NEXT: BFE_UINT * T45.Z, T25.X, literal.y, 1,
; EG-NEXT: 14(1.961818e-44), 13(1.821688e-44)
; EG-NEXT: BFE_UINT T45.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T45.Y, 0.0,
; EG-NEXT: BFE_UINT * T46.Z, T25.X, literal.y, 1,
; EG-NEXT: 12(1.681558e-44), 11(1.541428e-44)
; EG-NEXT: BFE_UINT T46.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T46.Y, 0.0,
; EG-NEXT: BFE_UINT * T47.Z, T25.X, literal.y, 1,
; EG-NEXT: 10(1.401298e-44), 9(1.261169e-44)
; EG-NEXT: BFE_UINT T47.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T47.Y, 0.0,
; EG-NEXT: BFE_UINT * T48.Z, T25.X, literal.y, 1,
; EG-NEXT: 8(1.121039e-44), 7(9.809089e-45)
; EG-NEXT: BFE_UINT T48.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T48.Y, 0.0,
; EG-NEXT: BFE_UINT * T49.Z, T25.X, literal.y, 1,
; EG-NEXT: 6(8.407791e-45), 5(7.006492e-45)
; EG-NEXT: BFE_UINT T49.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T49.Y, 0.0,
; EG-NEXT: BFE_UINT * T50.Z, T25.X, literal.y, 1,
; EG-NEXT: 4(5.605194e-45), 3(4.203895e-45)
; EG-NEXT: BFE_UINT T50.X, T25.X, literal.x, 1,
; EG-NEXT: MOV T50.Y, 0.0,
; EG-NEXT: BFE_UINT T25.Z, T25.X, 1, 1,
; EG-NEXT: AND_INT * T25.X, T25.X, 1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T25.Y, 0.0,
; EG-NEXT: MOV T19.W, 0.0,
; EG-NEXT: MOV * T20.W, 0.0,
; EG-NEXT: MOV T21.W, 0.0,
; EG-NEXT: MOV * T22.W, 0.0,
; EG-NEXT: MOV T23.W, 0.0,
; EG-NEXT: MOV * T24.W, 0.0,
; EG-NEXT: MOV T26.W, 0.0,
; EG-NEXT: MOV * T27.W, 0.0,
; EG-NEXT: MOV T28.W, 0.0,
; EG-NEXT: MOV * T29.W, 0.0,
; EG-NEXT: MOV T30.W, 0.0,
; EG-NEXT: MOV * T31.W, 0.0,
; EG-NEXT: MOV T32.W, 0.0,
; EG-NEXT: MOV * T33.W, 0.0,
; EG-NEXT: MOV T34.W, 0.0,
; EG-NEXT: MOV * T35.W, 0.0,
; EG-NEXT: MOV T36.W, 0.0,
; EG-NEXT: MOV * T37.W, 0.0,
; EG-NEXT: MOV T38.W, 0.0,
; EG-NEXT: MOV * T39.W, 0.0,
; EG-NEXT: MOV T40.W, 0.0,
; EG-NEXT: MOV * T41.W, 0.0,
; EG-NEXT: MOV T42.W, 0.0,
; EG-NEXT: MOV * T43.W, 0.0,
; EG-NEXT: MOV T44.W, 0.0,
; EG-NEXT: MOV * T45.W, 0.0,
; EG-NEXT: MOV T46.W, 0.0,
; EG-NEXT: MOV * T47.W, 0.0,
; EG-NEXT: MOV T48.W, 0.0,
; EG-NEXT: MOV * T49.W, 0.0,
; EG-NEXT: MOV T50.W, 0.0,
; EG-NEXT: MOV * T25.W, 0.0,
; EG-NEXT: LSHR T51.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T52.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T53.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T54.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT: LSHR T55.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
; EG-NEXT: LSHR T56.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
; EG-NEXT: LSHR T57.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43)
; EG-NEXT: LSHR T58.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
; EG-NEXT: LSHR T59.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
; EG-NEXT: LSHR T60.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
; EG-NEXT: LSHR T61.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
; EG-NEXT: LSHR * T62.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 237:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T63.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
; EG-NEXT: LSHR T64.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43)
; EG-NEXT: LSHR T65.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
; EG-NEXT: LSHR T66.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 256(3.587324e-43)
; EG-NEXT: LSHR T67.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 272(3.811532e-43)
; EG-NEXT: LSHR T68.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 288(4.035740e-43)
; EG-NEXT: LSHR T69.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 304(4.259947e-43)
; EG-NEXT: LSHR T70.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 320(4.484155e-43)
; EG-NEXT: LSHR T71.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 336(4.708363e-43)
; EG-NEXT: LSHR T72.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 352(4.932571e-43)
; EG-NEXT: LSHR T73.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 368(5.156778e-43)
; EG-NEXT: LSHR T74.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 384(5.380986e-43)
; EG-NEXT: LSHR T75.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 400(5.605194e-43)
; EG-NEXT: LSHR T76.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 416(5.829402e-43)
; EG-NEXT: LSHR T77.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 432(6.053609e-43)
; EG-NEXT: LSHR T78.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 448(6.277817e-43)
; EG-NEXT: LSHR T79.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 464(6.502025e-43)
; EG-NEXT: LSHR T80.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 480(6.726233e-43)
; EG-NEXT: LSHR T81.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 496(6.950440e-43)
; EG-NEXT: LSHR * T82.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_zextload_v64i1_to_v64i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10014
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10015
; GFX12-NEXT: s_lshr_b32 s4, s3, 31
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001e
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1001d
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001c
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1001b
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001a
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:480
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10019
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10018
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:464
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10017
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10016
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:448
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10013
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10012
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10011
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10010
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:400
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000f
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000e
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:384
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000d
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000c
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:368
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000b
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000a
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:352
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10009
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10008
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:336
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10007
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10006
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:320
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10005
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10004
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:304
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10003
; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10002
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:288
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s5
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10001
; GFX12-NEXT: s_and_b32 s3, s3, 1
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:272
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_lshr_b32 s3, s2, 31
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001e
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:256
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001d
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001c
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:240
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001b
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001a
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10019
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10018
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10017
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10016
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10015
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s3
; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10013
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10012
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10011
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10010
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000f
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000e
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000b
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000a
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10008
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10005
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
; GFX12-NEXT: s_and_b32 s2, s2, 1
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: v_mov_b32_e32 v2, s3
; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <64 x i1>, ptr addrspace(4) %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
; GFX6-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshr_b32 s42, s5, 30
; GFX6-NEXT: s_lshr_b32 s36, s4, 30
; GFX6-NEXT: s_lshr_b32 s38, s4, 31
; GFX6-NEXT: s_lshr_b32 s30, s4, 28
; GFX6-NEXT: s_lshr_b32 s34, s4, 29
; GFX6-NEXT: s_lshr_b32 s26, s4, 26
; GFX6-NEXT: s_lshr_b32 s28, s4, 27
; GFX6-NEXT: s_lshr_b32 s22, s4, 24
; GFX6-NEXT: s_lshr_b32 s24, s4, 25
; GFX6-NEXT: s_lshr_b32 s18, s4, 22
; GFX6-NEXT: s_lshr_b32 s20, s4, 23
; GFX6-NEXT: s_lshr_b32 s14, s4, 20
; GFX6-NEXT: s_lshr_b32 s16, s4, 21
; GFX6-NEXT: s_lshr_b32 s10, s4, 18
; GFX6-NEXT: s_lshr_b32 s12, s4, 19
; GFX6-NEXT: s_lshr_b32 s6, s4, 16
; GFX6-NEXT: s_lshr_b32 s8, s4, 17
; GFX6-NEXT: s_ashr_i32 s7, s5, 31
; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v4, s7
; GFX6-NEXT: s_lshr_b32 s40, s4, 14
; GFX6-NEXT: v_mov_b32_e32 v0, s44
; GFX6-NEXT: v_mov_b32_e32 v1, s45
; GFX6-NEXT: s_mov_b32 s44, s5
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v6, s44
; GFX6-NEXT: v_mov_b32_e32 v7, s45
; GFX6-NEXT: s_lshr_b32 s44, s4, 15
; GFX6-NEXT: v_mov_b32_e32 v2, s42
; GFX6-NEXT: v_mov_b32_e32 v3, s43
; GFX6-NEXT: s_lshr_b32 s42, s4, 12
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v8, s36
; GFX6-NEXT: v_mov_b32_e32 v9, s37
; GFX6-NEXT: s_lshr_b32 s36, s4, 13
; GFX6-NEXT: v_mov_b32_e32 v10, s38
; GFX6-NEXT: v_mov_b32_e32 v11, s39
; GFX6-NEXT: s_lshr_b32 s38, s4, 10
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v12, s30
; GFX6-NEXT: v_mov_b32_e32 v13, s31
; GFX6-NEXT: s_lshr_b32 s30, s4, 11
; GFX6-NEXT: v_mov_b32_e32 v14, s34
; GFX6-NEXT: v_mov_b32_e32 v15, s35
; GFX6-NEXT: s_lshr_b32 s34, s4, 8
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v5, s7
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:496
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s26
; GFX6-NEXT: v_mov_b32_e32 v3, s27
; GFX6-NEXT: s_lshr_b32 s26, s4, 9
; GFX6-NEXT: v_mov_b32_e32 v4, s28
; GFX6-NEXT: v_mov_b32_e32 v5, s29
; GFX6-NEXT: s_lshr_b32 s28, s4, 6
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v8, s22
; GFX6-NEXT: v_mov_b32_e32 v9, s23
; GFX6-NEXT: s_lshr_b32 s22, s4, 7
; GFX6-NEXT: v_mov_b32_e32 v10, s24
; GFX6-NEXT: v_mov_b32_e32 v11, s25
; GFX6-NEXT: s_lshr_b32 s24, s4, 4
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s18
; GFX6-NEXT: v_mov_b32_e32 v13, s19
; GFX6-NEXT: s_lshr_b32 s18, s4, 5
; GFX6-NEXT: v_mov_b32_e32 v14, s20
; GFX6-NEXT: v_mov_b32_e32 v15, s21
; GFX6-NEXT: s_lshr_b32 s20, s4, 2
; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:208
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s14
; GFX6-NEXT: v_mov_b32_e32 v3, s15
; GFX6-NEXT: s_lshr_b32 s14, s4, 3
; GFX6-NEXT: v_mov_b32_e32 v4, s16
; GFX6-NEXT: v_mov_b32_e32 v5, s17
; GFX6-NEXT: s_lshr_b32 s16, s4, 1
; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v8, s10
; GFX6-NEXT: v_mov_b32_e32 v9, s11
; GFX6-NEXT: s_lshr_b32 s10, s5, 29
; GFX6-NEXT: v_mov_b32_e32 v10, s12
; GFX6-NEXT: v_mov_b32_e32 v11, s13
; GFX6-NEXT: s_lshr_b32 s12, s5, 28
; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:176
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s6
; GFX6-NEXT: v_mov_b32_e32 v13, s7
; GFX6-NEXT: s_lshr_b32 s6, s5, 26
; GFX6-NEXT: v_mov_b32_e32 v14, s8
; GFX6-NEXT: v_mov_b32_e32 v15, s9
; GFX6-NEXT: s_lshr_b32 s8, s5, 27
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s40
; GFX6-NEXT: v_mov_b32_e32 v3, s41
; GFX6-NEXT: s_lshr_b32 s40, s5, 25
; GFX6-NEXT: v_mov_b32_e32 v4, s44
; GFX6-NEXT: v_mov_b32_e32 v5, s45
; GFX6-NEXT: s_lshr_b32 s44, s5, 24
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v8, s42
; GFX6-NEXT: v_mov_b32_e32 v9, s43
; GFX6-NEXT: s_lshr_b32 s42, s5, 22
; GFX6-NEXT: v_mov_b32_e32 v10, s36
; GFX6-NEXT: v_mov_b32_e32 v11, s37
; GFX6-NEXT: s_lshr_b32 s36, s5, 23
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s38
; GFX6-NEXT: v_mov_b32_e32 v13, s39
; GFX6-NEXT: s_lshr_b32 s38, s5, 20
; GFX6-NEXT: v_mov_b32_e32 v14, s30
; GFX6-NEXT: v_mov_b32_e32 v15, s31
; GFX6-NEXT: s_lshr_b32 s4, s5, 21
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: s_bfe_i64 s[30:31], s[34:35], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s30
; GFX6-NEXT: v_mov_b32_e32 v3, s31
; GFX6-NEXT: s_lshr_b32 s30, s5, 18
; GFX6-NEXT: v_mov_b32_e32 v4, s26
; GFX6-NEXT: v_mov_b32_e32 v5, s27
; GFX6-NEXT: s_lshr_b32 s26, s5, 19
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v8, s28
; GFX6-NEXT: v_mov_b32_e32 v9, s29
; GFX6-NEXT: s_lshr_b32 s28, s5, 17
; GFX6-NEXT: v_mov_b32_e32 v10, s22
; GFX6-NEXT: v_mov_b32_e32 v11, s23
; GFX6-NEXT: s_lshr_b32 s22, s5, 16
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s24
; GFX6-NEXT: v_mov_b32_e32 v13, s25
; GFX6-NEXT: s_lshr_b32 s24, s5, 14
; GFX6-NEXT: v_mov_b32_e32 v14, s18
; GFX6-NEXT: v_mov_b32_e32 v15, s19
; GFX6-NEXT: s_lshr_b32 s18, s5, 15
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
; GFX6-NEXT: v_mov_b32_e32 v16, s20
; GFX6-NEXT: v_mov_b32_e32 v17, s21
; GFX6-NEXT: s_lshr_b32 s20, s5, 12
; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX6-NEXT: v_mov_b32_e32 v18, s14
; GFX6-NEXT: v_mov_b32_e32 v19, s15
; GFX6-NEXT: s_lshr_b32 s14, s5, 13
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, s16
; GFX6-NEXT: v_mov_b32_e32 v3, s17
; GFX6-NEXT: s_lshr_b32 s16, s5, 10
; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v8, s12
; GFX6-NEXT: v_mov_b32_e32 v9, s13
; GFX6-NEXT: s_lshr_b32 s12, s5, 11
; GFX6-NEXT: v_mov_b32_e32 v10, s10
; GFX6-NEXT: v_mov_b32_e32 v11, s11
; GFX6-NEXT: s_lshr_b32 s10, s5, 8
; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v12, s6
; GFX6-NEXT: v_mov_b32_e32 v13, s7
; GFX6-NEXT: s_lshr_b32 s6, s5, 9
; GFX6-NEXT: v_mov_b32_e32 v14, s8
; GFX6-NEXT: v_mov_b32_e32 v15, s9
; GFX6-NEXT: s_lshr_b32 s8, s5, 6
; GFX6-NEXT: s_bfe_i64 s[34:35], s[44:45], 0x10000
; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v16, s34
; GFX6-NEXT: v_mov_b32_e32 v17, s35
; GFX6-NEXT: s_lshr_b32 s34, s5, 7
; GFX6-NEXT: v_mov_b32_e32 v18, s40
; GFX6-NEXT: v_mov_b32_e32 v19, s41
; GFX6-NEXT: s_lshr_b32 s40, s5, 4
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s42
; GFX6-NEXT: v_mov_b32_e32 v1, s43
; GFX6-NEXT: s_lshr_b32 s42, s5, 5
; GFX6-NEXT: v_mov_b32_e32 v2, s36
; GFX6-NEXT: v_mov_b32_e32 v3, s37
; GFX6-NEXT: s_lshr_b32 s36, s5, 2
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v8, s38
; GFX6-NEXT: v_mov_b32_e32 v9, s39
; GFX6-NEXT: s_lshr_b32 s38, s5, 3
; GFX6-NEXT: s_lshr_b32 s44, s5, 1
; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464
; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:448
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432
; GFX6-NEXT: v_mov_b32_e32 v10, s4
; GFX6-NEXT: v_mov_b32_e32 v11, s5
; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:416
; GFX6-NEXT: s_waitcnt expcnt(1)
; GFX6-NEXT: v_mov_b32_e32 v0, s30
; GFX6-NEXT: v_mov_b32_e32 v1, s31
; GFX6-NEXT: v_mov_b32_e32 v2, s26
; GFX6-NEXT: v_mov_b32_e32 v3, s27
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s22
; GFX6-NEXT: v_mov_b32_e32 v1, s23
; GFX6-NEXT: v_mov_b32_e32 v2, s28
; GFX6-NEXT: v_mov_b32_e32 v3, s29
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s24
; GFX6-NEXT: v_mov_b32_e32 v1, s25
; GFX6-NEXT: v_mov_b32_e32 v2, s18
; GFX6-NEXT: v_mov_b32_e32 v3, s19
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s20
; GFX6-NEXT: v_mov_b32_e32 v1, s21
; GFX6-NEXT: v_mov_b32_e32 v2, s14
; GFX6-NEXT: v_mov_b32_e32 v3, s15
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s16
; GFX6-NEXT: v_mov_b32_e32 v1, s17
; GFX6-NEXT: v_mov_b32_e32 v2, s12
; GFX6-NEXT: v_mov_b32_e32 v3, s13
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s10
; GFX6-NEXT: v_mov_b32_e32 v1, s11
; GFX6-NEXT: v_mov_b32_e32 v2, s6
; GFX6-NEXT: v_mov_b32_e32 v3, s7
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s8
; GFX6-NEXT: v_mov_b32_e32 v1, s9
; GFX6-NEXT: v_mov_b32_e32 v2, s34
; GFX6-NEXT: v_mov_b32_e32 v3, s35
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s40
; GFX6-NEXT: v_mov_b32_e32 v1, s41
; GFX6-NEXT: v_mov_b32_e32 v2, s42
; GFX6-NEXT: v_mov_b32_e32 v3, s43
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v0, s36
; GFX6-NEXT: v_mov_b32_e32 v1, s37
; GFX6-NEXT: v_mov_b32_e32 v2, s38
; GFX6-NEXT: v_mov_b32_e32 v3, s39
; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272
; GFX6-NEXT: v_mov_b32_e32 v8, s44
; GFX6-NEXT: v_mov_b32_e32 v9, s45
; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:256
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
; GFX8-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[10:11], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s0, s3, 8
; GFX8-NEXT: s_lshr_b32 s48, s3, 15
; GFX8-NEXT: v_writelane_b32 v62, s0, 0
; GFX8-NEXT: s_lshr_b32 s74, s3, 30
; GFX8-NEXT: s_lshr_b32 s30, s3, 31
; GFX8-NEXT: s_lshr_b32 s72, s3, 28
; GFX8-NEXT: s_lshr_b32 s34, s3, 29
; GFX8-NEXT: s_lshr_b32 s70, s3, 26
; GFX8-NEXT: s_lshr_b32 s36, s3, 27
; GFX8-NEXT: s_lshr_b32 s68, s3, 24
; GFX8-NEXT: s_lshr_b32 s38, s3, 25
; GFX8-NEXT: s_lshr_b32 s64, s3, 22
; GFX8-NEXT: s_lshr_b32 s40, s3, 23
; GFX8-NEXT: s_lshr_b32 s60, s3, 20
; GFX8-NEXT: s_lshr_b32 s42, s3, 21
; GFX8-NEXT: s_lshr_b32 s66, s3, 18
; GFX8-NEXT: s_lshr_b32 s44, s3, 19
; GFX8-NEXT: s_lshr_b32 s56, s3, 16
; GFX8-NEXT: s_lshr_b32 s46, s3, 17
; GFX8-NEXT: s_lshr_b32 s58, s3, 14
; GFX8-NEXT: s_lshr_b32 s62, s3, 12
; GFX8-NEXT: s_lshr_b32 s54, s3, 10
; GFX8-NEXT: v_writelane_b32 v62, s1, 1
; GFX8-NEXT: s_lshr_b32 s0, s3, 9
; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX8-NEXT: s_lshr_b32 s52, s3, 11
; GFX8-NEXT: v_writelane_b32 v62, s0, 2
; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX8-NEXT: v_mov_b32_e32 v34, s48
; GFX8-NEXT: s_lshr_b32 s48, s2, 1
; GFX8-NEXT: s_lshr_b32 s50, s3, 13
; GFX8-NEXT: v_writelane_b32 v62, s1, 3
; GFX8-NEXT: s_lshr_b32 s6, s3, 6
; GFX8-NEXT: s_lshr_b32 s10, s3, 7
; GFX8-NEXT: s_lshr_b32 s12, s3, 4
; GFX8-NEXT: s_lshr_b32 s14, s3, 5
; GFX8-NEXT: s_lshr_b32 s16, s3, 2
; GFX8-NEXT: s_lshr_b32 s18, s3, 3
; GFX8-NEXT: s_lshr_b32 s20, s3, 1
; GFX8-NEXT: s_mov_b32 s22, s3
; GFX8-NEXT: s_lshr_b32 s24, s2, 30
; GFX8-NEXT: s_lshr_b32 s26, s2, 31
; GFX8-NEXT: s_lshr_b32 s28, s2, 28
; GFX8-NEXT: v_mov_b32_e32 v4, s74
; GFX8-NEXT: v_mov_b32_e32 v12, s72
; GFX8-NEXT: v_mov_b32_e32 v0, s70
; GFX8-NEXT: v_mov_b32_e32 v8, s68
; GFX8-NEXT: v_mov_b32_e32 v16, s64
; GFX8-NEXT: v_mov_b32_e32 v20, s60
; GFX8-NEXT: v_mov_b32_e32 v24, s66
; GFX8-NEXT: v_mov_b32_e32 v28, s56
; GFX8-NEXT: v_mov_b32_e32 v32, s58
; GFX8-NEXT: v_mov_b32_e32 v36, s62
; GFX8-NEXT: s_lshr_b32 s86, s2, 29
; GFX8-NEXT: v_mov_b32_e32 v40, s54
; GFX8-NEXT: s_lshr_b32 s84, s2, 26
; GFX8-NEXT: s_lshr_b32 s82, s2, 27
; GFX8-NEXT: s_bfe_i64 vcc, s[52:53], 0x10000
; GFX8-NEXT: s_lshr_b32 s80, s2, 24
; GFX8-NEXT: v_mov_b32_e32 v6, s30
; GFX8-NEXT: v_mov_b32_e32 v7, s31
; GFX8-NEXT: s_lshr_b32 s78, s2, 25
; GFX8-NEXT: s_lshr_b32 s76, s2, 22
; GFX8-NEXT: v_mov_b32_e32 v14, s34
; GFX8-NEXT: s_lshr_b32 s74, s2, 23
; GFX8-NEXT: s_lshr_b32 s72, s2, 20
; GFX8-NEXT: v_mov_b32_e32 v2, s36
; GFX8-NEXT: s_lshr_b32 s70, s2, 21
; GFX8-NEXT: s_lshr_b32 s68, s2, 18
; GFX8-NEXT: v_mov_b32_e32 v10, s38
; GFX8-NEXT: s_lshr_b32 s66, s2, 19
; GFX8-NEXT: s_lshr_b32 s64, s2, 16
; GFX8-NEXT: v_mov_b32_e32 v18, s40
; GFX8-NEXT: s_lshr_b32 s62, s2, 17
; GFX8-NEXT: s_lshr_b32 s60, s2, 14
; GFX8-NEXT: v_mov_b32_e32 v22, s42
; GFX8-NEXT: s_lshr_b32 s58, s2, 15
; GFX8-NEXT: s_lshr_b32 s56, s2, 12
; GFX8-NEXT: v_mov_b32_e32 v26, s44
; GFX8-NEXT: s_lshr_b32 s54, s2, 13
; GFX8-NEXT: s_lshr_b32 s52, s2, 10
; GFX8-NEXT: v_mov_b32_e32 v30, s46
; GFX8-NEXT: s_lshr_b32 s4, s2, 11
; GFX8-NEXT: s_lshr_b32 s0, s2, 8
; GFX8-NEXT: s_lshr_b32 s46, s2, 9
; GFX8-NEXT: s_lshr_b32 s44, s2, 6
; GFX8-NEXT: s_lshr_b32 s42, s2, 7
; GFX8-NEXT: s_lshr_b32 s40, s2, 4
; GFX8-NEXT: s_lshr_b32 s38, s2, 5
; GFX8-NEXT: s_lshr_b32 s36, s2, 2
; GFX8-NEXT: s_lshr_b32 s34, s2, 3
; GFX8-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x10000
; GFX8-NEXT: s_bfe_i64 s[2:3], s[48:49], 0x10000
; GFX8-NEXT: v_writelane_b32 v62, s2, 4
; GFX8-NEXT: v_writelane_b32 v62, s3, 5
; GFX8-NEXT: v_readlane_b32 s2, v62, 2
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX8-NEXT: v_readlane_b32 s3, v62, 3
; GFX8-NEXT: v_mov_b32_e32 v38, s50
; GFX8-NEXT: v_mov_b32_e32 v39, s51
; GFX8-NEXT: s_bfe_i64 s[50:51], s[4:5], 0x10000
; GFX8-NEXT: s_bfe_i64 s[4:5], s[6:7], 0x10000
; GFX8-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x10000
; GFX8-NEXT: v_readlane_b32 s2, v62, 0
; GFX8-NEXT: v_readlane_b32 s3, v62, 1
; GFX8-NEXT: v_mov_b32_e32 v5, s75
; GFX8-NEXT: v_mov_b32_e32 v13, s73
; GFX8-NEXT: v_mov_b32_e32 v15, s35
; GFX8-NEXT: v_mov_b32_e32 v1, s71
; GFX8-NEXT: v_mov_b32_e32 v3, s37
; GFX8-NEXT: v_mov_b32_e32 v9, s69
; GFX8-NEXT: v_mov_b32_e32 v11, s39
; GFX8-NEXT: v_mov_b32_e32 v17, s65
; GFX8-NEXT: v_mov_b32_e32 v19, s41
; GFX8-NEXT: v_mov_b32_e32 v21, s61
; GFX8-NEXT: v_mov_b32_e32 v23, s43
; GFX8-NEXT: v_mov_b32_e32 v25, s67
; GFX8-NEXT: v_mov_b32_e32 v27, s45
; GFX8-NEXT: v_mov_b32_e32 v29, s57
; GFX8-NEXT: v_mov_b32_e32 v31, s47
; GFX8-NEXT: v_mov_b32_e32 v33, s59
; GFX8-NEXT: v_mov_b32_e32 v35, s49
; GFX8-NEXT: v_mov_b32_e32 v37, s63
; GFX8-NEXT: v_mov_b32_e32 v41, s55
; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
; GFX8-NEXT: s_bfe_i64 s[48:49], s[0:1], 0x10000
; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
; GFX8-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
; GFX8-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
; GFX8-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
; GFX8-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
; GFX8-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
; GFX8-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX8-NEXT: s_bfe_i64 s[0:1], s[10:11], 0x10000
; GFX8-NEXT: s_bfe_i64 s[10:11], s[2:3], 0x10000
; GFX8-NEXT: s_add_u32 s2, s8, 0x1f0
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v43, s3
; GFX8-NEXT: v_mov_b32_e32 v42, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x1e0
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v45, s3
; GFX8-NEXT: v_mov_b32_e32 v44, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x1d0
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v47, s3
; GFX8-NEXT: v_mov_b32_e32 v46, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x1c0
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v49, s3
; GFX8-NEXT: v_mov_b32_e32 v48, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x1b0
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v51, s3
; GFX8-NEXT: v_mov_b32_e32 v50, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x1a0
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v53, s3
; GFX8-NEXT: v_mov_b32_e32 v52, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x190
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v55, s3
; GFX8-NEXT: v_mov_b32_e32 v54, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x180
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v57, s3
; GFX8-NEXT: v_mov_b32_e32 v56, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x170
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v59, s3
; GFX8-NEXT: v_mov_b32_e32 v58, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x160
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v61, s3
; GFX8-NEXT: v_mov_b32_e32 v60, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x150
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[44:45], v[12:15]
; GFX8-NEXT: flat_store_dwordx4 v[46:47], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v13, s3
; GFX8-NEXT: v_mov_b32_e32 v12, s2
; GFX8-NEXT: s_add_u32 s2, s8, 0x140
; GFX8-NEXT: s_addc_u32 s3, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x130
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[42:43], v[4:7]
; GFX8-NEXT: flat_store_dwordx4 v[48:49], v[8:11]
; GFX8-NEXT: flat_store_dwordx4 v[50:51], v[16:19]
; GFX8-NEXT: v_mov_b32_e32 v4, s10
; GFX8-NEXT: v_mov_b32_e32 v17, s1
; GFX8-NEXT: v_mov_b32_e32 v16, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x120
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v19, s1
; GFX8-NEXT: v_mov_b32_e32 v18, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x110
; GFX8-NEXT: v_mov_b32_e32 v5, s11
; GFX8-NEXT: v_mov_b32_e32 v15, s3
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: v_mov_b32_e32 v42, vcc_lo
; GFX8-NEXT: v_mov_b32_e32 v43, vcc_hi
; GFX8-NEXT: v_mov_b32_e32 v14, s2
; GFX8-NEXT: v_mov_b32_e32 v6, s6
; GFX8-NEXT: v_mov_b32_e32 v7, s7
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_mov_b32_e32 v8, s12
; GFX8-NEXT: flat_store_dwordx4 v[52:53], v[20:23]
; GFX8-NEXT: v_mov_b32_e32 v9, s13
; GFX8-NEXT: flat_store_dwordx4 v[54:55], v[24:27]
; GFX8-NEXT: v_mov_b32_e32 v10, s14
; GFX8-NEXT: v_mov_b32_e32 v11, s15
; GFX8-NEXT: flat_store_dwordx4 v[56:57], v[28:31]
; GFX8-NEXT: flat_store_dwordx4 v[58:59], v[32:35]
; GFX8-NEXT: flat_store_dwordx4 v[60:61], v[36:39]
; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[40:43]
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[4:7]
; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[8:11]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x100
; GFX8-NEXT: v_mov_b32_e32 v0, s16
; GFX8-NEXT: v_mov_b32_e32 v1, s17
; GFX8-NEXT: v_mov_b32_e32 v2, s18
; GFX8-NEXT: v_mov_b32_e32 v3, s19
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xf0
; GFX8-NEXT: v_mov_b32_e32 v0, s22
; GFX8-NEXT: v_mov_b32_e32 v1, s23
; GFX8-NEXT: v_mov_b32_e32 v2, s20
; GFX8-NEXT: v_mov_b32_e32 v3, s21
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xe0
; GFX8-NEXT: v_mov_b32_e32 v0, s24
; GFX8-NEXT: v_mov_b32_e32 v1, s25
; GFX8-NEXT: v_mov_b32_e32 v2, s26
; GFX8-NEXT: v_mov_b32_e32 v3, s27
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xd0
; GFX8-NEXT: v_mov_b32_e32 v0, s28
; GFX8-NEXT: v_mov_b32_e32 v1, s29
; GFX8-NEXT: v_mov_b32_e32 v2, s86
; GFX8-NEXT: v_mov_b32_e32 v3, s87
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xc0
; GFX8-NEXT: v_mov_b32_e32 v0, s84
; GFX8-NEXT: v_mov_b32_e32 v1, s85
; GFX8-NEXT: v_mov_b32_e32 v2, s82
; GFX8-NEXT: v_mov_b32_e32 v3, s83
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xb0
; GFX8-NEXT: v_mov_b32_e32 v0, s80
; GFX8-NEXT: v_mov_b32_e32 v1, s81
; GFX8-NEXT: v_mov_b32_e32 v2, s78
; GFX8-NEXT: v_mov_b32_e32 v3, s79
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0xa0
; GFX8-NEXT: v_mov_b32_e32 v0, s76
; GFX8-NEXT: v_mov_b32_e32 v1, s77
; GFX8-NEXT: v_mov_b32_e32 v2, s74
; GFX8-NEXT: v_mov_b32_e32 v3, s75
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x90
; GFX8-NEXT: v_mov_b32_e32 v0, s72
; GFX8-NEXT: v_mov_b32_e32 v1, s73
; GFX8-NEXT: v_mov_b32_e32 v2, s70
; GFX8-NEXT: v_mov_b32_e32 v3, s71
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x80
; GFX8-NEXT: v_mov_b32_e32 v0, s68
; GFX8-NEXT: v_mov_b32_e32 v1, s69
; GFX8-NEXT: v_mov_b32_e32 v2, s66
; GFX8-NEXT: v_mov_b32_e32 v3, s67
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x70
; GFX8-NEXT: v_mov_b32_e32 v0, s64
; GFX8-NEXT: v_mov_b32_e32 v1, s65
; GFX8-NEXT: v_mov_b32_e32 v2, s62
; GFX8-NEXT: v_mov_b32_e32 v3, s63
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x60
; GFX8-NEXT: v_mov_b32_e32 v0, s60
; GFX8-NEXT: v_mov_b32_e32 v1, s61
; GFX8-NEXT: v_mov_b32_e32 v2, s58
; GFX8-NEXT: v_mov_b32_e32 v3, s59
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 0x50
; GFX8-NEXT: v_mov_b32_e32 v0, s56
; GFX8-NEXT: v_mov_b32_e32 v1, s57
; GFX8-NEXT: v_mov_b32_e32 v2, s54
; GFX8-NEXT: v_mov_b32_e32 v3, s55
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 64
; GFX8-NEXT: v_mov_b32_e32 v0, s52
; GFX8-NEXT: v_mov_b32_e32 v1, s53
; GFX8-NEXT: v_mov_b32_e32 v2, s50
; GFX8-NEXT: v_mov_b32_e32 v3, s51
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 48
; GFX8-NEXT: v_mov_b32_e32 v0, s48
; GFX8-NEXT: v_mov_b32_e32 v1, s49
; GFX8-NEXT: v_mov_b32_e32 v2, s46
; GFX8-NEXT: v_mov_b32_e32 v3, s47
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 32
; GFX8-NEXT: v_mov_b32_e32 v0, s44
; GFX8-NEXT: v_mov_b32_e32 v1, s45
; GFX8-NEXT: v_mov_b32_e32 v2, s42
; GFX8-NEXT: v_mov_b32_e32 v3, s43
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: s_add_u32 s0, s8, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s40
; GFX8-NEXT: v_mov_b32_e32 v1, s41
; GFX8-NEXT: v_mov_b32_e32 v2, s38
; GFX8-NEXT: v_mov_b32_e32 v3, s39
; GFX8-NEXT: s_addc_u32 s1, s9, 0
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s36
; GFX8-NEXT: v_mov_b32_e32 v1, s37
; GFX8-NEXT: v_mov_b32_e32 v2, s34
; GFX8-NEXT: v_mov_b32_e32 v3, s35
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_readlane_b32 s0, v62, 4
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_readlane_b32 s1, v62, 5
; GFX8-NEXT: v_mov_b32_e32 v4, s8
; GFX8-NEXT: v_mov_b32_e32 v0, s30
; GFX8-NEXT: v_mov_b32_e32 v1, s31
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v5, s9
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v64i1_to_v64i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 22, @40, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @38
; EG-NEXT: ALU 89, @63, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 99, @153, KC0[], KC1[]
; EG-NEXT: ALU 107, @253, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T82.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T80.XYZW, T81.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T73.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T79.XYZW, T48.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T47.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T78.XYZW, T46.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T45.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T77.XYZW, T44.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T43.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T76.XYZW, T42.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T55.XYZW, T41.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T75.XYZW, T39.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T38.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T74.XYZW, T37.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T36.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T35.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T34.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T72.XYZW, T33.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T32.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T71.XYZW, T31.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T30.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T70.XYZW, T29.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T28.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T69.XYZW, T27.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T26.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T68.XYZW, T25.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T24.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T23.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T22.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T67.XYZW, T21.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T20.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T19.X, 1
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 38:
; EG-NEXT: VTX_READ_64 T40.XY, T26.X, 0, #1
; EG-NEXT: ALU clause starting at 40:
; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: LSHR T20.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44)
; EG-NEXT: LSHR T21.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44)
; EG-NEXT: LSHR T22.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44)
; EG-NEXT: LSHR T23.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43)
; EG-NEXT: LSHR T24.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43)
; EG-NEXT: LSHR T25.X, PV.W, literal.x,
; EG-NEXT: MOV * T26.X, KC0[2].Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 63:
; EG-NEXT: LSHR T26.X, T0.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43)
; EG-NEXT: LSHR T27.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43)
; EG-NEXT: LSHR T28.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43)
; EG-NEXT: LSHR T29.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43)
; EG-NEXT: LSHR T30.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43)
; EG-NEXT: LSHR T31.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43)
; EG-NEXT: LSHR T32.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43)
; EG-NEXT: LSHR T33.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43)
; EG-NEXT: LSHR T34.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 256(3.587324e-43)
; EG-NEXT: LSHR T35.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 272(3.811532e-43)
; EG-NEXT: LSHR T36.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 288(4.035740e-43)
; EG-NEXT: LSHR T37.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 304(4.259947e-43)
; EG-NEXT: LSHR T38.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 320(4.484155e-43)
; EG-NEXT: LSHR T39.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 336(4.708363e-43)
; EG-NEXT: LSHR T41.X, PV.W, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 352(4.932571e-43)
; EG-NEXT: LSHR T42.X, PV.W, literal.x,
; EG-NEXT: LSHR T0.Z, T40.Y, literal.y,
; EG-NEXT: LSHR T0.W, T40.Y, literal.z,
; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 28(3.923636e-44)
; EG-NEXT: 29(4.063766e-44), 368(5.156778e-43)
; EG-NEXT: LSHR T43.X, PS, literal.x,
; EG-NEXT: LSHR T0.Y, T40.Y, literal.y,
; EG-NEXT: LSHR T1.Z, T40.Y, literal.z,
; EG-NEXT: LSHR * T1.W, T40.Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 24(3.363116e-44)
; EG-NEXT: 25(3.503246e-44), 20(2.802597e-44)
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
; EG-NEXT: 384(5.380986e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T44.X, PV.W, literal.x,
; EG-NEXT: LSHR T1.Y, T40.Y, literal.y,
; EG-NEXT: LSHR T2.Z, T40.Y, literal.z,
; EG-NEXT: LSHR * T2.W, T40.Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 21(2.942727e-44)
; EG-NEXT: 16(2.242078e-44), 17(2.382207e-44)
; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.x,
; EG-NEXT: 400(5.605194e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T45.X, PV.W, literal.x,
; EG-NEXT: LSHR T2.Y, T40.Y, literal.y,
; EG-NEXT: LSHR T3.Z, T40.Y, literal.z,
; EG-NEXT: LSHR * T3.W, T40.Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 12(1.681558e-44)
; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44)
; EG-NEXT: ADD_INT * T4.W, KC0[2].Y, literal.x,
; EG-NEXT: 416(5.829402e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T46.X, PV.W, literal.x,
; EG-NEXT: LSHR T3.Y, T40.Y, literal.y,
; EG-NEXT: LSHR T4.Z, T40.Y, literal.z,
; EG-NEXT: LSHR * T4.W, T40.Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 9(1.261169e-44)
; EG-NEXT: 4(5.605194e-45), 5(7.006492e-45)
; EG-NEXT: ADD_INT * T5.W, KC0[2].Y, literal.x,
; EG-NEXT: 432(6.053609e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T47.X, PV.W, literal.x,
; EG-NEXT: ADD_INT T4.Y, KC0[2].Y, literal.y,
; EG-NEXT: LSHR T5.Z, T40.Y, 1,
; EG-NEXT: LSHR T5.W, T40.X, literal.z,
; EG-NEXT: ADD_INT * T6.W, KC0[2].Y, literal.w,
; EG-NEXT: 2(2.802597e-45), 464(6.502025e-43)
; EG-NEXT: 28(3.923636e-44), 448(6.277817e-43)
; EG-NEXT: ALU clause starting at 153:
; EG-NEXT: LSHR T48.X, T6.W, literal.x,
; EG-NEXT: LSHR T5.Y, T40.X, literal.y,
; EG-NEXT: LSHR T6.Z, T40.X, literal.z,
; EG-NEXT: LSHR * T6.W, T40.X, literal.w,
; EG-NEXT: 2(2.802597e-45), 29(4.063766e-44)
; EG-NEXT: 24(3.363116e-44), 25(3.503246e-44)
; EG-NEXT: LSHR * T7.W, T40.X, literal.x,
; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T49.X, T40.X, 0.0, 1,
; EG-NEXT: LSHR T6.Y, T40.X, literal.x,
; EG-NEXT: ASHR T50.Z, T40.Y, literal.y,
; EG-NEXT: LSHR T8.W, T40.Y, literal.z,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.w,
; EG-NEXT: 21(2.942727e-44), 31(4.344025e-44)
; EG-NEXT: 27(3.783506e-44), 30(4.203895e-44)
; EG-NEXT: BFE_INT T50.X, PS, 0.0, 1,
; EG-NEXT: LSHR T7.Y, T40.X, literal.x,
; EG-NEXT: BFE_INT T51.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.Y, literal.y,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.z,
; EG-NEXT: 16(2.242078e-44), 23(3.222986e-44)
; EG-NEXT: 26(3.643376e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T51.X, PS, 0.0, 1,
; EG-NEXT: MOV T50.Y, PV.X,
; EG-NEXT: BFE_INT T52.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.Y, literal.x,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.y,
; EG-NEXT: 19(2.662467e-44), 22(3.082857e-44)
; EG-NEXT: BFE_INT T52.X, PS, 0.0, 1,
; EG-NEXT: MOV T51.Y, PV.X,
; EG-NEXT: BFE_INT T53.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.Y, literal.x,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.y,
; EG-NEXT: 15(2.101948e-44), 18(2.522337e-44)
; EG-NEXT: BFE_INT T53.X, PS, 0.0, 1,
; EG-NEXT: MOV T52.Y, PV.X,
; EG-NEXT: BFE_INT T54.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.Y, literal.x,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.y,
; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44)
; EG-NEXT: BFE_INT T54.X, PS, 0.0, 1,
; EG-NEXT: MOV T53.Y, PV.X,
; EG-NEXT: BFE_INT T55.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.Y, literal.x,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.y,
; EG-NEXT: 7(9.809089e-45), 10(1.401298e-44)
; EG-NEXT: BFE_INT T55.X, PS, 0.0, 1,
; EG-NEXT: MOV T54.Y, PV.X,
; EG-NEXT: BFE_INT T56.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.Y, literal.x,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.y,
; EG-NEXT: 3(4.203895e-45), 6(8.407791e-45)
; EG-NEXT: BFE_INT T56.X, PS, 0.0, 1,
; EG-NEXT: MOV T55.Y, PV.X,
; EG-NEXT: BFE_INT T57.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T8.W, T40.X, literal.x,
; EG-NEXT: LSHR * T9.W, T40.Y, literal.y,
; EG-NEXT: 17(2.382207e-44), 2(2.802597e-45)
; EG-NEXT: BFE_INT T57.X, PS, 0.0, 1,
; EG-NEXT: MOV T56.Y, PV.X,
; EG-NEXT: ASHR T58.Z, T40.X, literal.x,
; EG-NEXT: LSHR T9.W, T40.X, literal.y,
; EG-NEXT: LSHR * T10.W, T40.X, literal.z,
; EG-NEXT: 31(4.344025e-44), 27(3.783506e-44)
; EG-NEXT: 30(4.203895e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T58.X, PS, 0.0, 1,
; EG-NEXT: MOV T57.Y, PV.X,
; EG-NEXT: BFE_INT T59.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, literal.x,
; EG-NEXT: LSHR * T10.W, T40.X, literal.y,
; EG-NEXT: 23(3.222986e-44), 26(3.643376e-44)
; EG-NEXT: BFE_INT T59.X, PS, 0.0, 1,
; EG-NEXT: MOV T58.Y, PV.X,
; EG-NEXT: BFE_INT T60.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, literal.x,
; EG-NEXT: LSHR * T10.W, T40.X, literal.y,
; EG-NEXT: 19(2.662467e-44), 22(3.082857e-44)
; EG-NEXT: BFE_INT T60.X, PS, 0.0, 1,
; EG-NEXT: MOV T59.Y, PV.X,
; EG-NEXT: BFE_INT T61.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, literal.x,
; EG-NEXT: LSHR * T10.W, T40.X, literal.y,
; EG-NEXT: 15(2.101948e-44), 18(2.522337e-44)
; EG-NEXT: BFE_INT T61.X, PS, 0.0, 1,
; EG-NEXT: MOV T60.Y, PV.X,
; EG-NEXT: BFE_INT T62.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, literal.x,
; EG-NEXT: LSHR * T10.W, T40.X, literal.y,
; EG-NEXT: 11(1.541428e-44), 14(1.961818e-44)
; EG-NEXT: BFE_INT T62.X, PS, 0.0, 1,
; EG-NEXT: MOV T61.Y, PV.X,
; EG-NEXT: BFE_INT T63.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, literal.x,
; EG-NEXT: LSHR * T10.W, T40.X, literal.y,
; EG-NEXT: 7(9.809089e-45), 10(1.401298e-44)
; EG-NEXT: BFE_INT T63.X, PS, 0.0, 1,
; EG-NEXT: MOV T62.Y, PV.X,
; EG-NEXT: BFE_INT T64.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR * T9.W, T40.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 253:
; EG-NEXT: LSHR * T10.W, T40.X, literal.x,
; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T64.X, PV.W, 0.0, 1,
; EG-NEXT: MOV T63.Y, T63.X,
; EG-NEXT: BFE_INT T65.Z, T9.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, 1, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T10.W, T40.X, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T65.X, PS, 0.0, 1,
; EG-NEXT: MOV T64.Y, PV.X,
; EG-NEXT: BFE_INT T49.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T9.W, T40.X, literal.x,
; EG-NEXT: LSHR * T10.W, T40.X, literal.y,
; EG-NEXT: 12(1.681558e-44), 5(7.006492e-45)
; EG-NEXT: BFE_INT T66.X, T40.Y, 0.0, 1,
; EG-NEXT: MOV T65.Y, PV.X,
; EG-NEXT: BFE_INT T67.Z, PS, 0.0, 1,
; EG-NEXT: LSHR T10.W, T40.X, literal.x,
; EG-NEXT: LSHR * T11.W, T40.X, literal.y,
; EG-NEXT: 9(1.261169e-44), 4(5.605194e-45)
; EG-NEXT: BFE_INT T67.X, PS, 0.0, 1,
; EG-NEXT: MOV T49.Y, T49.X,
; EG-NEXT: BFE_INT T40.Z, PV.W, 0.0, 1,
; EG-NEXT: LSHR T10.W, T40.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T11.W, T40.X, literal.y,
; EG-NEXT: 13(1.821688e-44), 8(1.121039e-44)
; EG-NEXT: BFE_INT T40.X, PS, 0.0, 1,
; EG-NEXT: MOV T67.Y, PV.X,
; EG-NEXT: BFE_INT T68.Z, PV.W, 0.0, 1,
; EG-NEXT: MOV T49.W, T49.Z,
; EG-NEXT: MOV * T65.W, T65.Z,
; EG-NEXT: BFE_INT T68.X, T9.W, 0.0, 1,
; EG-NEXT: MOV T40.Y, PV.X,
; EG-NEXT: BFE_INT T69.Z, T8.W, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: MOV T67.W, T67.Z,
; EG-NEXT: MOV * T64.W, T64.Z,
; EG-NEXT: BFE_INT T69.X, T7.Y, 0.0, 1,
; EG-NEXT: MOV T68.Y, PV.X,
; EG-NEXT: BFE_INT T70.Z, T6.Y, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: MOV T40.W, T40.Z,
; EG-NEXT: MOV * T63.W, T63.Z,
; EG-NEXT: BFE_INT T70.X, T7.W, 0.0, 1,
; EG-NEXT: MOV T69.Y, PV.X,
; EG-NEXT: BFE_INT T71.Z, T6.W, 0.0, 1, BS:VEC_120/SCL_212
; EG-NEXT: MOV T68.W, T68.Z,
; EG-NEXT: MOV * T62.W, T62.Z,
; EG-NEXT: BFE_INT T71.X, T6.Z, 0.0, 1,
; EG-NEXT: MOV T70.Y, PV.X,
; EG-NEXT: BFE_INT T72.Z, T5.Y, 0.0, 1,
; EG-NEXT: MOV T69.W, T69.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T61.W, T61.Z,
; EG-NEXT: BFE_INT T72.X, T5.W, 0.0, 1,
; EG-NEXT: MOV T71.Y, PV.X,
; EG-NEXT: BFE_INT T66.Z, T5.Z, 0.0, 1,
; EG-NEXT: MOV T70.W, T70.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T60.W, T60.Z,
; EG-NEXT: LSHR T73.X, T4.Y, literal.x,
; EG-NEXT: MOV T72.Y, PV.X,
; EG-NEXT: BFE_INT T74.Z, T4.W, 0.0, 1,
; EG-NEXT: MOV T71.W, T71.Z,
; EG-NEXT: MOV * T59.W, T59.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_INT T74.X, T4.Z, 0.0, 1,
; EG-NEXT: MOV T66.Y, T66.X,
; EG-NEXT: BFE_INT T75.Z, T3.Y, 0.0, 1,
; EG-NEXT: MOV T72.W, T72.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T58.W, T58.Z,
; EG-NEXT: BFE_INT T75.X, T3.W, 0.0, 1,
; EG-NEXT: MOV T74.Y, PV.X,
; EG-NEXT: BFE_INT T76.Z, T3.Z, 0.0, 1,
; EG-NEXT: MOV T66.W, T66.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T57.W, T57.Z,
; EG-NEXT: BFE_INT T76.X, T2.Y, 0.0, 1,
; EG-NEXT: MOV T75.Y, PV.X,
; EG-NEXT: BFE_INT T77.Z, T2.W, 0.0, 1,
; EG-NEXT: MOV T74.W, T74.Z,
; EG-NEXT: MOV * T56.W, T56.Z,
; EG-NEXT: BFE_INT T77.X, T2.Z, 0.0, 1,
; EG-NEXT: MOV T76.Y, PV.X,
; EG-NEXT: BFE_INT T78.Z, T1.Y, 0.0, 1,
; EG-NEXT: MOV T75.W, T75.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T55.W, T55.Z,
; EG-NEXT: BFE_INT T78.X, T1.W, 0.0, 1,
; EG-NEXT: MOV T77.Y, PV.X,
; EG-NEXT: BFE_INT T79.Z, T1.Z, 0.0, 1,
; EG-NEXT: MOV T76.W, T76.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T54.W, T54.Z,
; EG-NEXT: BFE_INT T79.X, T0.Y, 0.0, 1,
; EG-NEXT: MOV T78.Y, PV.X,
; EG-NEXT: BFE_INT T80.Z, T0.W, 0.0, 1,
; EG-NEXT: MOV T77.W, T77.Z,
; EG-NEXT: MOV * T53.W, T53.Z,
; EG-NEXT: BFE_INT T80.X, T0.Z, 0.0, 1,
; EG-NEXT: MOV T79.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x,
; EG-NEXT: MOV T78.W, T78.Z, BS:VEC_120/SCL_212
; EG-NEXT: MOV * T52.W, T52.Z,
; EG-NEXT: 480(6.726233e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T81.X, PV.Z, literal.x,
; EG-NEXT: MOV T80.Y, PV.X,
; EG-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
; EG-NEXT: MOV T79.W, T79.Z,
; EG-NEXT: MOV * T51.W, T51.Z,
; EG-NEXT: 2(2.802597e-45), 496(6.950440e-43)
; EG-NEXT: LSHR T82.X, PV.Z, literal.x,
; EG-NEXT: MOV T80.W, T80.Z,
; EG-NEXT: MOV * T50.W, T50.Z,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b64 s[10:11], s[2:3], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_lshr_b32 s96, s11, 30
; GFX12-NEXT: s_lshr_b32 s98, s11, 31
; GFX12-NEXT: s_lshr_b32 s92, s11, 28
; GFX12-NEXT: s_lshr_b32 s94, s11, 29
; GFX12-NEXT: s_lshr_b32 s78, s11, 26
; GFX12-NEXT: s_lshr_b32 s88, s11, 27
; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
; GFX12-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000
; GFX12-NEXT: s_lshr_b32 s66, s11, 24
; GFX12-NEXT: s_lshr_b32 s74, s11, 25
; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
; GFX12-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s96
; GFX12-NEXT: s_lshr_b32 s56, s11, 22
; GFX12-NEXT: s_lshr_b32 s62, s11, 23
; GFX12-NEXT: v_dual_mov_b32 v2, s97 :: v_dual_mov_b32 v3, s100
; GFX12-NEXT: v_dual_mov_b32 v4, s101 :: v_dual_mov_b32 v5, s92
; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
; GFX12-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
; GFX12-NEXT: s_lshr_b32 s44, s11, 20
; GFX12-NEXT: s_lshr_b32 s52, s11, 21
; GFX12-NEXT: s_lshr_b32 s30, s11, 18
; GFX12-NEXT: s_lshr_b32 s40, s11, 19
; GFX12-NEXT: s_lshr_b32 s18, s11, 16
; GFX12-NEXT: s_lshr_b32 s26, s11, 17
; GFX12-NEXT: s_lshr_b32 s2, s11, 14
; GFX12-NEXT: s_lshr_b32 s4, s11, 15
; GFX12-NEXT: v_dual_mov_b32 v6, s93 :: v_dual_mov_b32 v7, s94
; GFX12-NEXT: v_dual_mov_b32 v8, s95 :: v_dual_mov_b32 v9, s78
; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX12-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
; GFX12-NEXT: s_lshr_b32 s6, s11, 12
; GFX12-NEXT: s_lshr_b32 s8, s11, 13
; GFX12-NEXT: v_dual_mov_b32 v10, s79 :: v_dual_mov_b32 v11, s88
; GFX12-NEXT: v_dual_mov_b32 v12, s89 :: v_dual_mov_b32 v13, s66
; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
; GFX12-NEXT: s_lshr_b32 s12, s11, 10
; GFX12-NEXT: s_lshr_b32 s14, s11, 11
; GFX12-NEXT: v_dual_mov_b32 v14, s67 :: v_dual_mov_b32 v15, s74
; GFX12-NEXT: v_dual_mov_b32 v16, s75 :: v_dual_mov_b32 v17, s56
; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
; GFX12-NEXT: s_lshr_b32 s16, s11, 8
; GFX12-NEXT: s_lshr_b32 s20, s11, 9
; GFX12-NEXT: v_dual_mov_b32 v18, s57 :: v_dual_mov_b32 v19, s62
; GFX12-NEXT: v_dual_mov_b32 v20, s63 :: v_dual_mov_b32 v21, s44
; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
; GFX12-NEXT: s_lshr_b32 s22, s11, 6
; GFX12-NEXT: s_lshr_b32 s24, s11, 7
; GFX12-NEXT: v_dual_mov_b32 v22, s45 :: v_dual_mov_b32 v23, s52
; GFX12-NEXT: v_dual_mov_b32 v24, s53 :: v_dual_mov_b32 v25, s30
; GFX12-NEXT: v_dual_mov_b32 v26, s31 :: v_dual_mov_b32 v27, s40
; GFX12-NEXT: v_dual_mov_b32 v28, s41 :: v_dual_mov_b32 v29, s18
; GFX12-NEXT: v_dual_mov_b32 v30, s19 :: v_dual_mov_b32 v31, s26
; GFX12-NEXT: v_mov_b32_e32 v32, s27
; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
; GFX12-NEXT: s_clause 0x7
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:496
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:480
; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:464
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:448
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:432
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:416
; GFX12-NEXT: global_store_b128 v0, v[25:28], s[0:1] offset:400
; GFX12-NEXT: global_store_b128 v0, v[29:32], s[0:1] offset:384
; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX12-NEXT: v_dual_mov_b32 v3, s4 :: v_dual_mov_b32 v4, s5
; GFX12-NEXT: v_mov_b32_e32 v5, s6
; GFX12-NEXT: s_lshr_b32 s28, s11, 4
; GFX12-NEXT: s_lshr_b32 s34, s11, 5
; GFX12-NEXT: s_lshr_b32 s36, s11, 2
; GFX12-NEXT: s_lshr_b32 s38, s11, 3
; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v6, s7 :: v_dual_mov_b32 v7, s8
; GFX12-NEXT: v_dual_mov_b32 v8, s9 :: v_dual_mov_b32 v9, s12
; GFX12-NEXT: s_lshr_b32 s42, s11, 1
; GFX12-NEXT: s_mov_b32 s46, s11
; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v11, s14
; GFX12-NEXT: v_dual_mov_b32 v12, s15 :: v_dual_mov_b32 v13, s16
; GFX12-NEXT: s_lshr_b32 s48, s10, 30
; GFX12-NEXT: s_lshr_b32 s50, s10, 31
; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v14, s17 :: v_dual_mov_b32 v15, s20
; GFX12-NEXT: v_dual_mov_b32 v16, s21 :: v_dual_mov_b32 v17, s22
; GFX12-NEXT: s_lshr_b32 s54, s10, 28
; GFX12-NEXT: s_lshr_b32 s58, s10, 29
; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v18, s23 :: v_dual_mov_b32 v19, s24
; GFX12-NEXT: v_dual_mov_b32 v20, s25 :: v_dual_mov_b32 v21, s28
; GFX12-NEXT: s_lshr_b32 s60, s10, 26
; GFX12-NEXT: s_lshr_b32 s64, s10, 27
; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v22, s29 :: v_dual_mov_b32 v23, s34
; GFX12-NEXT: v_mov_b32_e32 v24, s35
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:368
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:352
; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:336
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:320
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:304
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:288
; GFX12-NEXT: v_dual_mov_b32 v1, s36 :: v_dual_mov_b32 v2, s37
; GFX12-NEXT: v_dual_mov_b32 v3, s38 :: v_dual_mov_b32 v4, s39
; GFX12-NEXT: v_mov_b32_e32 v5, s46
; GFX12-NEXT: s_lshr_b32 s68, s10, 24
; GFX12-NEXT: s_lshr_b32 s70, s10, 25
; GFX12-NEXT: s_lshr_b32 s72, s10, 22
; GFX12-NEXT: s_lshr_b32 s76, s10, 23
; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v6, s47 :: v_dual_mov_b32 v7, s42
; GFX12-NEXT: v_dual_mov_b32 v8, s43 :: v_dual_mov_b32 v9, s48
; GFX12-NEXT: s_lshr_b32 s80, s10, 20
; GFX12-NEXT: s_lshr_b32 s82, s10, 21
; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v10, s49 :: v_dual_mov_b32 v11, s50
; GFX12-NEXT: v_dual_mov_b32 v12, s51 :: v_dual_mov_b32 v13, s54
; GFX12-NEXT: s_lshr_b32 s84, s10, 18
; GFX12-NEXT: s_lshr_b32 s86, s10, 19
; GFX12-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
; GFX12-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
; GFX12-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
; GFX12-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v14, s55 :: v_dual_mov_b32 v15, s58
; GFX12-NEXT: v_dual_mov_b32 v16, s59 :: v_dual_mov_b32 v17, s60
; GFX12-NEXT: s_lshr_b32 s90, s10, 16
; GFX12-NEXT: s_lshr_b32 s98, s10, 17
; GFX12-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
; GFX12-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v18, s61 :: v_dual_mov_b32 v19, s64
; GFX12-NEXT: v_dual_mov_b32 v20, s65 :: v_dual_mov_b32 v21, s68
; GFX12-NEXT: s_lshr_b32 s96, s10, 14
; GFX12-NEXT: s_lshr_b32 s100, s10, 15
; GFX12-NEXT: s_lshr_b32 s94, s10, 13
; GFX12-NEXT: s_lshr_b32 s88, s10, 11
; GFX12-NEXT: s_lshr_b32 s74, s10, 9
; GFX12-NEXT: s_lshr_b32 s62, s10, 7
; GFX12-NEXT: s_lshr_b32 s52, s10, 5
; GFX12-NEXT: s_lshr_b32 s40, s10, 3
; GFX12-NEXT: s_lshr_b32 s26, s10, 1
; GFX12-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
; GFX12-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v22, s69 :: v_dual_mov_b32 v23, s70
; GFX12-NEXT: v_mov_b32_e32 v24, s71
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:272
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:256
; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:240
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:224
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:208
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:192
; GFX12-NEXT: v_dual_mov_b32 v1, s72 :: v_dual_mov_b32 v2, s73
; GFX12-NEXT: v_dual_mov_b32 v3, s76 :: v_dual_mov_b32 v4, s77
; GFX12-NEXT: v_mov_b32_e32 v5, s80
; GFX12-NEXT: s_lshr_b32 s92, s10, 12
; GFX12-NEXT: s_lshr_b32 s78, s10, 10
; GFX12-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000
; GFX12-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v6, s81 :: v_dual_mov_b32 v7, s82
; GFX12-NEXT: v_dual_mov_b32 v8, s83 :: v_dual_mov_b32 v9, s84
; GFX12-NEXT: s_lshr_b32 s66, s10, 8
; GFX12-NEXT: s_lshr_b32 s56, s10, 6
; GFX12-NEXT: s_lshr_b32 s44, s10, 4
; GFX12-NEXT: s_lshr_b32 s30, s10, 2
; GFX12-NEXT: s_bfe_i64 s[18:19], s[10:11], 0x10000
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x10000
; GFX12-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000
; GFX12-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000
; GFX12-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000
; GFX12-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000
; GFX12-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000
; GFX12-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000
; GFX12-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000
; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v10, s85 :: v_dual_mov_b32 v11, s86
; GFX12-NEXT: v_dual_mov_b32 v12, s87 :: v_dual_mov_b32 v13, s90
; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v14, s91 :: v_dual_mov_b32 v15, s98
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v16, s99 :: v_dual_mov_b32 v17, s96
; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v18, s97 :: v_dual_mov_b32 v19, s94
; GFX12-NEXT: v_dual_mov_b32 v20, s95 :: v_dual_mov_b32 v21, s92
; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v22, s93 :: v_dual_mov_b32 v23, s88
; GFX12-NEXT: v_mov_b32_e32 v24, s89
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:176
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:160
; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:144
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:128
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:112
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:96
; GFX12-NEXT: v_dual_mov_b32 v1, s78 :: v_dual_mov_b32 v2, s79
; GFX12-NEXT: v_dual_mov_b32 v3, s74 :: v_dual_mov_b32 v4, s75
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v5, s66
; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v6, s67 :: v_dual_mov_b32 v7, s62
; GFX12-NEXT: v_dual_mov_b32 v8, s63 :: v_dual_mov_b32 v9, s56
; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
; GFX12-NEXT: v_dual_mov_b32 v10, s57 :: v_dual_mov_b32 v11, s52
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v12, s53 :: v_dual_mov_b32 v13, s44
; GFX12-NEXT: v_dual_mov_b32 v14, s45 :: v_dual_mov_b32 v15, s40
; GFX12-NEXT: v_dual_mov_b32 v16, s41 :: v_dual_mov_b32 v17, s30
; GFX12-NEXT: v_dual_mov_b32 v18, s31 :: v_dual_mov_b32 v19, s26
; GFX12-NEXT: v_dual_mov_b32 v20, s27 :: v_dual_mov_b32 v21, s18
; GFX12-NEXT: v_dual_mov_b32 v22, s19 :: v_dual_mov_b32 v23, s10
; GFX12-NEXT: v_mov_b32_e32 v24, s11
; GFX12-NEXT: s_clause 0x5
; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:80
; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:64
; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:48
; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:32
; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:16
; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1]
; GFX12-NEXT: s_endpgm
%load = load <64 x i1>, ptr addrspace(4) %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, ptr addrspace(1) %out
ret void
}
; Attribute group #0: referenced by the `#0` suffix on kernel definitions in
; this file (e.g. @constant_load_i1); marks those functions as nounwind.
attributes #0 = { nounwind }