[DAG] Always use stack to promote bitcast when the source is vector (#151065)
The optimization introduced by #125637 tried to avoid using the stack to promote a bitcast with a vector result type. However, that is not correct when the input type is also a vector. This patch limits that optimization to scalar-to-vector bitcasts only.
This commit is contained in:
parent
849009c635
commit
7ebbbd885f
@ -2217,8 +2217,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
|
||||
|
||||
switch (getTypeAction(InVT)) {
|
||||
case TargetLowering::TypePromoteInteger: {
|
||||
// TODO: Handle big endian
|
||||
if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
|
||||
// TODO: Handle big endian & vector input type.
|
||||
if (OutVT.isVector() && !InVT.isVector() &&
|
||||
DAG.getDataLayout().isLittleEndian()) {
|
||||
EVT EltVT = OutVT.getVectorElementType();
|
||||
TypeSize EltSize = EltVT.getSizeInBits();
|
||||
TypeSize NInSize = NInVT.getSizeInBits();
|
||||
|
||||
@ -457,27 +457,58 @@ define amdgpu_kernel void @v_ctpop_v4i16(ptr addrspace(1) noalias %out, ptr addr
|
||||
;
|
||||
; EG-LABEL: v_ctpop_v4i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 0 @6
|
||||
; EG-NEXT: ALU 7, @11, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T6.X, 1
|
||||
; EG-NEXT: ALU 37, @12, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T0.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_64 T8.XY, T0.X, 0, #1
|
||||
; EG-NEXT: ALU clause starting at 8:
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: MOV T0.Y, T4.X,
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 11:
|
||||
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.Y, PV.W,
|
||||
; EG-NEXT: AND_INT * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: AND_INT * T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.X, PV.W,
|
||||
; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T4.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: LSHR * T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV T4.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T5.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T8.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T5.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: LSHR * T0.W, T8.Y, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: OR_INT * T8.Y, T1.W, PV.W,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T5.X, PV.Y,
|
||||
; EG-NEXT: MOV * T8.X, T4.X,
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
|
||||
%val = load <4 x i16>, ptr addrspace(1) %in.gep, align 16
|
||||
@ -570,33 +601,94 @@ define amdgpu_kernel void @v_ctpop_v8i16(ptr addrspace(1) noalias %out, ptr addr
|
||||
;
|
||||
; EG-LABEL: v_ctpop_v8i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 0 @6
|
||||
; EG-NEXT: ALU 13, @11, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T8.X, 1
|
||||
; EG-NEXT: ALU 73, @12, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T12.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 0, #1
|
||||
; EG-NEXT: ALU clause starting at 8:
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: MOV T0.Y, T4.X,
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 11:
|
||||
; EG-NEXT: LSHR * T0.W, T0.Z, literal.x,
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: LSHR * T0.W, T12.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT * T0.W, PV.W,
|
||||
; EG-NEXT: LSHL T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T4.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T12.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T4.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T5.X,
|
||||
; EG-NEXT: LSHR * T0.W, T12.Y, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.Z, PS,
|
||||
; EG-NEXT: LSHR * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.Y, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.X, PV.W,
|
||||
; EG-NEXT: LSHR * T8.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T5.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T12.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
|
||||
; EG-NEXT: MOV T5.X, PV.Y,
|
||||
; EG-NEXT: MOV * T0.X, T8.X,
|
||||
; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T8.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T8.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T9.X,
|
||||
; EG-NEXT: LSHR * T0.W, T12.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T9.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T12.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T12.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T9.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T4.X,
|
||||
; EG-NEXT: MOV * T0.Z, T8.X,
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr <8 x i16>, ptr addrspace(1) %in, i32 %tid
|
||||
%val = load <8 x i16>, ptr addrspace(1) %in.gep, align 32
|
||||
@ -745,46 +837,174 @@ define amdgpu_kernel void @v_ctpop_v16i16(ptr addrspace(1) noalias %out, ptr add
|
||||
;
|
||||
; EG-LABEL: v_ctpop_v16i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 2, @10, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 1 @6
|
||||
; EG-NEXT: ALU 25, @13, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T14.X, 0
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T13.X, 1
|
||||
; EG-NEXT: ALU 3, @12, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 1 @8
|
||||
; EG-NEXT: ALU 114, @16, KC0[], KC1[]
|
||||
; EG-NEXT: ALU 34, @131, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T22.X, 0
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 16, #1
|
||||
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
|
||||
; EG-NEXT: ALU clause starting at 10:
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 8:
|
||||
; EG-NEXT: VTX_READ_128 T20.XYZW, T0.X, 16, #1
|
||||
; EG-NEXT: VTX_READ_128 T21.XYZW, T0.X, 0, #1
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: MOV T0.Y, T4.X,
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 13:
|
||||
; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
|
||||
; EG-NEXT: ALU clause starting at 16:
|
||||
; EG-NEXT: LSHR * T0.W, T20.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T12.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
|
||||
; EG-NEXT: BCNT_INT * T0.W, PV.W,
|
||||
; EG-NEXT: LSHL T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T4.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T20.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T12.Z, PS,
|
||||
; EG-NEXT: LSHR T0.W, T0.Z, literal.x,
|
||||
; EG-NEXT: LSHR * T1.W, T12.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T12.Y, PS,
|
||||
; EG-NEXT: AND_INT T0.Z, T0.Z, literal.x,
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T12.X, PS,
|
||||
; EG-NEXT: BCNT_INT T0.Z, PV.Z,
|
||||
; EG-NEXT: LSHR T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T4.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T5.X,
|
||||
; EG-NEXT: LSHR * T0.W, T20.Y, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T13.X, PS, literal.x,
|
||||
; EG-NEXT: BCNT_INT T0.Y, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
|
||||
; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
|
||||
; EG-NEXT: BCNT_INT T0.X, PV.W,
|
||||
; EG-NEXT: LSHR * T14.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T5.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T20.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
|
||||
; EG-NEXT: MOV T5.X, PV.Y,
|
||||
; EG-NEXT: MOV * T0.X, T8.X,
|
||||
; EG-NEXT: LSHR * T0.W, T20.Z, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T8.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T20.Z, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T8.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T9.X,
|
||||
; EG-NEXT: LSHR * T0.W, T20.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T9.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T0.W, T20.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T0.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T9.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T12.X,
|
||||
; EG-NEXT: LSHR * T1.W, T21.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
|
||||
; EG-NEXT: MOV * T12.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T1.W, T21.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T12.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T13.X,
|
||||
; EG-NEXT: LSHR * T1.W, T21.Y, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
|
||||
; EG-NEXT: MOV * T13.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T1.W, T21.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T20.Y, PS, PV.W,
|
||||
; EG-NEXT: MOV T13.X, PV.Y,
|
||||
; EG-NEXT: MOV * T0.X, T16.X,
|
||||
; EG-NEXT: LSHR * T1.W, T21.Z, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 131:
|
||||
; EG-NEXT: MOV * T16.X, T1.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT * T1.W, T21.Z, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T16.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T17.X,
|
||||
; EG-NEXT: LSHR * T1.W, T21.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
|
||||
; EG-NEXT: MOV * T17.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: AND_INT T1.W, T21.W, literal.x,
|
||||
; EG-NEXT: LSHR * T21.X, KC0[2].Y, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
|
||||
; EG-NEXT: AND_INT T0.Z, PV.X, literal.x,
|
||||
; EG-NEXT: BCNT_INT T1.W, PV.W,
|
||||
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
|
||||
; EG-NEXT: -65536(nan), 16(2.242078e-44)
|
||||
; EG-NEXT: LSHR T22.X, PS, literal.x,
|
||||
; EG-NEXT: OR_INT * T20.W, PV.Z, PV.W,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T17.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T4.X,
|
||||
; EG-NEXT: MOV * T0.Z, T8.X,
|
||||
; EG-NEXT: MOV T20.X, T12.X,
|
||||
; EG-NEXT: MOV * T20.Z, T16.X, BS:VEC_120/SCL_212
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%in.gep = getelementptr <16 x i16>, ptr addrspace(1) %in, i32 %tid
|
||||
%val = load <16 x i16>, ptr addrspace(1) %in.gep, align 32
|
||||
@ -1292,7 +1512,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
|
||||
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: .LBB14_4:
|
||||
; SI-NEXT: ; implicit-def: $vgpr0
|
||||
; SI-NEXT: ; implicit-def: $vgpr0
|
||||
; SI-NEXT: s_branch .LBB14_2
|
||||
;
|
||||
; VI-LABEL: ctpop_i16_in_br:
|
||||
|
||||
@ -1025,67 +1025,74 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16>
|
||||
;
|
||||
; EG-LABEL: v3i16_arg:
|
||||
; EG: ; %bb.0: ; %entry
|
||||
; EG-NEXT: ALU 0, @10, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @6
|
||||
; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
|
||||
; EG-NEXT: ALU 0, @12, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 2 @6
|
||||
; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 44, #3
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 48, #3
|
||||
; EG-NEXT: ALU clause starting at 10:
|
||||
; EG-NEXT: MOV * T0.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 11:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
|
||||
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: MOV * T5.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 13:
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
|
||||
; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
|
||||
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T2.X, T2.W, PV.W,
|
||||
; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
|
||||
; EG-NEXT: LSHL T5.X, T2.W, PV.W,
|
||||
; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T2.Y, 0.0,
|
||||
; EG-NEXT: MOV * T2.Z, 0.0,
|
||||
; EG-NEXT: LSHR T0.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: MOV T5.Y, 0.0,
|
||||
; EG-NEXT: MOV * T5.Z, 0.0,
|
||||
; EG-NEXT: LSHR T8.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHL T0.W, T7.X, literal.y,
|
||||
; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
|
||||
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT T6.X, PV.W, PS,
|
||||
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
;
|
||||
; CM-LABEL: v3i16_arg:
|
||||
; CM: ; %bb.0: ; %entry
|
||||
; CM-NEXT: ALU 0, @12, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @8
|
||||
; CM-NEXT: ALU 13, @13, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT MSKOR T1.XW, T2.X
|
||||
; CM-NEXT: ALU 1, @27, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: TEX 0 @10
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
|
||||
; CM-NEXT: TEX 2 @6
|
||||
; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
|
||||
; CM-NEXT: CF_END
|
||||
; CM-NEXT: Fetch clause starting at 8:
|
||||
; CM-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3
|
||||
; CM-NEXT: Fetch clause starting at 10:
|
||||
; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
|
||||
; CM-NEXT: Fetch clause starting at 6:
|
||||
; CM-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
|
||||
; CM-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
|
||||
; CM-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
|
||||
; CM-NEXT: ALU clause starting at 12:
|
||||
; CM-NEXT: MOV * T0.X, 0.0,
|
||||
; CM-NEXT: MOV * T5.X, 0.0,
|
||||
; CM-NEXT: ALU clause starting at 13:
|
||||
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
|
||||
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: AND_INT T0.Z, T1.X, literal.x,
|
||||
; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
|
||||
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
|
||||
; CM-NEXT: LSHL T1.X, PV.Z, PV.W,
|
||||
; CM-NEXT: LSHL * T1.W, literal.x, PV.W,
|
||||
; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
|
||||
; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
|
||||
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; CM-NEXT: MOV T1.Y, 0.0,
|
||||
; CM-NEXT: MOV * T1.Z, 0.0,
|
||||
; CM-NEXT: LSHR * T2.X, T0.W, literal.x,
|
||||
; CM-NEXT: MOV T5.Y, 0.0,
|
||||
; CM-NEXT: MOV * T5.Z, 0.0,
|
||||
; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
|
||||
; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
|
||||
; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
|
||||
; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: ALU clause starting at 27:
|
||||
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
entry:
|
||||
store <3 x i16> %in, ptr addrspace(1) %out, align 4
|
||||
@ -2669,47 +2676,205 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) {
|
||||
;
|
||||
; EG-LABEL: v8i16_arg:
|
||||
; EG: ; %bb.0: ; %entry
|
||||
; EG-NEXT: ALU 0, @14, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 3 @6
|
||||
; EG-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
|
||||
; EG-NEXT: ALU 1, @36, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @20
|
||||
; EG-NEXT: ALU 5, @38, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @22
|
||||
; EG-NEXT: ALU 5, @44, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @24
|
||||
; EG-NEXT: ALU 5, @50, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @26
|
||||
; EG-NEXT: ALU 5, @56, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @28
|
||||
; EG-NEXT: ALU 5, @62, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @30
|
||||
; EG-NEXT: ALU 5, @68, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @32
|
||||
; EG-NEXT: ALU 5, @74, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @34
|
||||
; EG-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
|
||||
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
|
||||
; EG-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
|
||||
; EG-NEXT: ALU clause starting at 14:
|
||||
; EG-NEXT: MOV * T0.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 15:
|
||||
; EG-NEXT: MOV T1.Y, T2.X,
|
||||
; EG-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: MOV * T1.W, T3.X,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: Fetch clause starting at 20:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
|
||||
; EG-NEXT: Fetch clause starting at 22:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
|
||||
; EG-NEXT: Fetch clause starting at 24:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
|
||||
; EG-NEXT: Fetch clause starting at 26:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
|
||||
; EG-NEXT: Fetch clause starting at 28:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
|
||||
; EG-NEXT: Fetch clause starting at 30:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
|
||||
; EG-NEXT: Fetch clause starting at 32:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
|
||||
; EG-NEXT: Fetch clause starting at 34:
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
|
||||
; EG-NEXT: ALU clause starting at 36:
|
||||
; EG-NEXT: MOV * T0.Y, T3.X,
|
||||
; EG-NEXT: MOV * T7.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 38:
|
||||
; EG-NEXT: LSHL T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T5.X,
|
||||
; EG-NEXT: ALU clause starting at 44:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T5.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T3.X,
|
||||
; EG-NEXT: ALU clause starting at 50:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T5.X,
|
||||
; EG-NEXT: ALU clause starting at 56:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T5.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T2.X,
|
||||
; EG-NEXT: ALU clause starting at 62:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T2.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T4.X,
|
||||
; EG-NEXT: ALU clause starting at 68:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T4.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T2.X,
|
||||
; EG-NEXT: ALU clause starting at 74:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
|
||||
; EG-NEXT: MOV T2.X, PV.Z,
|
||||
; EG-NEXT: MOV * T0.Y, T4.X,
|
||||
; EG-NEXT: ALU clause starting at 80:
|
||||
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
|
||||
; EG-NEXT: AND_INT * T1.W, T7.X, literal.z,
|
||||
; EG-NEXT: 2(2.802597e-45), -65536(nan)
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T7.X, PV.W, PS,
|
||||
; EG-NEXT: MOV T4.X, PV.X,
|
||||
; EG-NEXT: MOV * T7.W, T3.X,
|
||||
; EG-NEXT: MOV * T7.Y, T5.X,
|
||||
;
|
||||
; CM-LABEL: v8i16_arg:
|
||||
; CM: ; %bb.0: ; %entry
|
||||
; CM-NEXT: ALU 0, @14, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 3 @6
|
||||
; CM-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
|
||||
; CM-NEXT: ALU 1, @36, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @20
|
||||
; CM-NEXT: ALU 5, @38, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @22
|
||||
; CM-NEXT: ALU 5, @44, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @24
|
||||
; CM-NEXT: ALU 5, @50, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @26
|
||||
; CM-NEXT: ALU 5, @56, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @28
|
||||
; CM-NEXT: ALU 5, @62, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @30
|
||||
; CM-NEXT: ALU 5, @68, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @32
|
||||
; CM-NEXT: ALU 5, @74, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @34
|
||||
; CM-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
|
||||
; CM-NEXT: CF_END
|
||||
; CM-NEXT: PAD
|
||||
; CM-NEXT: Fetch clause starting at 6:
|
||||
; CM-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
|
||||
; CM-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
|
||||
; CM-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
|
||||
; CM-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
|
||||
; CM-NEXT: ALU clause starting at 14:
|
||||
; CM-NEXT: MOV * T0.X, 0.0,
|
||||
; CM-NEXT: ALU clause starting at 15:
|
||||
; CM-NEXT: MOV T1.Y, T2.X,
|
||||
; CM-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
|
||||
; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: MOV * T1.W, T3.X,
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: Fetch clause starting at 20:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
|
||||
; CM-NEXT: Fetch clause starting at 22:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
|
||||
; CM-NEXT: Fetch clause starting at 24:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
|
||||
; CM-NEXT: Fetch clause starting at 26:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
|
||||
; CM-NEXT: Fetch clause starting at 28:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
|
||||
; CM-NEXT: Fetch clause starting at 30:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
|
||||
; CM-NEXT: Fetch clause starting at 32:
|
||||
; CM-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
|
||||
; CM-NEXT: Fetch clause starting at 34:
|
||||
; CM-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
|
||||
; CM-NEXT: ALU clause starting at 36:
|
||||
; CM-NEXT: MOV * T0.Y, T3.X,
|
||||
; CM-NEXT: MOV * T7.X, 0.0,
|
||||
; CM-NEXT: ALU clause starting at 38:
|
||||
; CM-NEXT: LSHL T0.Z, T8.X, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
|
||||
; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
|
||||
; CM-NEXT: MOV T3.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T5.X,
|
||||
; CM-NEXT: ALU clause starting at 44:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T5.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T3.X,
|
||||
; CM-NEXT: ALU clause starting at 50:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T3.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T5.X,
|
||||
; CM-NEXT: ALU clause starting at 56:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T5.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T2.X,
|
||||
; CM-NEXT: ALU clause starting at 62:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T2.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T4.X,
|
||||
; CM-NEXT: ALU clause starting at 68:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T4.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T2.X,
|
||||
; CM-NEXT: ALU clause starting at 74:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T7.Z, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T2.X, PV.Z,
|
||||
; CM-NEXT: MOV * T0.Y, T4.X,
|
||||
; CM-NEXT: ALU clause starting at 80:
|
||||
; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
|
||||
; CM-NEXT: AND_INT * T0.W, T7.X, literal.z,
|
||||
; CM-NEXT: 2(2.802597e-45), -65536(nan)
|
||||
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; CM-NEXT: OR_INT * T7.X, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T4.X, PV.X,
|
||||
; CM-NEXT: MOV * T7.W, T3.X,
|
||||
; CM-NEXT: MOV * T7.Y, T5.X,
|
||||
entry:
|
||||
store <8 x i16> %in, ptr addrspace(1) %out
|
||||
ret void
|
||||
@ -3453,68 +3618,392 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) {
|
||||
;
|
||||
; EG-LABEL: v16i16_arg:
|
||||
; EG: ; %bb.0: ; %entry
|
||||
; EG-NEXT: ALU 0, @22, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 7 @6
|
||||
; EG-NEXT: ALU 10, @23, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1
|
||||
; EG-NEXT: ALU 1, @68, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @36
|
||||
; EG-NEXT: ALU 5, @70, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @38
|
||||
; EG-NEXT: ALU 5, @76, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @40
|
||||
; EG-NEXT: ALU 5, @82, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @42
|
||||
; EG-NEXT: ALU 5, @88, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @44
|
||||
; EG-NEXT: ALU 5, @94, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @46
|
||||
; EG-NEXT: ALU 5, @100, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @48
|
||||
; EG-NEXT: ALU 5, @106, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @50
|
||||
; EG-NEXT: ALU 5, @112, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @52
|
||||
; EG-NEXT: ALU 5, @118, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @54
|
||||
; EG-NEXT: ALU 5, @124, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @56
|
||||
; EG-NEXT: ALU 5, @130, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @58
|
||||
; EG-NEXT: ALU 5, @136, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @60
|
||||
; EG-NEXT: ALU 5, @142, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @62
|
||||
; EG-NEXT: ALU 5, @148, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @64
|
||||
; EG-NEXT: ALU 5, @154, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 0 @66
|
||||
; EG-NEXT: ALU 13, @160, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
|
||||
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
|
||||
; EG-NEXT: VTX_READ_16 T3.X, T0.X, 94, #3
|
||||
; EG-NEXT: VTX_READ_16 T4.X, T0.X, 78, #3
|
||||
; EG-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
|
||||
; EG-NEXT: ALU clause starting at 22:
|
||||
; EG-NEXT: MOV * T0.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 23:
|
||||
; EG-NEXT: MOV T1.Y, T2.X,
|
||||
; EG-NEXT: MOV * T7.Y, T0.X,
|
||||
; EG-NEXT: MOV * T1.Z, T6.X,
|
||||
; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: MOV T7.Z, T5.X,
|
||||
; EG-NEXT: Fetch clause starting at 36:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
|
||||
; EG-NEXT: Fetch clause starting at 38:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
|
||||
; EG-NEXT: Fetch clause starting at 40:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
|
||||
; EG-NEXT: Fetch clause starting at 42:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
|
||||
; EG-NEXT: Fetch clause starting at 44:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
|
||||
; EG-NEXT: Fetch clause starting at 46:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
|
||||
; EG-NEXT: Fetch clause starting at 48:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
|
||||
; EG-NEXT: Fetch clause starting at 50:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
|
||||
; EG-NEXT: Fetch clause starting at 52:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
|
||||
; EG-NEXT: Fetch clause starting at 54:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
|
||||
; EG-NEXT: Fetch clause starting at 56:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
|
||||
; EG-NEXT: Fetch clause starting at 58:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
|
||||
; EG-NEXT: Fetch clause starting at 60:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
|
||||
; EG-NEXT: Fetch clause starting at 62:
|
||||
; EG-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
|
||||
; EG-NEXT: Fetch clause starting at 64:
|
||||
; EG-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
|
||||
; EG-NEXT: Fetch clause starting at 66:
|
||||
; EG-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
|
||||
; EG-NEXT: ALU clause starting at 68:
|
||||
; EG-NEXT: MOV * T0.Y, T3.X,
|
||||
; EG-NEXT: MOV * T11.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 70:
|
||||
; EG-NEXT: LSHL T0.W, T12.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T5.X,
|
||||
; EG-NEXT: ALU clause starting at 76:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T5.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T7.X,
|
||||
; EG-NEXT: ALU clause starting at 82:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T7.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T9.X,
|
||||
; EG-NEXT: ALU clause starting at 88:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T9.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T3.X,
|
||||
; EG-NEXT: ALU clause starting at 94:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T5.X,
|
||||
; EG-NEXT: ALU clause starting at 100:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T5.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T7.X,
|
||||
; EG-NEXT: ALU clause starting at 106:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T7.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T9.X,
|
||||
; EG-NEXT: ALU clause starting at 112:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T9.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T2.X,
|
||||
; EG-NEXT: ALU clause starting at 118:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T2.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T4.X,
|
||||
; EG-NEXT: ALU clause starting at 124:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T4.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T6.X,
|
||||
; EG-NEXT: ALU clause starting at 130:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T6.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T8.X,
|
||||
; EG-NEXT: ALU clause starting at 136:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T8.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T2.X,
|
||||
; EG-NEXT: ALU clause starting at 142:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T12.Z, PV.W, PS,
|
||||
; EG-NEXT: MOV T2.X, PV.Z,
|
||||
; EG-NEXT: MOV * T0.Y, T4.X,
|
||||
; EG-NEXT: ALU clause starting at 148:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T12.X, PV.W, PS,
|
||||
; EG-NEXT: MOV T4.X, PV.X,
|
||||
; EG-NEXT: MOV * T0.Y, T6.X,
|
||||
; EG-NEXT: ALU clause starting at 154:
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T13.X, literal.y,
|
||||
; EG-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T11.Z, PV.W, PS,
|
||||
; EG-NEXT: MOV T6.X, PV.Z,
|
||||
; EG-NEXT: MOV * T0.Y, T8.X,
|
||||
; EG-NEXT: ALU clause starting at 160:
|
||||
; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
|
||||
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
||||
; EG-NEXT: LSHR T2.X, PV.W, literal.x,
|
||||
; EG-NEXT: MOV T7.W, T4.X,
|
||||
; EG-NEXT: MOV * T1.W, T3.X,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T14.X, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
|
||||
; EG-NEXT: AND_INT * T1.W, T11.X, literal.z,
|
||||
; EG-NEXT: 2(2.802597e-45), -65536(nan)
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T11.X, PV.W, PS,
|
||||
; EG-NEXT: MOV T8.X, PV.X,
|
||||
; EG-NEXT: MOV * T12.W, T3.X,
|
||||
; EG-NEXT: MOV T12.Y, T5.X,
|
||||
; EG-NEXT: MOV T11.W, T7.X, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: MOV * T11.Y, T9.X,
|
||||
;
|
||||
; CM-LABEL: v16i16_arg:
|
||||
; CM: ; %bb.0: ; %entry
|
||||
; CM-NEXT: ALU 0, @22, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 7 @6
|
||||
; CM-NEXT: ALU 11, @23, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T2.X
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
|
||||
; CM-NEXT: ALU 1, @68, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @36
|
||||
; CM-NEXT: ALU 5, @70, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @38
|
||||
; CM-NEXT: ALU 5, @76, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @40
|
||||
; CM-NEXT: ALU 5, @82, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @42
|
||||
; CM-NEXT: ALU 5, @88, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @44
|
||||
; CM-NEXT: ALU 5, @94, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @46
|
||||
; CM-NEXT: ALU 5, @100, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @48
|
||||
; CM-NEXT: ALU 5, @106, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @50
|
||||
; CM-NEXT: ALU 5, @112, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @52
|
||||
; CM-NEXT: ALU 5, @118, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @54
|
||||
; CM-NEXT: ALU 5, @124, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @56
|
||||
; CM-NEXT: ALU 5, @130, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @58
|
||||
; CM-NEXT: ALU 5, @136, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @60
|
||||
; CM-NEXT: ALU 5, @142, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @62
|
||||
; CM-NEXT: ALU 5, @148, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @64
|
||||
; CM-NEXT: ALU 5, @154, KC0[], KC1[]
|
||||
; CM-NEXT: TEX 0 @66
|
||||
; CM-NEXT: ALU 14, @160, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T14.X
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T13.X
|
||||
; CM-NEXT: CF_END
|
||||
; CM-NEXT: Fetch clause starting at 6:
|
||||
; CM-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
|
||||
; CM-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
|
||||
; CM-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
|
||||
; CM-NEXT: VTX_READ_16 T4.X, T0.X, 94, #3
|
||||
; CM-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
|
||||
; CM-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
|
||||
; CM-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
|
||||
; CM-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
|
||||
; CM-NEXT: ALU clause starting at 22:
|
||||
; CM-NEXT: MOV * T0.X, 0.0,
|
||||
; CM-NEXT: ALU clause starting at 23:
|
||||
; CM-NEXT: MOV * T1.Y, T2.X,
|
||||
; CM-NEXT: MOV T7.Y, T0.X,
|
||||
; CM-NEXT: MOV T1.Z, T6.X, BS:VEC_120/SCL_212
|
||||
; CM-NEXT: Fetch clause starting at 36:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
|
||||
; CM-NEXT: Fetch clause starting at 38:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
|
||||
; CM-NEXT: Fetch clause starting at 40:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
|
||||
; CM-NEXT: Fetch clause starting at 42:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
|
||||
; CM-NEXT: Fetch clause starting at 44:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
|
||||
; CM-NEXT: Fetch clause starting at 46:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
|
||||
; CM-NEXT: Fetch clause starting at 48:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
|
||||
; CM-NEXT: Fetch clause starting at 50:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
|
||||
; CM-NEXT: Fetch clause starting at 52:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
|
||||
; CM-NEXT: Fetch clause starting at 54:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
|
||||
; CM-NEXT: Fetch clause starting at 56:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
|
||||
; CM-NEXT: Fetch clause starting at 58:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
|
||||
; CM-NEXT: Fetch clause starting at 60:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
|
||||
; CM-NEXT: Fetch clause starting at 62:
|
||||
; CM-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
|
||||
; CM-NEXT: Fetch clause starting at 64:
|
||||
; CM-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
|
||||
; CM-NEXT: Fetch clause starting at 66:
|
||||
; CM-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
|
||||
; CM-NEXT: ALU clause starting at 68:
|
||||
; CM-NEXT: MOV * T0.Y, T3.X,
|
||||
; CM-NEXT: MOV * T11.X, 0.0,
|
||||
; CM-NEXT: ALU clause starting at 70:
|
||||
; CM-NEXT: LSHL T0.Z, T12.X, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
|
||||
; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
|
||||
; CM-NEXT: MOV T3.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T5.X,
|
||||
; CM-NEXT: ALU clause starting at 76:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T5.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T7.X,
|
||||
; CM-NEXT: ALU clause starting at 82:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T7.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T9.X,
|
||||
; CM-NEXT: ALU clause starting at 88:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T9.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T3.X,
|
||||
; CM-NEXT: ALU clause starting at 94:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T3.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T5.X,
|
||||
; CM-NEXT: ALU clause starting at 100:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T5.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T7.X,
|
||||
; CM-NEXT: ALU clause starting at 106:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T7.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T9.X,
|
||||
; CM-NEXT: ALU clause starting at 112:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T9.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T2.X,
|
||||
; CM-NEXT: ALU clause starting at 118:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T2.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T4.X,
|
||||
; CM-NEXT: ALU clause starting at 124:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T4.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T6.X,
|
||||
; CM-NEXT: ALU clause starting at 130:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T6.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T8.X,
|
||||
; CM-NEXT: ALU clause starting at 136:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T8.X, PV.W,
|
||||
; CM-NEXT: MOV * T0.Y, T2.X,
|
||||
; CM-NEXT: ALU clause starting at 142:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T12.Z, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T2.X, PV.Z,
|
||||
; CM-NEXT: MOV * T0.Y, T4.X,
|
||||
; CM-NEXT: ALU clause starting at 148:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T12.X, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T4.X, PV.X,
|
||||
; CM-NEXT: MOV * T0.Y, T6.X,
|
||||
; CM-NEXT: ALU clause starting at 154:
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
|
||||
; CM-NEXT: AND_INT * T0.W, T13.X, literal.y,
|
||||
; CM-NEXT: -65536(nan), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T11.Z, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T6.X, PV.Z,
|
||||
; CM-NEXT: MOV * T0.Y, T8.X,
|
||||
; CM-NEXT: ALU clause starting at 160:
|
||||
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; CM-NEXT: LSHR T0.X, PV.W, literal.x,
|
||||
; CM-NEXT: MOV T7.Z, T5.X,
|
||||
; CM-NEXT: MOV * T1.W, T4.X, BS:VEC_120/SCL_212
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: LSHR T2.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: MOV * T7.W, T3.X,
|
||||
; CM-NEXT: LSHR * T13.X, PV.W, literal.x,
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
|
||||
; CM-NEXT: AND_INT * T0.W, T11.X, literal.z,
|
||||
; CM-NEXT: 2(2.802597e-45), -65536(nan)
|
||||
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; CM-NEXT: OR_INT * T11.X, PV.Z, PV.W,
|
||||
; CM-NEXT: MOV T8.X, PV.X,
|
||||
; CM-NEXT: MOV * T12.W, T3.X,
|
||||
; CM-NEXT: MOV T12.Y, T5.X,
|
||||
; CM-NEXT: MOV * T11.W, T7.X, BS:VEC_120/SCL_212
|
||||
; CM-NEXT: MOV * T11.Y, T9.X,
|
||||
entry:
|
||||
store <16 x i16> %in, ptr addrspace(1) %out
|
||||
ret void
|
||||
|
||||
@ -232,32 +232,38 @@ define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrsp
|
||||
;
|
||||
; EG-LABEL: constant_load_v3i16:
|
||||
; EG: ; %bb.0: ; %entry
|
||||
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 1 @6
|
||||
; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
|
||||
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 2 @6
|
||||
; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
|
||||
; EG-NEXT: ALU clause starting at 10:
|
||||
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
||||
; EG-NEXT: ALU clause starting at 11:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
|
||||
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: MOV * T5.X, KC0[2].Z,
|
||||
; EG-NEXT: ALU clause starting at 13:
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
|
||||
; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
|
||||
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T2.X, T2.W, PV.W,
|
||||
; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
|
||||
; EG-NEXT: LSHL T5.X, T2.W, PV.W,
|
||||
; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T2.Y, 0.0,
|
||||
; EG-NEXT: MOV * T2.Z, 0.0,
|
||||
; EG-NEXT: LSHR T0.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: MOV T5.Y, 0.0,
|
||||
; EG-NEXT: MOV * T5.Z, 0.0,
|
||||
; EG-NEXT: LSHR T8.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHL T0.W, T7.X, literal.y,
|
||||
; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
|
||||
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT T6.X, PV.W, PS,
|
||||
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
;
|
||||
; GFX12-LABEL: constant_load_v3i16:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -263,63 +263,74 @@ define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspac
|
||||
;
|
||||
; EG-LABEL: global_load_v3i16:
|
||||
; EG: ; %bb.0: ; %entry
|
||||
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 1 @6
|
||||
; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
|
||||
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 2 @6
|
||||
; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
|
||||
; EG-NEXT: ALU clause starting at 10:
|
||||
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
||||
; EG-NEXT: ALU clause starting at 11:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
|
||||
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: MOV * T5.X, KC0[2].Z,
|
||||
; EG-NEXT: ALU clause starting at 13:
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
|
||||
; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
|
||||
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T2.X, T2.W, PV.W,
|
||||
; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
|
||||
; EG-NEXT: LSHL T5.X, T2.W, PV.W,
|
||||
; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T2.Y, 0.0,
|
||||
; EG-NEXT: MOV * T2.Z, 0.0,
|
||||
; EG-NEXT: LSHR T0.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: MOV T5.Y, 0.0,
|
||||
; EG-NEXT: MOV * T5.Z, 0.0,
|
||||
; EG-NEXT: LSHR T8.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHL T0.W, T7.X, literal.y,
|
||||
; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
|
||||
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT T6.X, PV.W, PS,
|
||||
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
;
|
||||
; CM-LABEL: global_load_v3i16:
|
||||
; CM: ; %bb.0: ; %entry
|
||||
; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: TEX 1 @6
|
||||
; CM-NEXT: ALU 15, @11, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT MSKOR T2.XW, T3.X
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
|
||||
; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: TEX 2 @6
|
||||
; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
|
||||
; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
|
||||
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
|
||||
; CM-NEXT: CF_END
|
||||
; CM-NEXT: Fetch clause starting at 6:
|
||||
; CM-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
|
||||
; CM-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
|
||||
; CM-NEXT: ALU clause starting at 10:
|
||||
; CM-NEXT: MOV * T0.X, KC0[2].Z,
|
||||
; CM-NEXT: ALU clause starting at 11:
|
||||
; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
|
||||
; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
|
||||
; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
|
||||
; CM-NEXT: ALU clause starting at 12:
|
||||
; CM-NEXT: MOV * T5.X, KC0[2].Z,
|
||||
; CM-NEXT: ALU clause starting at 13:
|
||||
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
|
||||
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
|
||||
; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
|
||||
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
|
||||
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
|
||||
; CM-NEXT: LSHL T2.X, PV.Z, PV.W,
|
||||
; CM-NEXT: LSHL * T2.W, literal.x, PV.W,
|
||||
; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
|
||||
; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
|
||||
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; CM-NEXT: MOV T2.Y, 0.0,
|
||||
; CM-NEXT: MOV * T2.Z, 0.0,
|
||||
; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: MOV T5.Y, 0.0,
|
||||
; CM-NEXT: MOV * T5.Z, 0.0,
|
||||
; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
|
||||
; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
|
||||
; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
|
||||
; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; CM-NEXT: LSHR * T3.X, T0.W, literal.x,
|
||||
; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
|
||||
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
entry:
|
||||
%ld = load <3 x i16>, ptr addrspace(1) %in
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -151,19 +151,27 @@ define amdgpu_kernel void @local_load_v3i16(ptr addrspace(3) %out, ptr addrspace
|
||||
;
|
||||
; EG-LABEL: local_load_v3i16:
|
||||
; EG: ; %bb.0: ; %entry
|
||||
; EG-NEXT: ALU 11, @2, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x,
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
|
||||
; EG-NEXT: MOV T0.X, OQAP,
|
||||
; EG-NEXT: ALU 19, @2, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MOV * T0.W, KC0[2].Z,
|
||||
; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
|
||||
; EG-NEXT: MOV T0.Y, OQAP,
|
||||
; EG-NEXT: MOV * T0.W, KC0[2].Y,
|
||||
; EG-NEXT: LDS_WRITE * T0.W, T0.Y,
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
|
||||
; EG-NEXT: MOV * T0.Z, OQAP,
|
||||
; EG-NEXT: LSHL T0.Z, PV.Z, literal.x,
|
||||
; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
|
||||
; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.z,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T1.W
|
||||
; EG-NEXT: MOV T0.Y, OQAP,
|
||||
; EG-NEXT: OR_INT T0.W, T0.Z, T0.W,
|
||||
; EG-NEXT: MOV * T1.W, KC0[2].Y,
|
||||
; EG-NEXT: LDS_WRITE * T1.W, T0.W,
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.X,
|
||||
; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.Y,
|
||||
; EG-NEXT: RETURN
|
||||
entry:
|
||||
%ld = load <3 x i16>, ptr addrspace(3) %in
|
||||
|
||||
@ -991,30 +991,81 @@ define amdgpu_kernel void @s_test_imin_sle_v2i16(ptr addrspace(1) %out, <2 x i16
|
||||
define amdgpu_kernel void @s_test_imin_sle_v4i16(ptr addrspace(1) %out, <4 x i16> %a, <4 x i16> %b) #0 {
|
||||
; EG-LABEL: s_test_imin_sle_v4i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 0, @14, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 3 @6
|
||||
; EG-NEXT: ALU 9, @15, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
||||
; EG-NEXT: ALU 1, @28, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @12
|
||||
; EG-NEXT: ALU 9, @30, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @16
|
||||
; EG-NEXT: ALU 10, @40, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @20
|
||||
; EG-NEXT: ALU 10, @51, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @24
|
||||
; EG-NEXT: ALU 11, @62, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XY, T5.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
|
||||
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 52, #3
|
||||
; EG-NEXT: VTX_READ_16 T3.X, T0.X, 44, #3
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 54, #3
|
||||
; EG-NEXT: ALU clause starting at 14:
|
||||
; EG-NEXT: MOV * T0.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 15:
|
||||
; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
|
||||
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: Fetch clause starting at 12:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 50, #3
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 58, #3
|
||||
; EG-NEXT: Fetch clause starting at 16:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 48, #3
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 56, #3
|
||||
; EG-NEXT: Fetch clause starting at 20:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 46, #3
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T5.X, 54, #3
|
||||
; EG-NEXT: Fetch clause starting at 24:
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
|
||||
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 52, #3
|
||||
; EG-NEXT: ALU clause starting at 28:
|
||||
; EG-NEXT: MOV * T0.Y, T3.X,
|
||||
; EG-NEXT: MOV * T5.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 30:
|
||||
; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
|
||||
; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_INT T0.Y, PV.Z, PV.W,
|
||||
; EG-NEXT: BFE_INT T0.Z, T3.X, 0.0, literal.x,
|
||||
; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
|
||||
; EG-NEXT: LSHL T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, PV.X,
|
||||
; EG-NEXT: ALU clause starting at 40:
|
||||
; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
|
||||
; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_INT T0.X, PV.Z, PV.W,
|
||||
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T2.X,
|
||||
; EG-NEXT: ALU clause starting at 51:
|
||||
; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
|
||||
; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T2.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, PV.X,
|
||||
; EG-NEXT: ALU clause starting at 62:
|
||||
; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
|
||||
; EG-NEXT: BFE_INT * T0.W, T5.X, 0.0, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
|
||||
; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: AND_INT T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
|
||||
; EG-NEXT: 2(2.802597e-45), -65536(nan)
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T6.X, PV.W, PS,
|
||||
; EG-NEXT: MOV T2.X, PV.X,
|
||||
; EG-NEXT: MOV * T6.Y, T3.X,
|
||||
;
|
||||
; CI-LABEL: s_test_imin_sle_v4i16:
|
||||
; CI: ; %bb.0:
|
||||
@ -2154,40 +2205,49 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(ptr addrspace(1) %out, ptr addr
|
||||
define amdgpu_kernel void @v_test_umin_ule_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
|
||||
; EG-LABEL: v_test_umin_ule_v3i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 3, @14, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 3 @6
|
||||
; EG-NEXT: ALU 17, @18, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T4.XW, T0.X
|
||||
; EG-NEXT: ALU 3, @20, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 1 @8
|
||||
; EG-NEXT: ALU 11, @24, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 3 @12
|
||||
; EG-NEXT: ALU 8, @36, KC0[], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T8.X, 0
|
||||
; EG-NEXT: MEM_RAT MSKOR T7.XW, T0.X
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_16 T2.X, T1.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T3.X, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T1.X, 4, #1
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
|
||||
; EG-NEXT: ALU clause starting at 14:
|
||||
; EG-NEXT: Fetch clause starting at 8:
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T6.X, 4, #1
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T0.X, 4, #1
|
||||
; EG-NEXT: Fetch clause starting at 12:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T6.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_16 T6.X, T6.X, 2, #1
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
|
||||
; EG-NEXT: ALU clause starting at 20:
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: ADD_INT T0.X, KC0[2].Z, PV.W,
|
||||
; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 18:
|
||||
; EG-NEXT: ADD_INT * T6.X, KC0[2].W, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 24:
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
|
||||
; EG-NEXT: ADD_INT * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT * T2.W, PV.W, literal.x,
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T2.W, PV.W, literal.x,
|
||||
; EG-NEXT: MIN_UINT * T3.W, T0.X, T1.X,
|
||||
; EG-NEXT: MIN_UINT * T3.W, T8.X, T7.X,
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T4.X, PS, PV.W,
|
||||
; EG-NEXT: LSHL * T4.W, literal.x, PV.W,
|
||||
; EG-NEXT: LSHL T7.X, PS, PV.W,
|
||||
; EG-NEXT: LSHL * T7.W, literal.x, PV.W,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T4.Y, 0.0,
|
||||
; EG-NEXT: MOV * T4.Z, 0.0,
|
||||
; EG-NEXT: MOV * T7.Y, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 36:
|
||||
; EG-NEXT: MOV T7.Z, 0.0,
|
||||
; EG-NEXT: MIN_UINT * T2.W, T0.X, T6.X,
|
||||
; EG-NEXT: LSHR T0.X, T1.W, literal.x,
|
||||
; EG-NEXT: MIN_UINT * T1.X, T3.X, T2.X,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR * T2.X, T0.W, literal.x,
|
||||
; EG-NEXT: LSHL T1.W, PV.W, literal.y,
|
||||
; EG-NEXT: MIN_UINT * T2.W, T9.X, T8.X,
|
||||
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
|
||||
; EG-NEXT: OR_INT T6.X, PV.W, PS,
|
||||
; EG-NEXT: LSHR * T8.X, T0.W, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
;
|
||||
; CI-LABEL: v_test_umin_ule_v3i16:
|
||||
@ -3483,46 +3543,142 @@ define amdgpu_kernel void @s_test_umin_ult_v8i32(ptr addrspace(1) %out, <8 x i32
|
||||
define amdgpu_kernel void @s_test_umin_ult_v8i16(ptr addrspace(1) %out, <8 x i16> %a, <8 x i16> %b) #0 {
|
||||
; EG-LABEL: s_test_umin_ult_v8i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 0, @24, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 2 @8
|
||||
; EG-NEXT: ALU 2, @25, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 4 @14
|
||||
; EG-NEXT: ALU 14, @28, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
|
||||
; EG-NEXT: ALU 1, @52, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @20
|
||||
; EG-NEXT: ALU 9, @54, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @24
|
||||
; EG-NEXT: ALU 8, @64, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @28
|
||||
; EG-NEXT: ALU 10, @73, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @32
|
||||
; EG-NEXT: ALU 8, @84, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @36
|
||||
; EG-NEXT: ALU 10, @93, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @40
|
||||
; EG-NEXT: ALU 8, @104, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @44
|
||||
; EG-NEXT: ALU 10, @113, KC0[], KC1[]
|
||||
; EG-NEXT: TEX 1 @48
|
||||
; EG-NEXT: ALU 10, @124, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 8:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 62, #3
|
||||
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 60, #3
|
||||
; EG-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
|
||||
; EG-NEXT: Fetch clause starting at 14:
|
||||
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 68, #3
|
||||
; EG-NEXT: VTX_READ_16 T3.X, T0.X, 52, #3
|
||||
; EG-NEXT: VTX_READ_16 T4.X, T0.X, 70, #3
|
||||
; EG-NEXT: VTX_READ_16 T5.X, T0.X, 54, #3
|
||||
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 76, #3
|
||||
; EG-NEXT: ALU clause starting at 24:
|
||||
; EG-NEXT: MOV * T0.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 25:
|
||||
; EG-NEXT: AND_INT T0.W, T1.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T3.X, literal.x,
|
||||
; EG-NEXT: Fetch clause starting at 20:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 82, #3
|
||||
; EG-NEXT: Fetch clause starting at 24:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 80, #3
|
||||
; EG-NEXT: Fetch clause starting at 28:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 78, #3
|
||||
; EG-NEXT: Fetch clause starting at 32:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 76, #3
|
||||
; EG-NEXT: Fetch clause starting at 36:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 74, #3
|
||||
; EG-NEXT: Fetch clause starting at 40:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 72, #3
|
||||
; EG-NEXT: Fetch clause starting at 44:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
|
||||
; EG-NEXT: VTX_READ_16 T9.X, T7.X, 70, #3
|
||||
; EG-NEXT: Fetch clause starting at 48:
|
||||
; EG-NEXT: VTX_READ_16 T8.X, T7.X, 52, #3
|
||||
; EG-NEXT: VTX_READ_16 T7.X, T7.X, 68, #3
|
||||
; EG-NEXT: ALU clause starting at 52:
|
||||
; EG-NEXT: MOV * T0.Y, T3.X,
|
||||
; EG-NEXT: MOV * T7.X, 0.0,
|
||||
; EG-NEXT: ALU clause starting at 54:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: ALU clause starting at 28:
|
||||
; EG-NEXT: AND_INT T0.Z, T2.X, literal.x,
|
||||
; EG-NEXT: AND_INT T2.W, T0.X, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: MIN_UINT * T0.W, T0.W, T1.W,
|
||||
; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: LSHL T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, PV.X,
|
||||
; EG-NEXT: ALU clause starting at 64:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_UINT T0.Z, PV.Z, PV.W,
|
||||
; EG-NEXT: AND_INT T1.W, T5.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T4.X, literal.x,
|
||||
; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
|
||||
; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T3.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T2.X,
|
||||
; EG-NEXT: ALU clause starting at 73:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_UINT T0.Y, PV.W, PS,
|
||||
; EG-NEXT: AND_INT T1.W, T3.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T1.X, literal.x,
|
||||
; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_UINT T0.X, PV.W, PS,
|
||||
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T2.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, PV.X,
|
||||
; EG-NEXT: ALU clause starting at 84:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
|
||||
; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
|
||||
; EG-NEXT: MOV T2.X, PV.Z,
|
||||
; EG-NEXT: MOV * T0.Y, T5.X,
|
||||
; EG-NEXT: ALU clause starting at 93:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T5.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, PV.X,
|
||||
; EG-NEXT: ALU clause starting at 104:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
|
||||
; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: MOV T5.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, T4.X,
|
||||
; EG-NEXT: ALU clause starting at 113:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T4.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.Y, PV.X,
|
||||
; EG-NEXT: ALU clause starting at 124:
|
||||
; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T7.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: AND_INT T2.W, T0.Y, literal.y,
|
||||
; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
|
||||
; EG-NEXT: 2(2.802597e-45), -65536(nan)
|
||||
; EG-NEXT: OR_INT * T7.X, PV.W, PS,
|
||||
; EG-NEXT: MOV T4.X, PV.X,
|
||||
; EG-NEXT: MOV * T7.W, T3.X,
|
||||
; EG-NEXT: MOV * T7.Y, T5.X,
|
||||
;
|
||||
; CI-LABEL: s_test_umin_ult_v8i16:
|
||||
; CI: ; %bb.0:
|
||||
|
||||
@ -681,30 +681,63 @@ define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in
|
||||
;
|
||||
; EG-LABEL: shl_v4i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 0 @6
|
||||
; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T8.X, 1
|
||||
; EG-NEXT: ALU 42, @12, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_128 T8.XYZW, T0.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_128 T10.XYZW, T0.X, 0, #1
|
||||
; EG-NEXT: ALU clause starting at 8:
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
|
||||
; EG-NEXT: MOV T0.Y, T6.X,
|
||||
; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
|
||||
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
|
||||
; EG-NEXT: ALU clause starting at 11:
|
||||
; EG-NEXT: LSHR T1.W, T8.Z, literal.x,
|
||||
; EG-NEXT: LSHR * T2.W, T8.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T0.Y, PS, PV.W,
|
||||
; EG-NEXT: AND_INT T1.W, T8.Z, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T8.X, literal.x,
|
||||
; EG-NEXT: ALU clause starting at 12:
|
||||
; EG-NEXT: AND_INT * T1.W, T10.Z, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T0.X, PS, PV.W,
|
||||
; EG-NEXT: LSHL * T1.W, T10.X, PV.W,
|
||||
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), -65536(nan)
|
||||
; EG-NEXT: OR_INT * T1.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T6.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: LSHR T1.W, T10.Z, literal.x,
|
||||
; EG-NEXT: LSHR * T2.W, T10.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T1.W, PS, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
|
||||
; EG-NEXT: MOV T6.X, PV.W,
|
||||
; EG-NEXT: MOV * T0.X, T7.X,
|
||||
; EG-NEXT: AND_INT * T1.W, T10.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL T1.W, T10.Y, PV.W,
|
||||
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT * T1.W, PV.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
|
||||
; EG-NEXT: MOV * T7.X, PV.W,
|
||||
; EG-NEXT: MOV T0.X, PV.X,
|
||||
; EG-NEXT: LSHR T1.W, T10.W, literal.x,
|
||||
; EG-NEXT: LSHR * T2.W, T10.Y, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T1.W, PS, PV.W,
|
||||
; EG-NEXT: AND_INT T0.Z, T0.X, literal.x,
|
||||
; EG-NEXT: LSHL T1.W, PV.W, literal.y,
|
||||
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
|
||||
; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
|
||||
; EG-NEXT: LSHR T0.X, PS, literal.x,
|
||||
; EG-NEXT: OR_INT * T10.Y, PV.Z, PV.W,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T7.X, PV.Y,
|
||||
; EG-NEXT: MOV * T10.X, T6.X,
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
|
||||
%gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
|
||||
|
||||
@ -320,28 +320,67 @@ define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %i
|
||||
;
|
||||
; EG-LABEL: ashr_v4i16:
|
||||
; EG: ; %bb.0:
|
||||
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: TEX 0 @6
|
||||
; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XY, T8.X, 1
|
||||
; EG-NEXT: ALU 48, @10, KC0[CB0:0-32], KC1[]
|
||||
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T9.X, 1
|
||||
; EG-NEXT: CF_END
|
||||
; EG-NEXT: PAD
|
||||
; EG-NEXT: Fetch clause starting at 6:
|
||||
; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
|
||||
; EG-NEXT: VTX_READ_128 T9.XYZW, T9.X, 0, #1
|
||||
; EG-NEXT: ALU clause starting at 8:
|
||||
; EG-NEXT: MOV * T7.X, KC0[2].Z,
|
||||
; EG-NEXT: ALU clause starting at 9:
|
||||
; EG-NEXT: LSHR T0.Z, T7.X, literal.x,
|
||||
; EG-NEXT: BFE_INT T0.W, T7.X, 0.0, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T7.Z, literal.y,
|
||||
; EG-NEXT: MOV * T0.Y, T6.X,
|
||||
; EG-NEXT: MOV * T9.X, KC0[2].Z,
|
||||
; EG-NEXT: ALU clause starting at 10:
|
||||
; EG-NEXT: BFE_INT T0.W, T9.X, 0.0, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.Z, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: ASHR T7.X, PV.W, PS,
|
||||
; EG-NEXT: BFE_INT T0.W, PV.Z, 0.0, literal.x,
|
||||
; EG-NEXT: LSHR * T1.W, T7.Z, literal.x,
|
||||
; EG-NEXT: ASHR * T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
|
||||
; EG-NEXT: 65535(9.183409e-41), -65536(nan)
|
||||
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
||||
; EG-NEXT: MOV * T6.X, PV.W,
|
||||
; EG-NEXT: MOV T0.Y, PV.X,
|
||||
; EG-NEXT: LSHR * T0.W, T9.X, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: ASHR * T7.Y, PV.W, PS,
|
||||
; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
|
||||
; EG-NEXT: LSHR * T1.W, T9.Z, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: ASHR T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV T6.X, PV.W,
|
||||
; EG-NEXT: MOV T0.Y, T7.X,
|
||||
; EG-NEXT: BFE_INT T0.W, T9.Y, 0.0, literal.x,
|
||||
; EG-NEXT: AND_INT * T1.W, T9.W, literal.y,
|
||||
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
|
||||
; EG-NEXT: ASHR T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
|
||||
; EG-NEXT: -65536(nan), 0(0.000000e+00)
|
||||
; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
|
||||
; EG-NEXT: MOV * T7.X, PV.W,
|
||||
; EG-NEXT: MOV T0.Y, PV.X,
|
||||
; EG-NEXT: LSHR * T0.W, T9.Y, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
|
||||
; EG-NEXT: LSHR * T1.W, T9.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: ASHR T0.W, PV.W, PS,
|
||||
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
||||
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
|
||||
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
||||
; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x,
|
||||
; EG-NEXT: OR_INT * T10.Y, T1.W, PV.W,
|
||||
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||
; EG-NEXT: MOV T7.X, PV.Y,
|
||||
; EG-NEXT: MOV * T10.X, T6.X,
|
||||
%b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
|
||||
%a = load <4 x i16>, ptr addrspace(1) %in
|
||||
%b = load <4 x i16>, ptr addrspace(1) %b_ptr
|
||||
|
||||
33
llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
Normal file
33
llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=riscv64 -mattr='+v' < %s | FileCheck %s
|
||||
|
||||
define <2 x i8> @fp4(<4 x i4> %0) nounwind {
|
||||
; CHECK-LABEL: fp4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi sp, sp, -16
|
||||
; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
|
||||
; CHECK-NEXT: vmv.x.s a0, v8
|
||||
; CHECK-NEXT: vslidedown.vi v9, v8, 1
|
||||
; CHECK-NEXT: vmv.x.s a1, v9
|
||||
; CHECK-NEXT: vslidedown.vi v9, v8, 2
|
||||
; CHECK-NEXT: vslidedown.vi v8, v8, 3
|
||||
; CHECK-NEXT: andi a0, a0, 15
|
||||
; CHECK-NEXT: vmv.x.s a2, v9
|
||||
; CHECK-NEXT: andi a1, a1, 15
|
||||
; CHECK-NEXT: slli a1, a1, 4
|
||||
; CHECK-NEXT: or a0, a0, a1
|
||||
; CHECK-NEXT: vmv.x.s a1, v8
|
||||
; CHECK-NEXT: andi a2, a2, 15
|
||||
; CHECK-NEXT: slli a1, a1, 12
|
||||
; CHECK-NEXT: slli a2, a2, 8
|
||||
; CHECK-NEXT: or a1, a2, a1
|
||||
; CHECK-NEXT: or a0, a0, a1
|
||||
; CHECK-NEXT: sh a0, 14(sp)
|
||||
; CHECK-NEXT: addi a0, sp, 14
|
||||
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
||||
; CHECK-NEXT: vle8.v v8, (a0)
|
||||
; CHECK-NEXT: addi sp, sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%2 = bitcast <4 x i4> %0 to <2 x i8>
|
||||
ret <2 x i8> %2
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user