[AMDGPU] llvm.amdgcn.exp.compr is not supported on GFX11
Differential Revision: https://reviews.llvm.org/D128259
This commit is contained in:
parent
8cf28585a4
commit
3fbc945c3a
@ -1824,10 +1824,17 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
|
||||
return selectBufferLoadLds(I);
|
||||
case Intrinsic::amdgcn_global_load_lds:
|
||||
return selectGlobalLoadLds(I);
|
||||
default: {
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
case Intrinsic::amdgcn_exp_compr:
|
||||
if (!STI.hasCompressedExport()) {
|
||||
Function &F = I.getMF()->getFunction();
|
||||
DiagnosticInfoUnsupported NoFpRet(
|
||||
F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
|
||||
F.getContext().diagnose(NoFpRet);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
|
||||
|
||||
@ -7864,6 +7864,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_exp_compr: {
|
||||
if (!Subtarget->hasCompressedExport()) {
|
||||
DiagnosticInfoUnsupported BadIntrin(
|
||||
DAG.getMachineFunction().getFunction(),
|
||||
"intrinsic not supported on subtarget", DL.getDebugLoc());
|
||||
DAG.getContext()->diagnose(BadIntrin);
|
||||
}
|
||||
SDValue Src0 = Op.getOperand(4);
|
||||
SDValue Src1 = Op.getOperand(5);
|
||||
// Hack around illegal type on SI by directly selecting it.
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
|
||||
# RUN: not llc -march=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -global-isel -global-isel-abort=0 %s -o - 2>&1 | FileCheck --check-prefix=ERR %s
|
||||
|
||||
# ERR: error: <unknown>:0:0: in function exp0 void (): intrinsic not supported on subtarget
|
||||
|
||||
---
|
||||
name: exp0
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
# CHECK: name: exp0
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
|
||||
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32)
|
||||
|
||||
; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: EXP 1, %0, %0, [[UNDEF1]], [[UNDEF0]], 0, 1, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0
|
||||
|
||||
; CHECK: [[UNDEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: [[UNDEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: EXP_DONE 1, %0, %0, [[UNDEF3]], [[UNDEF2]], 0, 1, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), -1, 0
|
||||
|
||||
...
|
||||
@ -18,16 +18,4 @@ body: |
|
||||
; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), -1, 0
|
||||
|
||||
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32)
|
||||
|
||||
; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: EXP 1, %0, %0, [[UNDEF1]], [[UNDEF0]], 0, 1, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0
|
||||
|
||||
; CHECK: [[UNDEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: [[UNDEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; CHECK: EXP_DONE 1, %0, %0, [[UNDEF3]], [[UNDEF2]], 0, 1, 15, implicit $exec
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), -1, 0
|
||||
|
||||
...
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
|
||||
; RUN: not llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s 2>&1 | FileCheck -strict-whitespace -check-prefix=ERR %s
|
||||
|
||||
; ERR: error: <unknown>:0:0: in function test_export_compr_zeroes_v2f16 void (): intrinsic not supported on subtarget
|
||||
|
||||
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
|
||||
declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #0
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,PREGFX11 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
||||
@ -133,12 +134,12 @@ define amdgpu_kernel void @test_export_z_f32() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_null_f32:
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_null_f32() #0 {
|
||||
call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
@ -198,12 +199,12 @@ define amdgpu_kernel void @test_export_pos3_f32() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_param0_f32:
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_param0_f32() #0 {
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
@ -211,12 +212,12 @@ define amdgpu_kernel void @test_export_param0_f32() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_param31_f32:
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_param31_f32() #0 {
|
||||
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
@ -228,8 +229,10 @@ define amdgpu_kernel void @test_export_param31_f32() #0 {
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
|
||||
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
|
||||
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
|
||||
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
|
||||
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
|
||||
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_vm_f32() #0 {
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
|
||||
@ -378,12 +381,12 @@ define amdgpu_kernel void @test_export_z_i32() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_null_i32:
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_null_i32() #0 {
|
||||
call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
|
||||
@ -443,12 +446,12 @@ define amdgpu_kernel void @test_export_pos3_i32() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_param0_i32:
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_param0_i32() #0 {
|
||||
call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
|
||||
@ -456,12 +459,12 @@ define amdgpu_kernel void @test_export_param0_i32() #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_param31_i32:
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_param31_i32() #0 {
|
||||
call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
|
||||
@ -473,8 +476,10 @@ define amdgpu_kernel void @test_export_param31_i32() #0 {
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
|
||||
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
|
||||
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
|
||||
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
|
||||
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
|
||||
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
|
||||
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
|
||||
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_vm_i32() #0 {
|
||||
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
|
||||
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
|
||||
@ -542,14 +547,14 @@ end:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_clustering:
|
||||
; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
|
||||
; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
|
||||
; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
|
||||
; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
|
||||
; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
|
||||
; PREGFX11-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
|
||||
; PREGFX11-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
|
||||
; PREGFX11-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
|
||||
; PREGFX11: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
|
||||
; PREGFX11-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
|
||||
%z0 = fadd float %x, %y
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
|
||||
@ -559,9 +564,9 @@ define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_pos_before_param:
|
||||
; GCN: exp pos0
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: exp param0
|
||||
; PREGFX11: exp pos0
|
||||
; PREGFX11-NOT: s_waitcnt
|
||||
; PREGFX11: exp param0
|
||||
define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
|
||||
%z0 = fadd float %x, %y
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
|
||||
@ -583,13 +588,13 @@ define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
|
||||
; GCN: exp pos0
|
||||
; GCN: exp pos1
|
||||
; GCN: exp pos2
|
||||
; GCN-NOT: s_waitcnt
|
||||
; GCN: exp param0
|
||||
; GCN: exp param1
|
||||
; GCN: exp param2
|
||||
; PREGFX11: exp pos0
|
||||
; PREGFX11: exp pos1
|
||||
; PREGFX11: exp pos2
|
||||
; PREGFX11-NOT: s_waitcnt
|
||||
; PREGFX11: exp param0
|
||||
; PREGFX11: exp param1
|
||||
; PREGFX11: exp param2
|
||||
define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
|
||||
%z0 = fadd float %x, %y
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
|
||||
@ -603,9 +608,9 @@ define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
|
||||
; GCN: exp pos0
|
||||
; GCN-NEXT: exp param0
|
||||
; GCN-NEXT: exp param1
|
||||
; PREGFX11: exp pos0
|
||||
; PREGFX11-NEXT: exp param0
|
||||
; PREGFX11-NEXT: exp param1
|
||||
define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
|
||||
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
|
||||
@ -615,11 +620,11 @@ define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_across_store_load:
|
||||
; GCN: buffer_store
|
||||
; GCN: buffer_load
|
||||
; GCN: exp pos0
|
||||
; GCN: exp param0
|
||||
; GCN: exp param1
|
||||
; PREGFX11: buffer_store
|
||||
; PREGFX11: buffer_load
|
||||
; PREGFX11: exp pos0
|
||||
; PREGFX11: exp param0
|
||||
; PREGFX11: exp param1
|
||||
define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
|
||||
%data0 = alloca <4 x float>, align 8, addrspace(5)
|
||||
%data1 = alloca <4 x float>, align 8, addrspace(5)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=NOPRIM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
|
||||
|
||||
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user