
This reverts commit 14cd1339318b16e08c1363ec6896bd7d1e4ae281.

The buildbot failure seems to have been a CMake issue, which is discussed in more detail in this Discourse post:
https://discourse.llvm.org/t/cmake-doesnt-regenerate-all-tablegen-target-files/87901

If any buildbots fail to select arbitrary intrinsics with this patch, it's worth considering using clean builds with ccache instead of incremental builds, as recommended here:
https://llvm.org/docs/HowToAddABuilder.html#:~:text=Use%20CCache%20and%20NOT%20incremental%20builds

The original commit message for this patch:

Add the llvm.amdgcn.call.whole.wave intrinsic for calling whole wave functions. It takes as its first argument the callee, which must have the amdgpu_gfx_whole_wave calling convention, followed by the call parameters, which must match the signature of the callee except for the first function argument (the i1 original EXEC mask, which doesn't need to be passed in). Indirect calls are not allowed.

Make direct calls to amdgpu_gfx_whole_wave functions a verifier error.

Tail calls are handled in a future patch.
268 lines
18 KiB
LLVM
268 lines
18 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -stop-after=finalize-isel < %s | FileCheck --check-prefix=DAGISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -stop-after=finalize-isel < %s | FileCheck --check-prefix=GISEL %s

; Instruction-selection test for functions using the amdgpu_gfx_whole_wave
; calling convention. Both pipelines above stop after finalize-isel: the first
; one (-global-isel=0) is matched with the DAGISEL prefix, the second one
; (-global-isel=1) with the GISEL prefix.
|
|
|
|
; Basic whole-wave function: picks between each i32 argument and a constant
; based on %active, then combines both results with llvm.amdgcn.update.dpp.
; The expected MIR has only $vgpr0/$vgpr1 as live-ins (for %a and %b); the
; value used for %active comes from SI_WHOLE_WAVE_FUNC_SETUP, which feeds the
; V_CNDMASK selects and SI_WHOLE_WAVE_FUNC_RETURN.
define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
  ; DAGISEL-LABEL: name: basic_test
  ; DAGISEL: bb.0 (%ir-block.0):
  ; DAGISEL-NEXT: liveins: $vgpr0, $vgpr1
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; DAGISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32_xm0_xexec = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; DAGISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[S_MOV_B32_]], 0, [[COPY1]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; DAGISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[S_MOV_B32_1]], 0, [[COPY]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; DAGISEL-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_CNDMASK_B32_e64_]], killed [[V_CNDMASK_B32_e64_1]], 1, 1, 1, 0, implicit $exec
  ; DAGISEL-NEXT: $vgpr0 = COPY [[V_MOV_B32_dpp]]
  ; DAGISEL-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0
  ;
  ; GISEL-LABEL: name: basic_test
  ; GISEL: bb.1 (%ir-block.0):
  ; GISEL-NEXT: liveins: $vgpr0, $vgpr1
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32_xm0_xexec = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; GISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5
  ; GISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3
  ; GISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY1]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], 1, 1, 1, 0, implicit $exec
  ; GISEL-NEXT: $vgpr0 = COPY [[V_MOV_B32_dpp]]
  ; GISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0
  %x = select i1 %active, i32 %a, i32 5
  %y = select i1 %active, i32 %b, i32 3
  %ret = call i32 @llvm.amdgcn.update.dpp.i32(i32 %x, i32 %y, i32 1, i32 1, i32 1, i1 false)
  ret i32 %ret
}
|
|
|
|
; Make sure we don't crash if %active is not used at all.
; The function only returns the constant 14. The expected MIR still contains
; SI_WHOLE_WAVE_FUNC_SETUP, and its result is the operand of
; SI_WHOLE_WAVE_FUNC_RETURN.
define amdgpu_gfx_whole_wave i32 @unused_active(i1 %active, i32 %a, i32 %b) {
  ; DAGISEL-LABEL: name: unused_active
  ; DAGISEL: bb.0 (%ir-block.0):
  ; DAGISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; DAGISEL-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 14, implicit $exec
  ; DAGISEL-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
  ; DAGISEL-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0
  ;
  ; GISEL-LABEL: name: unused_active
  ; GISEL: bb.1 (%ir-block.0):
  ; GISEL-NEXT: liveins: $vgpr0, $vgpr1
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32_xm0_xexec = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; GISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 14
  ; GISEL-NEXT: $vgpr0 = COPY [[S_MOV_B32_]]
  ; GISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0
  ret i32 14
}
|
|
|
|
; Whole-wave function with control flow: %active is only used in %if.end,
; after a conditional branch. The expected MIR emits SI_WHOLE_WAVE_FUNC_SETUP
; in the entry block and uses its result (through copies in the SelectionDAG
; output) for the select in the last block and for SI_WHOLE_WAVE_FUNC_RETURN.
define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
  ; DAGISEL-LABEL: name: multiple_blocks
  ; DAGISEL: bb.0 (%ir-block.0):
  ; DAGISEL-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; DAGISEL-NEXT: liveins: $vgpr0, $vgpr1
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; DAGISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[SI_WHOLE_WAVE_FUNC_SETUP]]
  ; DAGISEL-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[COPY]], implicit $exec
  ; DAGISEL-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
  ; DAGISEL-NEXT: S_BRANCH %bb.1
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: bb.1.if.then:
  ; DAGISEL-NEXT: successors: %bb.2(0x80000000)
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: bb.2.if.end:
  ; DAGISEL-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[V_ADD_U32_e64_]], %bb.1
  ; DAGISEL-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
  ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]]
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PHI]], 0, [[COPY1]], [[COPY3]], implicit $exec
  ; DAGISEL-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
  ; DAGISEL-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0
  ;
  ; GISEL-LABEL: name: multiple_blocks
  ; GISEL: bb.1 (%ir-block.0):
  ; GISEL-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
  ; GISEL-NEXT: liveins: $vgpr0, $vgpr1
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32_xm0_xexec = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; GISEL-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
  ; GISEL-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GISEL-NEXT: S_BRANCH %bb.2
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: bb.2.if.then:
  ; GISEL-NEXT: successors: %bb.3(0x80000000)
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: bb.3.if.end:
  ; GISEL-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]], %bb.1, [[V_ADD_U32_e64_]], %bb.2
  ; GISEL-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[PHI]], 0, [[COPY]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
  ; GISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0
  %c = icmp eq i32 %a, %b
  br i1 %c, label %if.then, label %if.end

if.then: ; preds = %0
  %d = add i32 %a, %b
  br label %if.end

if.end:
  %f = phi i32 [ %d, %if.then ], [ %b, %0 ]
  %e = select i1 %active, i32 %a, i32 %f
  ret i32 %e
}
|
|
|
|
; 64-bit variant of the basic test: i64 arguments and an i64 return value.
; The expected MIR takes the two i64 arguments in $vgpr0-$vgpr3, performs the
; selects and the DPP move as 32-bit halves, and returns the result in
; $vgpr0/$vgpr1.
define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
  ; DAGISEL-LABEL: name: ret_64
  ; DAGISEL: bb.0 (%ir-block.0):
  ; DAGISEL-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; DAGISEL-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
  ; DAGISEL-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
  ; DAGISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; DAGISEL-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[SI_WHOLE_WAVE_FUNC_SETUP]]
  ; DAGISEL-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
  ; DAGISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_]], 0, killed [[COPY5]], [[COPY4]], implicit $exec
  ; DAGISEL-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
  ; DAGISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[S_MOV_B32_1]], 0, killed [[COPY6]], [[COPY4]], implicit $exec
  ; DAGISEL-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_]], 0, killed [[COPY7]], [[COPY4]], implicit $exec
  ; DAGISEL-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
  ; DAGISEL-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 3
  ; DAGISEL-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[S_MOV_B32_2]], 0, killed [[COPY8]], [[COPY4]], implicit $exec
  ; DAGISEL-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_CNDMASK_B32_e64_1]], killed [[V_CNDMASK_B32_e64_3]], 1, 1, 1, 0, implicit $exec
  ; DAGISEL-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_CNDMASK_B32_e64_]], killed [[V_CNDMASK_B32_e64_2]], 1, 1, 1, 0, implicit $exec
  ; DAGISEL-NEXT: $vgpr0 = COPY [[V_MOV_B32_dpp]]
  ; DAGISEL-NEXT: $vgpr1 = COPY [[V_MOV_B32_dpp1]]
  ; DAGISEL-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0, implicit $vgpr1
  ;
  ; GISEL-LABEL: name: ret_64
  ; GISEL: bb.1 (%ir-block.0):
  ; GISEL-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GISEL-NEXT: [[SI_WHOLE_WAVE_FUNC_SETUP:%[0-9]+]]:sreg_32_xm0_xexec = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_]], 0, [[COPY]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_1]], 0, [[COPY1]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[COPY2]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_3]], 0, [[COPY3]], [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_2]], 1, 1, 1, 0, implicit $exec
  ; GISEL-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_CNDMASK_B32_e64_1]], [[V_CNDMASK_B32_e64_3]], 1, 1, 1, 0, implicit $exec
  ; GISEL-NEXT: $vgpr0 = COPY [[V_MOV_B32_dpp]]
  ; GISEL-NEXT: $vgpr1 = COPY [[V_MOV_B32_dpp1]]
  ; GISEL-NEXT: SI_WHOLE_WAVE_FUNC_RETURN [[SI_WHOLE_WAVE_FUNC_SETUP]], implicit $vgpr0, implicit $vgpr1
  %x = select i1 %active, i64 %a, i64 5
  %y = select i1 %active, i64 %b, i64 3
  %ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
  ret i64 %ret
}
|
|
|
|
; Whole-wave function that @call targets through llvm.amdgcn.call.whole.wave.
; Only a declaration is visible here; its parameters are the leading i1
; %active followed by the <8 x i32> payload.
declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, <8 x i32> %x)
|
|
|
|
; Make sure we don't pass the first argument (i1).
; @call invokes @callee through llvm.amdgcn.call.whole.wave with only the
; <8 x i32> value. In the expected MIR the SI_CALL has $vgpr0-$vgpr7 as its
; implicit uses, and the i32 result read from $vgpr0 is stored to %p.
define amdgpu_cs void @call(<8 x i32> %x, ptr %p) {
  ; DAGISEL-LABEL: name: call
  ; DAGISEL: bb.0 (%ir-block.0):
  ; DAGISEL-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
  ; DAGISEL-NEXT: {{ $}}
  ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr9
  ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; DAGISEL-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; DAGISEL-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; DAGISEL-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; DAGISEL-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; DAGISEL-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; DAGISEL-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; DAGISEL-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; DAGISEL-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
  ; DAGISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
  ; DAGISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
  ; DAGISEL-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
  ; DAGISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
  ; DAGISEL-NEXT: $vgpr0 = COPY [[COPY9]]
  ; DAGISEL-NEXT: $vgpr1 = COPY [[COPY8]]
  ; DAGISEL-NEXT: $vgpr2 = COPY [[COPY7]]
  ; DAGISEL-NEXT: $vgpr3 = COPY [[COPY6]]
  ; DAGISEL-NEXT: $vgpr4 = COPY [[COPY5]]
  ; DAGISEL-NEXT: $vgpr5 = COPY [[COPY4]]
  ; DAGISEL-NEXT: $vgpr6 = COPY [[COPY3]]
  ; DAGISEL-NEXT: $vgpr7 = COPY [[COPY2]]
  ; DAGISEL-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit-def $vgpr0
  ; DAGISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
  ; DAGISEL-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; DAGISEL-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; DAGISEL-NEXT: FLAT_STORE_DWORD killed [[COPY11]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
  ; DAGISEL-NEXT: S_ENDPGM 0
  ;
  ; GISEL-LABEL: name: call
  ; GISEL: bb.1 (%ir-block.0):
  ; GISEL-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
  ; GISEL-NEXT: {{ $}}
  ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GISEL-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GISEL-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GISEL-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GISEL-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GISEL-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; GISEL-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
  ; GISEL-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
  ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
  ; GISEL-NEXT: $vgpr0 = COPY [[COPY]]
  ; GISEL-NEXT: $vgpr1 = COPY [[COPY1]]
  ; GISEL-NEXT: $vgpr2 = COPY [[COPY2]]
  ; GISEL-NEXT: $vgpr3 = COPY [[COPY3]]
  ; GISEL-NEXT: $vgpr4 = COPY [[COPY4]]
  ; GISEL-NEXT: $vgpr5 = COPY [[COPY5]]
  ; GISEL-NEXT: $vgpr6 = COPY [[COPY6]]
  ; GISEL-NEXT: $vgpr7 = COPY [[COPY7]]
  ; GISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
  ; GISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
  ; GISEL-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
  ; GISEL-NEXT: $sgpr30_sgpr31 = SI_CALL [[REG_SEQUENCE1]], @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit-def $vgpr0
  ; GISEL-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
  ; GISEL-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
  ; GISEL-NEXT: S_ENDPGM 0
  %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x i32> %x) convergent
  store i32 %ret, ptr %p
  ret void
}
|