[AMDGPU] Support bfloat comparison for ballot intrinsic (#165495)
AMDGPU has no native instructions for comparing bfloat values directly. Instead, we extend the bfloat operands to float and perform the comparison in float. TODO: handle bfloat comparison for the ballot intrinsic on the GlobalISel path. Fixes: SWDEV-563403
This commit is contained in:
parent
88cee4c737
commit
6b5afdc3ab
@ -7035,9 +7035,15 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
|
||||
SDLoc SL(N);
|
||||
|
||||
if (Src.getOpcode() == ISD::SETCC) {
|
||||
SDValue Op0 = Src.getOperand(0);
|
||||
SDValue Op1 = Src.getOperand(1);
|
||||
// Need to expand bfloat to float for comparison (setcc).
|
||||
if (Op0.getValueType() == MVT::bf16) {
|
||||
Op0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op0);
|
||||
Op1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op1);
|
||||
}
|
||||
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
|
||||
return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
|
||||
Src.getOperand(1), Src.getOperand(2));
|
||||
return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Op0, Op1, Src.getOperand(2));
|
||||
}
|
||||
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
|
||||
// (ballot 0) -> 0
|
||||
|
||||
@ -591,3 +591,24 @@ exit:
|
||||
store i32 %ballot, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ballot (i32 result) of an ordered greater-than compare on two bfloat arguments; the checks below expect a single f32 compare (v_cmp_gt_f32) on the widened operands.
define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) {
|
||||
; GFX10-LABEL: compare_bfloats:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX10-NEXT: v_cmp_gt_f32_e64 s0, v0, v1
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX11-LABEL: compare_bfloats:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: v_mov_b16_e32 v2.l, 0
|
||||
; GFX11-NEXT: v_mov_b16_e32 v2.h, v1.l
|
||||
; GFX11-NEXT: v_mov_b16_e32 v1.h, v0.l
|
||||
; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l
|
||||
; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2
|
||||
; GFX11-NEXT: ; return to shader part epilog
|
||||
%cmp = fcmp ogt bfloat %x, %y
|
||||
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
|
||||
ret i32 %ballot
|
||||
}
|
||||
|
||||
@ -557,3 +557,15 @@ exit:
|
||||
store i64 %ballot, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Ballot (i64 result) of an ordered greater-than compare on two bfloat arguments; the checks below expect both operands shifted left by 16 and a single f32 compare writing a 64-bit mask (s[0:1]).
define amdgpu_cs i64 @compare_bfloats(bfloat %x, bfloat %y) {
|
||||
; CHECK-LABEL: compare_bfloats:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
%cmp = fcmp ogt bfloat %x, %y
|
||||
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
|
||||
ret i64 %ballot
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user