[GlobalISel] Remove UnsafeFPMath references (#146319)
This is the GlobalISel part of the effort to remove the `UnsafeFPMath` flag from the CodeGen pipeline.
This commit is contained in:
parent
b39160ddfb
commit
ce86ff105b
@ -5949,8 +5949,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
|
||||
const TargetOptions &Options = MF->getTarget().Options;
|
||||
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
|
||||
|
||||
if (CanReassociate &&
|
||||
!(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
|
||||
if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
|
||||
return false;
|
||||
|
||||
// Floating-point multiply-add with intermediate rounding.
|
||||
@ -5962,8 +5961,7 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
|
||||
if (!HasFMAD && !HasFMA)
|
||||
return false;
|
||||
|
||||
AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
|
||||
Options.UnsafeFPMath || HasFMAD;
|
||||
AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
|
||||
// If the addition is not contractable, do not combine.
|
||||
if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
|
||||
return false;
|
||||
|
||||
@ -8004,7 +8004,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
|
||||
if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
|
||||
return UnableToLegalize;
|
||||
|
||||
if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
|
||||
if (MI.getFlag(MachineInstr::FmAfn)) {
|
||||
unsigned Flags = MI.getFlags();
|
||||
auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
|
||||
MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -24,8 +24,8 @@ body: |
|
||||
%ptr:_(p1) = COPY $vgpr2_vgpr3
|
||||
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
|
||||
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
|
||||
%6:_(s32) = G_FMUL %0, %1
|
||||
%7:_(s32) = G_FADD %6, %el1
|
||||
%6:_(s32) = contract G_FMUL %0, %1
|
||||
%7:_(s32) = contract G_FADD %6, %el1
|
||||
$vgpr0 = COPY %7(s32)
|
||||
...
|
||||
|
||||
@ -54,8 +54,8 @@ body: |
|
||||
%ptr:_(p1) = COPY $vgpr2_vgpr3
|
||||
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
|
||||
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
|
||||
%6:_(s32) = G_FMUL %0, %1
|
||||
%7:_(s32) = G_FADD %el1, %6
|
||||
%6:_(s32) = contract G_FMUL %0, %1
|
||||
%7:_(s32) = contract G_FADD %el1, %6
|
||||
$vgpr0 = COPY %7(s32)
|
||||
...
|
||||
|
||||
@ -233,10 +233,10 @@ body: |
|
||||
%7:_(s16) = G_TRUNC %6(s32)
|
||||
%8:_(s32) = COPY $vgpr5
|
||||
%9:_(s16) = G_TRUNC %8(s32)
|
||||
%10:_(s16) = G_FMUL %7, %9
|
||||
%10:_(s16) = contract G_FMUL %7, %9
|
||||
%11:_(s32) = G_FPEXT %10(s16)
|
||||
%12:_(s32) = G_FMA %0, %1, %11
|
||||
%13:_(s32) = G_FADD %12, %el1
|
||||
%13:_(s32) = contract G_FADD %12, %el1
|
||||
$vgpr0 = COPY %13(s32)
|
||||
...
|
||||
|
||||
@ -282,11 +282,11 @@ body: |
|
||||
%9:_(s16) = G_TRUNC %8(s32)
|
||||
%10:_(s32) = COPY $vgpr5
|
||||
%11:_(s16) = G_TRUNC %10(s32)
|
||||
%12:_(s16) = G_FMUL %9, %11
|
||||
%13:_(s16) = G_FMUL %1, %3
|
||||
%14:_(s16) = G_FADD %13, %12
|
||||
%12:_(s16) = contract G_FMUL %9, %11
|
||||
%13:_(s16) = contract G_FMUL %1, %3
|
||||
%14:_(s16) = contract G_FADD %13, %12
|
||||
%15:_(s32) = G_FPEXT %14(s16)
|
||||
%16:_(s32) = G_FADD %15, %el1
|
||||
%16:_(s32) = contract G_FADD %15, %el1
|
||||
$vgpr0 = COPY %16(s32)
|
||||
...
|
||||
|
||||
@ -326,10 +326,10 @@ body: |
|
||||
%7:_(s16) = G_TRUNC %6(s32)
|
||||
%8:_(s32) = COPY $vgpr5
|
||||
%9:_(s16) = G_TRUNC %8(s32)
|
||||
%10:_(s16) = G_FMUL %7, %9
|
||||
%10:_(s16) = contract G_FMUL %7, %9
|
||||
%11:_(s32) = G_FPEXT %10(s16)
|
||||
%12:_(s32) = G_FMA %4, %5, %11
|
||||
%13:_(s32) = G_FADD %el1, %12
|
||||
%13:_(s32) = contract G_FADD %el1, %12
|
||||
$vgpr0 = COPY %13(s32)
|
||||
...
|
||||
|
||||
@ -375,11 +375,11 @@ body: |
|
||||
%9:_(s16) = G_TRUNC %8(s32)
|
||||
%10:_(s32) = COPY $vgpr5
|
||||
%11:_(s16) = G_TRUNC %10(s32)
|
||||
%12:_(s16) = G_FMUL %9, %11
|
||||
%13:_(s16) = G_FMUL %5, %7
|
||||
%14:_(s16) = G_FADD %13, %12
|
||||
%12:_(s16) = contract G_FMUL %9, %11
|
||||
%13:_(s16) = contract G_FMUL %5, %7
|
||||
%14:_(s16) = contract G_FADD %13, %12
|
||||
%15:_(s32) = G_FPEXT %14(s16)
|
||||
%16:_(s32) = G_FADD %el1, %15
|
||||
%16:_(s32) = contract G_FADD %el1, %15
|
||||
$vgpr0 = COPY %16(s32)
|
||||
...
|
||||
|
||||
@ -409,8 +409,8 @@ body: |
|
||||
%ptr:_(p1) = COPY $vgpr0_vgpr1
|
||||
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
|
||||
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
|
||||
%6:_(s32) = G_FMUL %0, %1
|
||||
%7:_(s32) = G_FSUB %6, %el1
|
||||
%6:_(s32) = contract G_FMUL %0, %1
|
||||
%7:_(s32) = contract G_FSUB %6, %el1
|
||||
$vgpr0 = COPY %7(s32)
|
||||
...
|
||||
|
||||
@ -440,7 +440,7 @@ body: |
|
||||
%ptr:_(p1) = COPY $vgpr2_vgpr3
|
||||
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
|
||||
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
|
||||
%6:_(s32) = G_FMUL %0, %1
|
||||
%7:_(s32) = G_FSUB %el1, %6
|
||||
%6:_(s32) = contract G_FMUL %0, %1
|
||||
%7:_(s32) = contract G_FSUB %el1, %6
|
||||
$vgpr0 = COPY %7(s32)
|
||||
...
|
||||
|
||||
@ -385,117 +385,16 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
|
||||
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]]
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
|
||||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511
|
||||
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C5]]
|
||||
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV2]]
|
||||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]]
|
||||
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]]
|
||||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]]
|
||||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744
|
||||
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]]
|
||||
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
|
||||
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32)
|
||||
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]]
|
||||
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]]
|
||||
; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]]
|
||||
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
|
||||
; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]]
|
||||
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096
|
||||
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]]
|
||||
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32)
|
||||
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32)
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]]
|
||||
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1)
|
||||
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]]
|
||||
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]]
|
||||
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]]
|
||||
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
|
||||
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]]
|
||||
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32)
|
||||
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
|
||||
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]]
|
||||
; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1)
|
||||
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
|
||||
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]]
|
||||
; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1)
|
||||
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]]
|
||||
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]]
|
||||
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
|
||||
; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]]
|
||||
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]]
|
||||
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039
|
||||
; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]]
|
||||
; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]]
|
||||
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C19]](s32)
|
||||
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768
|
||||
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]]
|
||||
; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]]
|
||||
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||
; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
|
||||
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
|
||||
; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[C2]]
|
||||
; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
|
||||
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
|
||||
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C5]]
|
||||
; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR8]](s32), [[C6]]
|
||||
; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP8]](s1)
|
||||
; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[ZEXT4]]
|
||||
; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR9]](s32), [[C6]]
|
||||
; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s1), [[C7]], [[C6]]
|
||||
; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[C8]]
|
||||
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C9]](s32)
|
||||
; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL2]]
|
||||
; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD2]]
|
||||
; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[C6]]
|
||||
; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[C11]]
|
||||
; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[C12]]
|
||||
; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR12]], [[SMIN1]](s32)
|
||||
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[SMIN1]](s32)
|
||||
; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL3]](s32), [[OR12]]
|
||||
; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP10]](s1)
|
||||
; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[ZEXT5]]
|
||||
; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD2]](s32), [[C10]]
|
||||
; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP11]](s1), [[OR13]], [[OR11]]
|
||||
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C13]]
|
||||
; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT5]], [[C14]](s32)
|
||||
; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND8]](s32), [[C15]]
|
||||
; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP12]](s1)
|
||||
; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND8]](s32), [[C16]]
|
||||
; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP13]](s1)
|
||||
; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[ZEXT7]]
|
||||
; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[OR14]]
|
||||
; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD2]](s32), [[C17]]
|
||||
; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP14]](s1), [[C8]], [[ADD3]]
|
||||
; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD2]](s32), [[C18]]
|
||||
; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP15]](s1), [[OR10]], [[SELECT6]]
|
||||
; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C19]](s32)
|
||||
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C20]]
|
||||
; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SELECT7]]
|
||||
; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[OR7]], [[C21]]
|
||||
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[OR15]], [[C21]]
|
||||
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C19]](s32)
|
||||
; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]]
|
||||
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
|
||||
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV]](s64)
|
||||
; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC]](s32)
|
||||
; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV1]](s64)
|
||||
; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC2]](s32)
|
||||
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
|
||||
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
|
||||
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
|
||||
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<2 x s16>) = afn G_FPTRUNC %0
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 < %s | FileCheck -check-prefixes=SI-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
|
||||
|
||||
define amdgpu_kernel void @fptrunc_f32_to_f16(
|
||||
; SI-SDAG-LABEL: fptrunc_f32_to_f16:
|
||||
@ -457,9 +457,49 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
|
||||
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
|
||||
; SI-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s6, s5, 8
|
||||
; SI-GISEL-NEXT: s_and_b32 s7, s5, 0x1ff
|
||||
; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
|
||||
; SI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s7, s4
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s6, s4
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s7, s3, 12
|
||||
; SI-GISEL-NEXT: s_sub_i32 s8, 1, s3
|
||||
; SI-GISEL-NEXT: s_or_b32 s9, s4, 0x1000
|
||||
; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s4, s7
|
||||
; SI-GISEL-NEXT: s_max_i32 s7, s8, 0
|
||||
; SI-GISEL-NEXT: s_min_i32 s7, s7, 13
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s8, s9, s7
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s7, s9
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s7, s8, s7
|
||||
; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, s7, s4
|
||||
; SI-GISEL-NEXT: s_and_b32 s7, s4, 7
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
|
||||
; SI-GISEL-NEXT: s_cmp_eq_u32 s7, 3
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; SI-GISEL-NEXT: s_cmp_gt_i32 s7, 5
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s7, s8, s7
|
||||
; SI-GISEL-NEXT: s_add_i32 s4, s4, s7
|
||||
; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
|
||||
; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s3, s6, s4
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
|
||||
; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s4, s3
|
||||
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
; SI-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -529,10 +569,50 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
|
||||
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
|
||||
; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8
|
||||
; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
|
||||
; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; VI-GISEL-NEXT: s_or_b32 s2, s6, s2
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s2, s5, s2
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12
|
||||
; VI-GISEL-NEXT: s_max_i32 s7, s7, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s6, s2, s6
|
||||
; VI-GISEL-NEXT: s_min_i32 s7, s7, 13
|
||||
; VI-GISEL-NEXT: s_bitset1_b32 s2, 12
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7
|
||||
; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s2, s8, s2
|
||||
; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6
|
||||
; VI-GISEL-NEXT: s_and_b32 s6, s2, 7
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2
|
||||
; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s6, s7, s6
|
||||
; VI-GISEL-NEXT: s_add_i32 s2, s2, s6
|
||||
; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
|
||||
; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16
|
||||
; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
|
||||
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
|
||||
; VI-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
; VI-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -602,10 +682,50 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
|
||||
; GFX9-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s5, s3, 8
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
|
||||
; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s2, s5, s2
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s4
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12
|
||||
; GFX9-GISEL-NEXT: s_max_i32 s7, s7, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s6, s2, s6
|
||||
; GFX9-GISEL-NEXT: s_min_i32 s7, s7, 13
|
||||
; GFX9-GISEL-NEXT: s_bitset1_b32 s2, 12
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s8, s2, s7
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, s7
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s7, s2
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s2, s8, s2
|
||||
; GFX9-GISEL-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s6
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s6, s2, 7
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s2, s2, 2
|
||||
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s6, s7, s6
|
||||
; GFX9-GISEL-NEXT: s_add_i32 s2, s2, s6
|
||||
; GFX9-GISEL-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
|
||||
; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s2, s5, s2
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 16
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -675,8 +795,48 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
|
||||
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX950-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s5, s3, 8
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
|
||||
; GFX950-GISEL-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s2, s6, s2
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s2, s5, s2
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_sub_i32 s7, 1, s4
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s6, s4, 12
|
||||
; GFX950-GISEL-NEXT: s_max_i32 s7, s7, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s6, s2, s6
|
||||
; GFX950-GISEL-NEXT: s_min_i32 s7, s7, 13
|
||||
; GFX950-GISEL-NEXT: s_bitset1_b32 s2, 12
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s8, s2, s7
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s7, s8, s7
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s7, s2
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s2, s8, s2
|
||||
; GFX950-GISEL-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s2, s2, s6
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s6, s2, 7
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s2, s2, 2
|
||||
; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s6, s7, s6
|
||||
; GFX950-GISEL-NEXT: s_add_i32 s2, s2, s6
|
||||
; GFX950-GISEL-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
|
||||
; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s2, s5, s2
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 16
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
|
||||
@ -822,11 +982,54 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s7, s4, 12
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s9, s8, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s2, s7
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s9, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s9, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s6, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s2, 7
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s7, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s2, s2, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 0x7c00, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s5, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 16
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
@ -836,11 +1039,54 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s7, s4, 12
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s9, s8, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s2, s7
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s9, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s9, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s6, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s2, 7
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s7, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s7, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s2, s2, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 0x7c00, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s5, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 16
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
|
||||
ptr addrspace(1) %r,
|
||||
@ -1644,13 +1890,94 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
|
||||
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
||||
; SI-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
|
||||
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
|
||||
; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s8, s5, 8
|
||||
; SI-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff
|
||||
; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
|
||||
; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s9, s4
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s8, s4
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s8, s8, 9
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s9, s3, 12
|
||||
; SI-GISEL-NEXT: s_sub_i32 s10, 1, s3
|
||||
; SI-GISEL-NEXT: s_or_b32 s11, s4, 0x1000
|
||||
; SI-GISEL-NEXT: s_or_b32 s8, s8, 0x7c00
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s4, s9
|
||||
; SI-GISEL-NEXT: s_max_i32 s9, s10, 0
|
||||
; SI-GISEL-NEXT: s_min_i32 s9, s9, 13
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s10, s11, s9
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s9, s10, s9
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s9, s11
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s9, s10, s9
|
||||
; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, s9, s4
|
||||
; SI-GISEL-NEXT: s_and_b32 s9, s4, 7
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
|
||||
; SI-GISEL-NEXT: s_cmp_eq_u32 s9, 3
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s10, 1, 0
|
||||
; SI-GISEL-NEXT: s_cmp_gt_i32 s9, 5
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s9, s10, s9
|
||||
; SI-GISEL-NEXT: s_add_i32 s4, s4, s9
|
||||
; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
|
||||
; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s3, s8, s4
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
|
||||
; SI-GISEL-NEXT: s_bfe_u32 s5, s7, 0xb0014
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s8, s7, 8
|
||||
; SI-GISEL-NEXT: s_and_b32 s9, s7, 0x1ff
|
||||
; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; SI-GISEL-NEXT: s_addk_i32 s5, 0xfc10
|
||||
; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
|
||||
; SI-GISEL-NEXT: s_or_b32 s6, s9, s6
|
||||
; SI-GISEL-NEXT: s_or_b32 s3, s4, s3
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s8, s4
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s8, s5, 12
|
||||
; SI-GISEL-NEXT: s_sub_i32 s9, 1, s5
|
||||
; SI-GISEL-NEXT: s_or_b32 s10, s4, 0x1000
|
||||
; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s4, s8
|
||||
; SI-GISEL-NEXT: s_max_i32 s8, s9, 0
|
||||
; SI-GISEL-NEXT: s_min_i32 s8, s8, 13
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s9, s10, s8
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
|
||||
; SI-GISEL-NEXT: s_cmp_lg_u32 s8, s10
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s8, s9, s8
|
||||
; SI-GISEL-NEXT: s_cmp_lt_i32 s5, 1
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, s8, s4
|
||||
; SI-GISEL-NEXT: s_and_b32 s8, s4, 7
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
|
||||
; SI-GISEL-NEXT: s_cmp_eq_u32 s8, 3
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; SI-GISEL-NEXT: s_cmp_gt_i32 s8, 5
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; SI-GISEL-NEXT: s_or_b32 s8, s9, s8
|
||||
; SI-GISEL-NEXT: s_add_i32 s4, s4, s8
|
||||
; SI-GISEL-NEXT: s_cmp_gt_i32 s5, 30
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
|
||||
; SI-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
|
||||
; SI-GISEL-NEXT: s_cselect_b32 s4, s6, s4
|
||||
; SI-GISEL-NEXT: s_lshr_b32 s5, s7, 16
|
||||
; SI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
|
||||
; SI-GISEL-NEXT: s_and_b32 s5, s5, 0x8000
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s5, s4
|
||||
; SI-GISEL-NEXT: s_and_b32 s4, s4, 0xffff
|
||||
; SI-GISEL-NEXT: s_lshl_b32 s4, s4, 16
|
||||
; SI-GISEL-NEXT: s_or_b32 s4, s3, s4
|
||||
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
|
||||
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -1763,14 +2090,96 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
|
||||
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
||||
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 8
|
||||
; VI-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
|
||||
; VI-GISEL-NEXT: s_addk_i32 s2, 0xfc10
|
||||
; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
|
||||
; VI-GISEL-NEXT: s_or_b32 s4, s8, s4
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s3, s3, s4
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; VI-GISEL-NEXT: s_sub_i32 s9, 1, s2
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s8, s2, 12
|
||||
; VI-GISEL-NEXT: s_max_i32 s9, s9, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s8, s3, s8
|
||||
; VI-GISEL-NEXT: s_min_i32 s9, s9, 13
|
||||
; VI-GISEL-NEXT: s_bitset1_b32 s3, 12
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s4, s4, 9
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s10, s3, s9
|
||||
; VI-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s9, s10, s9
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s9, s3
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s3, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s3, s10, s3
|
||||
; VI-GISEL-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s3, s3, s8
|
||||
; VI-GISEL-NEXT: s_and_b32 s8, s3, 7
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; VI-GISEL-NEXT: s_cmp_eq_u32 s8, 3
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; VI-GISEL-NEXT: s_cmp_gt_i32 s8, 5
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s8, s9, s8
|
||||
; VI-GISEL-NEXT: s_add_i32 s3, s3, s8
|
||||
; VI-GISEL-NEXT: s_cmp_gt_i32 s2, 30
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; VI-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s2, s4, s3
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 16
|
||||
; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
|
||||
; VI-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 8
|
||||
; VI-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
|
||||
; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
|
||||
; VI-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
|
||||
; VI-GISEL-NEXT: s_or_b32 s5, s5, s6
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s4, s4, s5
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; VI-GISEL-NEXT: s_sub_i32 s8, 1, s3
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s6, s3, 12
|
||||
; VI-GISEL-NEXT: s_max_i32 s8, s8, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s6, s4, s6
|
||||
; VI-GISEL-NEXT: s_min_i32 s8, s8, 13
|
||||
; VI-GISEL-NEXT: s_bitset1_b32 s4, 12
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s9, s4, s8
|
||||
; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
|
||||
; VI-GISEL-NEXT: s_cmp_lg_u32 s8, s4
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s4, s9, s4
|
||||
; VI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s4, s4, s6
|
||||
; VI-GISEL-NEXT: s_and_b32 s6, s4, 7
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
|
||||
; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
|
||||
; VI-GISEL-NEXT: s_add_i32 s4, s4, s6
|
||||
; VI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
|
||||
; VI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
|
||||
; VI-GISEL-NEXT: s_cselect_b32 s3, s5, s4
|
||||
; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 16
|
||||
; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; VI-GISEL-NEXT: s_or_b32 s3, s4, s3
|
||||
; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
|
||||
; VI-GISEL-NEXT: s_and_b32 s2, s2, 0xffff
|
||||
; VI-GISEL-NEXT: s_lshl_b32 s3, s3, 16
|
||||
; VI-GISEL-NEXT: s_or_b32 s2, s2, s3
|
||||
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
|
||||
; VI-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
|
||||
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
|
||||
; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
|
||||
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; VI-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -1881,14 +2290,93 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 8
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
|
||||
; GFX9-GISEL-NEXT: s_addk_i32 s2, 0xfc10
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s3, s3, s4
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_sub_i32 s9, 1, s2
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s8, s2, 12
|
||||
; GFX9-GISEL-NEXT: s_max_i32 s9, s9, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s8, s3, s8
|
||||
; GFX9-GISEL-NEXT: s_min_i32 s9, s9, 13
|
||||
; GFX9-GISEL-NEXT: s_bitset1_b32 s3, 12
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 9
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s10, s3, s9
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s9, s10, s9
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s9, s3
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s3, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s3, s10, s3
|
||||
; GFX9-GISEL-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s3, s3, s8
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s8, s3, 7
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s8, 3
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_cmp_gt_i32 s8, 5
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s8, s9, s8
|
||||
; GFX9-GISEL-NEXT: s_add_i32 s3, s3, s8
|
||||
; GFX9-GISEL-NEXT: s_cmp_gt_i32 s2, 30
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s2, s4, s3
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 16
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX9-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 8
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
|
||||
; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s4, s4, s5
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s3
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s6, s3, 12
|
||||
; GFX9-GISEL-NEXT: s_max_i32 s8, s8, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s6, s4, s6
|
||||
; GFX9-GISEL-NEXT: s_min_i32 s8, s8, 13
|
||||
; GFX9-GISEL-NEXT: s_bitset1_b32 s4, 12
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s9, s4, s8
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX9-GISEL-NEXT: s_lshl_b32 s8, s9, s8
|
||||
; GFX9-GISEL-NEXT: s_cmp_lg_u32 s8, s4
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s4, s9, s4
|
||||
; GFX9-GISEL-NEXT: s_cmp_lt_i32 s3, 1
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s6
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s6, s4, 7
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s4, s4, 2
|
||||
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
|
||||
; GFX9-GISEL-NEXT: s_add_i32 s4, s4, s6
|
||||
; GFX9-GISEL-NEXT: s_cmp_gt_i32 s3, 30
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
|
||||
; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
|
||||
; GFX9-GISEL-NEXT: s_cselect_b32 s3, s5, s4
|
||||
; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 16
|
||||
; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; GFX9-GISEL-NEXT: s_or_b32 s3, s4, s3
|
||||
; GFX9-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
|
||||
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
|
||||
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
|
||||
; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX9-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -1999,14 +2487,93 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
|
||||
; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 8
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
|
||||
; GFX950-GISEL-NEXT: s_addk_i32 s2, 0xfc10
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s4, s8, s4
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s3, s3, s4
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_sub_i32 s9, 1, s2
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s8, s2, 12
|
||||
; GFX950-GISEL-NEXT: s_max_i32 s9, s9, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s8, s3, s8
|
||||
; GFX950-GISEL-NEXT: s_min_i32 s9, s9, 13
|
||||
; GFX950-GISEL-NEXT: s_bitset1_b32 s3, 12
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s4, s4, 9
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s10, s3, s9
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s9, s10, s9
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s9, s3
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s3, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s3, s10, s3
|
||||
; GFX950-GISEL-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s3, s3, s8
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s8, s3, 7
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; GFX950-GISEL-NEXT: s_cmp_eq_u32 s8, 3
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_cmp_gt_i32 s8, 5
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s8, s9, s8
|
||||
; GFX950-GISEL-NEXT: s_add_i32 s3, s3, s8
|
||||
; GFX950-GISEL-NEXT: s_cmp_gt_i32 s2, 30
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s2, s4, s3
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 16
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX950-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 8
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
|
||||
; GFX950-GISEL-NEXT: s_addk_i32 s3, 0xfc10
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s5, s5, s6
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s5, 0
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s4, s4, s5
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_sub_i32 s8, 1, s3
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s6, s3, 12
|
||||
; GFX950-GISEL-NEXT: s_max_i32 s8, s8, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s6, s4, s6
|
||||
; GFX950-GISEL-NEXT: s_min_i32 s8, s8, 13
|
||||
; GFX950-GISEL-NEXT: s_bitset1_b32 s4, 12
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s9, s4, s8
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX950-GISEL-NEXT: s_lshl_b32 s8, s9, s8
|
||||
; GFX950-GISEL-NEXT: s_cmp_lg_u32 s8, s4
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s4, s9, s4
|
||||
; GFX950-GISEL-NEXT: s_cmp_lt_i32 s3, 1
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s4, s4, s6
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s6, s4, 7
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s4, s4, 2
|
||||
; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s6, s8, s6
|
||||
; GFX950-GISEL-NEXT: s_add_i32 s4, s4, s6
|
||||
; GFX950-GISEL-NEXT: s_cmp_gt_i32 s3, 30
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
|
||||
; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
|
||||
; GFX950-GISEL-NEXT: s_cselect_b32 s3, s5, s4
|
||||
; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 16
|
||||
; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; GFX950-GISEL-NEXT: s_or_b32 s3, s4, s3
|
||||
; GFX950-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
|
||||
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
|
||||
; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
|
||||
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; GFX950-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
|
||||
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX950-GISEL-NEXT: s_endpgm
|
||||
;
|
||||
@ -2247,16 +2814,101 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s2, 0xfc10
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s8, 1, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s8, s8, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s9, s2, 12
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s8, s8, 13
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s4, s4, 9
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s11, s10, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s9
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s11, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s4, 0x7c00
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s8, s10
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s11, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s3, 7
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s9, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s2, 30
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s4, 12
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s10, s9, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s8
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s10, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s9
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s10, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s6, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 7
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s6
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s5, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s4, s7, 16
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s4, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
||||
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
|
||||
;
|
||||
@ -2265,16 +2917,101 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s2, 0xfc10
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s8, 1, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s8, s8, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s9, s2, 12
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s8, s8, 13
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s4, s4, 9
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s11, s10, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s9
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s11, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s4, 0x7c00
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s8, s10
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s11, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s2, 1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s3, 7
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s9, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s2, 30
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s4, 12
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s10, s9, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s8
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s10, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s9
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s10, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s6, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 7
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s6
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s5, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s4, s7, 16
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s4, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
|
||||
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
|
||||
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
|
||||
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
|
||||
ptr addrspace(1) %r,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user