[AMDGPU] Optimize S_OR_B32 to S_ADDK_I32 where possible (#177949)

This PR fixes #177753 by converting disjoint S_OR_B32 to S_ADDK_I32
whenever possible. It avoids this transformation when the S_OR_B32 can
instead be converted to a bitset instruction.

Note on Test Failures (Draft Status): This change causes significant
register reshuffling across the test suite due to the new allocation
hints and the swap of src0 and src1 performed when src0 is not a register
and src1 is, along with the change from or to addk. To avoid a massive,
noisy diff during the initial logic review:

This Draft PR only includes a representative sample of updated tests:
- CodeGen/AMDGPU/combine-reg-or-const.ll -> Showcases the change from
  S_OR to S_ADDK
- CodeGen/AMDGPU/s-barrier.ll -> Showcases the swap between Src0 and
  Src1 if src0 is not a register

The rest of the tests show the result of the register allocation hint we
give. I have checked every test I updated, and they seem OK to me.

Once the core logic is approved, I will run the update script across the
remaining ~70 failing tests and mark the PR as "Ready for Review."
This commit is contained in:
Iasonaskrpr 2026-02-07 11:10:12 +02:00 committed by GitHub
parent 15c9c77ccf
commit 6c6fb00c94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 701 additions and 681 deletions

View File

@ -909,9 +909,21 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
}
}
// Shrink scalar logic operations.
if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
MI.getOpcode() == AMDGPU::S_OR_B32 ||
MI.getOpcode() == AMDGPU::S_XOR_B32) {
ChangeKind CK = shrinkScalarLogicOp(MI);
if (CK == ChangeKind::UpdateHint)
continue;
Changed |= (CK == ChangeKind::UpdateInst);
}
// Try to use S_ADDK_I32 and S_MULK_I32.
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
MI.getOpcode() == AMDGPU::S_MUL_I32) {
MI.getOpcode() == AMDGPU::S_MUL_I32 ||
(MI.getOpcode() == AMDGPU::S_OR_B32 &&
MI.getFlag(MachineInstr::MIFlag::Disjoint))) {
const MachineOperand *Dest = &MI.getOperand(0);
MachineOperand *Src0 = &MI.getOperand(1);
MachineOperand *Src1 = &MI.getOperand(2);
@ -931,12 +943,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
continue;
}
if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
if (Src1->isImm() && isKImmOperand(*Src1)) {
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
unsigned Opc = (MI.getOpcode() == AMDGPU::S_MUL_I32)
? AMDGPU::S_MULK_I32
: AMDGPU::S_ADDK_I32;
Src1->setImm(SignExtend64(Src1->getImm(), 32));
MI.setDesc(TII->get(Opc));
MI.tieOperands(0, 1);
@ -974,16 +985,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
continue;
}
// Shrink scalar logic operations.
if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
MI.getOpcode() == AMDGPU::S_OR_B32 ||
MI.getOpcode() == AMDGPU::S_XOR_B32) {
ChangeKind CK = shrinkScalarLogicOp(MI);
if (CK == ChangeKind::UpdateHint)
continue;
Changed |= (CK == ChangeKind::UpdateInst);
}
if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
Changed |= shrinkMIMG(MI);

View File

@ -664,8 +664,8 @@ define amdgpu_ps i16 @s_fshl_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 in
; GFX6-NEXT: s_lshr_b32 s1, s1, 1
; GFX6-NEXT: s_lshl_b32 s2, s3, s2
; GFX6-NEXT: s_lshr_b32 s1, s1, s4
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_or_b32 s2, s2, s1
; GFX6-NEXT: s_and_b32 s1, s2, 0xff
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
@ -954,17 +954,17 @@ define amdgpu_ps i32 @s_fshl_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 in
; GFX6-NEXT: s_and_b32 s4, s8, 7
; GFX6-NEXT: s_andn2_b32 s6, 7, s8
; GFX6-NEXT: s_lshr_b32 s1, s1, 25
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s4, s5, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, s6
; GFX6-NEXT: s_or_b32 s4, s4, s1
; GFX6-NEXT: s_and_b32 s1, s2, 0xff
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
; GFX6-NEXT: s_or_b32 s1, s4, s1
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s2, s3, 0xff
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s3, 0xff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -3689,8 +3689,8 @@ define amdgpu_ps i32 @s_fshl_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <
; GFX8-NEXT: s_lshr_b32 s1, s1, 17
; GFX8-NEXT: s_lshl_b32 s2, s3, s2
; GFX8-NEXT: s_lshr_b32 s1, s1, s4
; GFX8-NEXT: s_or_b32 s1, s2, s1
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_or_b32 s2, s2, s1
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
@ -4268,15 +4268,15 @@ define amdgpu_ps i48 @s_fshl_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <
; GFX8-NEXT: s_lshr_b32 s2, s2, 17
; GFX8-NEXT: s_lshl_b32 s4, s6, s4
; GFX8-NEXT: s_lshr_b32 s2, s2, s7
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_or_b32 s2, s4, s2
; GFX8-NEXT: s_and_b32 s4, s5, 15
; GFX8-NEXT: s_or_b32 s4, s4, s2
; GFX8-NEXT: s_and_b32 s2, s5, 15
; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
; GFX8-NEXT: s_andn2_b32 s5, 15, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, 1
; GFX8-NEXT: s_lshl_b32 s1, s1, s4
; GFX8-NEXT: s_lshr_b32 s3, s3, s5
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s1, s1, s3
; GFX8-NEXT: s_lshr_b32 s2, s2, 1
; GFX8-NEXT: s_lshr_b32 s2, s2, s5
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s4
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
@ -4614,29 +4614,29 @@ define amdgpu_ps <2 x i32> @s_fshl_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %
; GFX8-NEXT: s_lshr_b32 s2, s2, 17
; GFX8-NEXT: s_lshl_b32 s4, s6, s4
; GFX8-NEXT: s_lshr_b32 s2, s2, s8
; GFX8-NEXT: s_or_b32 s2, s4, s2
; GFX8-NEXT: s_and_b32 s4, s5, 15
; GFX8-NEXT: s_or_b32 s4, s4, s2
; GFX8-NEXT: s_and_b32 s2, s5, 15
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_lshl_b32 s1, s1, s4
; GFX8-NEXT: s_and_b32 s4, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
; GFX8-NEXT: s_lshr_b32 s9, s5, 16
; GFX8-NEXT: s_andn2_b32 s5, 15, s5
; GFX8-NEXT: s_lshr_b32 s4, s4, 1
; GFX8-NEXT: s_lshr_b32 s4, s4, s5
; GFX8-NEXT: s_or_b32 s1, s1, s4
; GFX8-NEXT: s_and_b32 s4, s9, 15
; GFX8-NEXT: s_lshr_b32 s2, s2, 1
; GFX8-NEXT: s_lshr_b32 s2, s2, s5
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s2, s9, 15
; GFX8-NEXT: s_andn2_b32 s5, 15, s9
; GFX8-NEXT: s_lshr_b32 s3, s3, 17
; GFX8-NEXT: s_lshl_b32 s4, s7, s4
; GFX8-NEXT: s_lshl_b32 s2, s7, s2
; GFX8-NEXT: s_lshr_b32 s3, s3, s5
; GFX8-NEXT: s_or_b32 s2, s2, s3
; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s3, s4, s3
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s3
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
;

View File

@ -665,8 +665,8 @@ define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 in
; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008
; GFX6-NEXT: s_lshl_b32 s3, s3, s4
; GFX6-NEXT: s_lshr_b32 s1, s1, s2
; GFX6-NEXT: s_or_b32 s1, s3, s1
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_or_b32 s3, s3, s1
; GFX6-NEXT: s_and_b32 s1, s3, 0xff
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
@ -946,28 +946,28 @@ define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 in
; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008
; GFX6-NEXT: s_lshr_b32 s2, s7, s2
; GFX6-NEXT: s_lshr_b32 s6, s1, 24
; GFX6-NEXT: s_or_b32 s2, s3, s2
; GFX6-NEXT: s_and_b32 s3, s8, 7
; GFX6-NEXT: s_or_b32 s3, s3, s2
; GFX6-NEXT: s_and_b32 s2, s8, 7
; GFX6-NEXT: s_andn2_b32 s7, 7, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 1
; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010
; GFX6-NEXT: s_lshl_b32 s4, s4, s7
; GFX6-NEXT: s_lshr_b32 s1, s1, s3
; GFX6-NEXT: s_or_b32 s1, s4, s1
; GFX6-NEXT: s_and_b32 s3, s9, 7
; GFX6-NEXT: s_andn2_b32 s4, 7, s9
; GFX6-NEXT: s_lshr_b32 s1, s1, s2
; GFX6-NEXT: s_or_b32 s4, s4, s1
; GFX6-NEXT: s_and_b32 s1, s9, 7
; GFX6-NEXT: s_andn2_b32 s2, 7, s9
; GFX6-NEXT: s_lshl_b32 s5, s5, 1
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s4, s5, s4
; GFX6-NEXT: s_lshr_b32 s3, s6, s3
; GFX6-NEXT: s_lshl_b32 s2, s5, s2
; GFX6-NEXT: s_lshr_b32 s1, s6, s1
; GFX6-NEXT: s_or_b32 s2, s2, s1
; GFX6-NEXT: s_and_b32 s1, s3, 0xff
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_or_b32 s3, s4, s3
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s3, 0xff
; GFX6-NEXT: s_and_b32 s1, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
@ -3443,8 +3443,8 @@ define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <
; GFX8-NEXT: s_lshl_b32 s3, s3, 1
; GFX8-NEXT: s_lshl_b32 s2, s3, s2
; GFX8-NEXT: s_lshr_b32 s1, s4, s1
; GFX8-NEXT: s_or_b32 s1, s2, s1
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_or_b32 s2, s2, s1
; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
@ -4026,15 +4026,15 @@ define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <
; GFX8-NEXT: s_lshl_b32 s6, s6, 1
; GFX8-NEXT: s_lshl_b32 s4, s6, s4
; GFX8-NEXT: s_lshr_b32 s2, s7, s2
; GFX8-NEXT: s_or_b32 s2, s4, s2
; GFX8-NEXT: s_and_b32 s4, s5, 15
; GFX8-NEXT: s_or_b32 s4, s4, s2
; GFX8-NEXT: s_and_b32 s2, s5, 15
; GFX8-NEXT: s_andn2_b32 s5, 15, s5
; GFX8-NEXT: s_lshl_b32 s1, s1, 1
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, s4
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s1, s1, s3
; GFX8-NEXT: s_lshr_b32 s2, s3, s2
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s4
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
@ -4376,8 +4376,8 @@ define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %
; GFX8-NEXT: s_lshl_b32 s6, s6, 1
; GFX8-NEXT: s_lshl_b32 s4, s6, s4
; GFX8-NEXT: s_lshr_b32 s2, s7, s2
; GFX8-NEXT: s_or_b32 s2, s4, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s4, s4, s2
; GFX8-NEXT: s_and_b32 s2, 0xffff, s4
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2

File diff suppressed because it is too large Load Diff

View File

@ -27,7 +27,7 @@ define protected amdgpu_kernel void @_Z11test_kernelPii(ptr addrspace(1) nocaptu
; CHECK-NEXT: s_addc_u32 s1, s3, s5
; CHECK-NEXT: s_bfe_u32 s2, s6, 0xd0003
; CHECK-NEXT: s_add_i32 s2, s2, s7
; CHECK-NEXT: s_or_b32 s2, s2, 0xc0
; CHECK-NEXT: s_addk_i32 s2, 0xc0
; CHECK-NEXT: v_mov_b32_e32 v0, s0
; CHECK-NEXT: v_mov_b32_e32 v1, s1
; CHECK-NEXT: v_mov_b32_e32 v2, s2

View File

@ -3798,36 +3798,36 @@ define amdgpu_kernel void @s_test_umin_ult_v8i16(ptr addrspace(1) %out, <8 x i16
; VI-NEXT: s_min_u32 s3, s3, s7
; VI-NEXT: s_min_u32 s10, s11, s10
; VI-NEXT: s_lshl_b32 s3, s3, 16
; VI-NEXT: s_or_b32 s3, s10, s3
; VI-NEXT: s_and_b32 s7, s6, 0xffff
; VI-NEXT: s_and_b32 s10, s2, 0xffff
; VI-NEXT: s_or_b32 s10, s10, s3
; VI-NEXT: s_and_b32 s3, s6, 0xffff
; VI-NEXT: s_and_b32 s7, s2, 0xffff
; VI-NEXT: s_lshr_b32 s6, s6, 16
; VI-NEXT: s_lshr_b32 s2, s2, 16
; VI-NEXT: s_min_u32 s2, s2, s6
; VI-NEXT: s_min_u32 s7, s10, s7
; VI-NEXT: s_min_u32 s3, s7, s3
; VI-NEXT: s_lshl_b32 s2, s2, 16
; VI-NEXT: s_or_b32 s2, s7, s2
; VI-NEXT: s_and_b32 s6, s5, 0xffff
; VI-NEXT: s_and_b32 s7, s1, 0xffff
; VI-NEXT: s_or_b32 s3, s3, s2
; VI-NEXT: s_and_b32 s2, s5, 0xffff
; VI-NEXT: s_and_b32 s6, s1, 0xffff
; VI-NEXT: s_lshr_b32 s5, s5, 16
; VI-NEXT: s_lshr_b32 s1, s1, 16
; VI-NEXT: s_min_u32 s1, s1, s5
; VI-NEXT: s_min_u32 s6, s7, s6
; VI-NEXT: s_min_u32 s2, s6, s2
; VI-NEXT: s_lshl_b32 s1, s1, 16
; VI-NEXT: s_or_b32 s1, s6, s1
; VI-NEXT: s_and_b32 s5, s4, 0xffff
; VI-NEXT: s_and_b32 s6, s0, 0xffff
; VI-NEXT: s_or_b32 s2, s2, s1
; VI-NEXT: s_and_b32 s1, s4, 0xffff
; VI-NEXT: s_and_b32 s5, s0, 0xffff
; VI-NEXT: s_lshr_b32 s4, s4, 16
; VI-NEXT: s_lshr_b32 s0, s0, 16
; VI-NEXT: s_min_u32 s0, s0, s4
; VI-NEXT: s_min_u32 s5, s6, s5
; VI-NEXT: s_min_u32 s1, s5, s1
; VI-NEXT: s_lshl_b32 s0, s0, 16
; VI-NEXT: s_or_b32 s0, s5, s0
; VI-NEXT: s_or_b32 s1, s1, s0
; VI-NEXT: v_mov_b32_e32 v4, s8
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s3
; VI-NEXT: v_mov_b32_e32 v0, s1
; VI-NEXT: v_mov_b32_e32 v1, s2
; VI-NEXT: v_mov_b32_e32 v2, s3
; VI-NEXT: v_mov_b32_e32 v3, s10
; VI-NEXT: v_mov_b32_e32 v5, s9
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm

View File

@ -0,0 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; This tests if disjoint s_or_b32 gets transformed to s_addk_i32 when we can't use s_bitset1_b32
; Baseline: the or below carries no disjoint flag, and the expected output
; keeps it as s_or_b32 with the literal 0x101 (257).
define amdgpu_ps i32 @s_or_b32_i32(i32 inreg %x) {
; CHECK-LABEL: s_or_b32_i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_or_b32 s0, s0, 0x101
; CHECK-NEXT: ; return to shader part epilog
%or = or i32 %x, 257
ret i32 %or
}
; Same constant as the baseline (257 = 0x101, two bits set), but the or is
; marked disjoint; the expected output is s_addk_i32 instead of s_or_b32.
define amdgpu_ps i32 @s_or_b32_disjoint_to_s_addk_i32(i32 inreg %x) {
; CHECK-LABEL: s_or_b32_disjoint_to_s_addk_i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_addk_i32 s0, 0x101
; CHECK-NEXT: ; return to shader part epilog
%or = or disjoint i32 %x, 257
ret i32 %or
}
; Disjoint or with 256 (only bit 8 set): the expected output is
; s_bitset1_b32 on bit 8, not s_addk_i32.
define amdgpu_ps i32 @s_or_b32_to_s_bitset1_b32(i32 inreg %x) {
; CHECK-LABEL: s_or_b32_to_s_bitset1_b32:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_bitset1_b32 s0, 8
; CHECK-NEXT: ; return to shader part epilog
%or = or disjoint i32 %x, 256
ret i32 %or
}