; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s

; When translating sequences like
; bitfieldInsert(bitfieldInsert(...)),
; where one bitfieldInsert's result is the base for another one,
; the SimplifyDemandedBits transform executed during InstCombine
; can merge the inverted mask of the outer bitfieldInsert and the
; inverted mask of the inner bitfieldInsert. When it is possible,
; e.g. if the constants are disjoint and the original inverted mask of
; the outer bitfieldInsert can be reconstructed, aim to generate multiple
; v_bfi instructions.
;
; NOTE(review): the assertions recorded below contain v_and_b32 / v_or3_b32 /
; v_and_or_b32 sequences and no v_bfi instruction. Presumably they capture
; the output before the optimization described above is applied; confirm
; and regenerate with the update script if the expected output has changed.

; Two inserts into one base, with the base mask already merged:
;   inner insert: (fptoui(%y) << 10)           & 0x000ffc00 (1047552)
;   outer insert: (fptoui(%x * 1023.0) << 20)  & 0x3ff00000 (1072693248)
;   base:          fptoui(%z * 1023.0)         & 0xc00003ff (-1073740801)
; The base mask is exactly the complement of the two insert masks combined
; (0x000ffc00 | 0x3ff00000 = 0x3ffffc00), and the insert masks are disjoint.
define float @v_bfi_single_nesting_level(float %x, float %y, float %z) {
; GFX10-LABEL: v_bfi_single_nesting_level:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 20, v0
; GFX10-NEXT: v_and_b32_e32 v1, 0xffc00, v1
; GFX10-NEXT: v_and_b32_e32 v2, 0xc00003ff, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0
; GFX10-NEXT: v_or3_b32 v0, v1, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
.entry:
  %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03
  %mul.base.i32 = fptoui float %mul.base to i32
  %y.i32 = fptoui float %y to i32
  %shl.inner.insert = shl i32 %y.i32, 10
  %bfi1.and = and i32 %shl.inner.insert, 1047552
  %bfi1.andnot = and i32 %mul.base.i32, -1073740801
  %bfi1.or = or i32 %bfi1.and, %bfi1.andnot
  %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03
  %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32
  %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20
  %and.outer = and i32 %shl.outer.insert, 1072693248
  %or.outer = or i32 %bfi1.or, %and.outer
  %result = bitcast i32 %or.outer to float
  ret float %result
}

; Same IR as v_bfi_single_nesting_level up to and including %or.outer, but
; the inner insert result %bfi1.or has a second use (multiplied by 2), and
; that second use is the value returned. %or.outer does not feed the
; returned value here.
define float @v_bfi_single_nesting_level_inner_use(float %x, float %y, float %z) {
; GFX10-LABEL: v_bfi_single_nesting_level_inner_use:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x447fc000, v2
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0x400003ff, v0
; GFX10-NEXT: v_and_or_b32 v0, 0xffc00, v1, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
.entry:
  %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03
  %mul.base.i32 = fptoui float %mul.base to i32
  %y.i32 = fptoui float %y to i32
  %shl.inner.insert = shl i32 %y.i32, 10
  %bfi1.and = and i32 %shl.inner.insert, 1047552
  %bfi1.andnot = and i32 %mul.base.i32, -1073740801
  %bfi1.or = or i32 %bfi1.and, %bfi1.andnot
  %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03
  %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32
  %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20
  %and.outer = and i32 %shl.outer.insert, 1072693248
  %or.outer = or i32 %bfi1.or, %and.outer
  %bfi1.or.seconduse = mul i32 %bfi1.or, 2
  %result = bitcast i32 %bfi1.or.seconduse to float
  ret float %result
}

; Same shape as v_bfi_single_nesting_level, except that the base is masked
; with 0xc0000400 (-1073740800) instead of 0xc00003ff. That mask is not the
; complement of the combined insert masks: it shares bit 10 with the inner
; insert mask 0x000ffc00 and drops bits 0-9.
define float @v_bfi_no_nesting(float %x, float %y, float %z) {
; GFX10-LABEL: v_bfi_no_nesting:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v2, 0x447fc000, v2
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 20, v0
; GFX10-NEXT: v_and_b32_e32 v1, 0xffc00, v1
; GFX10-NEXT: v_and_b32_e32 v2, 0xc0000400, v2
; GFX10-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0
; GFX10-NEXT: v_or3_b32 v0, v1, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
.entry:
  %mul.base = fmul reassoc nnan nsz arcp contract afn float %z, 1.023000e+03
  %mul.base.i32 = fptoui float %mul.base to i32
  %y.i32 = fptoui float %y to i32
  %shl.inner.insert = shl i32 %y.i32, 10
  %inner.and = and i32 %shl.inner.insert, 1047552
  %inner.and2 = and i32 %mul.base.i32, -1073740800
  %inner.or = or i32 %inner.and, %inner.and2
  %mul.outer.insert = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03
  %mul.outer.insert.i32 = fptoui float %mul.outer.insert to i32
  %shl.outer.insert = shl i32 %mul.outer.insert.i32, 20
  %and.outer = and i32 %shl.outer.insert, 1072693248
  %or.outer = or i32 %inner.or, %and.outer
  %result = bitcast i32 %or.outer to float
  ret float %result
}

; Three inserts chained onto one base:
;   inner insert: (fptoui(%y) << 5)            & 0x000003e0 (992)
;   mid insert:   (fptoui(%y) << 10)           & 0x000ffc00 (1047552)
;   outer insert: (fptoui(%x * 1023.0) << 20)  & 0x3ff00000 (1072693248)
;   base:          fptoui(%z)                  & 0xc000001f (-1073741793)
; The base mask is the complement of all three insert masks combined
; (0x3e0 | 0xffc00 | 0x3ff00000 = 0x3fffffe0).
define float @v_bfi_two_levels(float %x, float %y, float %z) {
; GFX10-LABEL: v_bfi_two_levels:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x447fc000, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 5, v1
; GFX10-NEXT: v_and_b32_e32 v2, 0xc000001f, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 20, v0
; GFX10-NEXT: v_and_or_b32 v2, 0x3e0, v3, v2
; GFX10-NEXT: v_and_b32_e32 v1, 0xffc00, v1
; GFX10-NEXT: v_and_b32_e32 v0, 0x3ff00000, v0
; GFX10-NEXT: v_or3_b32 v0, v2, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
.entry:
  %y.i32 = fptoui float %y to i32
  %shl.insert.inner = shl i32 %y.i32, 5
  %and.insert.inner = and i32 %shl.insert.inner, 992
  %z.i32 = fptoui float %z to i32
  %base.inner = and i32 %z.i32, -1073741793
  %or.inner = or i32 %and.insert.inner , %base.inner
  %shl.insert.mid = shl i32 %y.i32, 10
  %and.insert.mid = and i32 %shl.insert.mid, 1047552
  %or.mid = or i32 %or.inner, %and.insert.mid
  %fmul.insert.outer = fmul reassoc nnan nsz arcp contract afn float %x, 1.023000e+03
  %cast.insert.outer = fptoui float %fmul.insert.outer to i32
  %shl.insert.outer = shl i32 %cast.insert.outer, 20
  %and.insert.outer = and i32 %shl.insert.outer, 1072693248
  %or.outer = or i32 %or.mid, %and.insert.outer
  %result = bitcast i32 %or.outer to float
  ret float %result
}