diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index c92d154846d8..c0fb73df9c76 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -2093,7 +2093,7 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi defm : FlatLoadPats_D16_t16; } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts -let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in { +let OtherPredicates = [HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in { defm : FlatStorePats_t16 ; defm : FlatStorePats_t16 ; defm : FlatStorePats_t16 ; diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll index 046575b280a5..28efba68b9a9 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-store.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-REAL16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-REAL16 %s ; Test using saddr addressing mode of flat_*store_* instructions. @@ -1165,11 +1167,24 @@ define amdgpu_ps void @atomic_flat_store_saddr_i64_zext_vgpr_offset_neg128(ptr i ; -------------------------------------------------------------------------------- define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) { -; GFX1250-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1250-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] -; GFX1250-NEXT: s_endpgm +; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3] +; GFX1250-GISEL-REAL16-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %data.hi = extractelement <2 x i16> %data, i32 1 @@ -1178,11 +1193,24 @@ define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i3 } define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) { -; GFX1250-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1250-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128 -; GFX1250-NEXT: s_endpgm +; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128 +; GFX1250-GISEL-FAKE16-NEXT: s_endpgm +; +; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3] offset:-128 +; GFX1250-GISEL-REAL16-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1219,3 +1247,6 @@ define amdgpu_ps void @flat_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg12 store i8 %data.hi.trunc, ptr %gep1 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1250-SDAG-FAKE16: {{.*}} +; GFX1250-SDAG-REAL16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll index c262996a5099..08dccdf5872d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll @@ -65,8 +65,8 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 { ; SDAG-REAL16: ; %bb.0: ; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 nv -; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8 ; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8 ; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] ; SDAG-REAL16-NEXT: s_endpgm @@ -114,8 +114,8 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 { ; SDAG-REAL16: ; %bb.0: ; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 nv -; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64 ; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64 ; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] ; SDAG-REAL16-NEXT: s_endpgm @@ -212,8 +212,8 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 { ; SDAG-REAL16: ; %bb.0: ; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 nv -; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8 ; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8 ; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] ; SDAG-REAL16-NEXT: s_endpgm @@ -261,8 +261,8 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 { ; SDAG-REAL16: ; %bb.0: ; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 nv -; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64 ; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64 ; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] ; SDAG-REAL16-NEXT: s_endpgm