[AMDGPU] Fix i16/i8 flat store in true16 with sramecc (#190238)

The true16 flat store patterns were guarded by the D16PreservesUnusedBits
predicate, which is not needed for stores.
This commit is contained in:
Stanislav Mekhanoshin 2026-04-02 17:32:50 -07:00 committed by GitHub
parent 935f21e1d5
commit 7084f18f27
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 48 additions and 17 deletions

View File

@ -2093,7 +2093,7 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
let OtherPredicates = [HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;

View File

@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-REAL16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-REAL16 %s
; Test using saddr addressing mode of flat_*store_* instructions.
@ -1165,11 +1167,24 @@ define amdgpu_ps void @atomic_flat_store_saddr_i64_zext_vgpr_offset_neg128(ptr i
; --------------------------------------------------------------------------------
define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GFX1250-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
; GFX1250-NEXT: s_endpgm
; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3]
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%data.hi = extractelement <2 x i16> %data, i32 1
@ -1178,11 +1193,24 @@ define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i3
}
define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
; GFX1250-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
; GFX1250-NEXT: s_endpgm
; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
;
; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3] offset:-128
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
@ -1219,3 +1247,6 @@ define amdgpu_ps void @flat_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg12
store i8 %data.hi.trunc, ptr %gep1
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX1250-SDAG-FAKE16: {{.*}}
; GFX1250-SDAG-REAL16: {{.*}}

View File

@ -65,8 +65,8 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 nv
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
@ -114,8 +114,8 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 nv
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
@ -212,8 +212,8 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 nv
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
@ -261,8 +261,8 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
; SDAG-REAL16: ; %bb.0:
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 nv
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm