[AMDGPU] Fix i16/i8 flat store in true16 with sramecc (#190238)
The store patterns were guarded by the D16PreservesUnusedBits predicate, which is not needed for stores.
This commit is contained in:
parent
935f21e1d5
commit
7084f18f27
@ -2093,7 +2093,7 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
|
||||
defm : FlatLoadPats_D16_t16<FLAT_LOAD_SBYTE_D16_t16, atomic_load_sext_8_flat, i16>;
|
||||
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
|
||||
|
||||
let OtherPredicates = [D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
|
||||
let OtherPredicates = [HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in {
|
||||
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
|
||||
defm : FlatStorePats_t16 <FLAT_STORE_SHORT, store_flat, i16>;
|
||||
defm : FlatStorePats_t16 <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-FAKE16 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16 %s
|
||||
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG,GFX1250-SDAG-REAL16 %s
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL,GFX1250-GISEL-REAL16 %s
|
||||
|
||||
; Test using saddr addressing mode of flat_*store_* instructions.
|
||||
|
||||
@ -1165,11 +1167,24 @@ define amdgpu_ps void @atomic_flat_store_saddr_i64_zext_vgpr_offset_neg128(ptr i
|
||||
; --------------------------------------------------------------------------------
|
||||
|
||||
define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
|
||||
; GFX1250-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3]
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3]
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
%zext.offset = zext i32 %voffset to i64
|
||||
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
||||
%data.hi = extractelement <2 x i16> %data, i32 1
|
||||
@ -1178,11 +1193,24 @@ define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr(ptr inreg %sbase, i3
|
||||
}
|
||||
|
||||
define amdgpu_ps void @flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %data) {
|
||||
; GFX1250-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
; GFX1250-SDAG-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-SDAG-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
|
||||
; GFX1250-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-FAKE16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
||||
; GFX1250-GISEL-FAKE16: ; %bb.0:
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-GISEL-FAKE16-NEXT: flat_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
|
||||
; GFX1250-GISEL-FAKE16-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-GISEL-REAL16-LABEL: flat_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
|
||||
; GFX1250-GISEL-REAL16: ; %bb.0:
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v1, 16, v1
|
||||
; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v0, v1, s[2:3] offset:-128
|
||||
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
|
||||
%zext.offset = zext i32 %voffset to i64
|
||||
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
|
||||
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128
|
||||
@ -1219,3 +1247,6 @@ define amdgpu_ps void @flat_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg12
|
||||
store i8 %data.hi.trunc, ptr %gep1
|
||||
ret void
|
||||
}
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX1250-SDAG-FAKE16: {{.*}}
|
||||
; GFX1250-SDAG-REAL16: {{.*}}
|
||||
|
||||
@ -65,8 +65,8 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
|
||||
; SDAG-REAL16: ; %bb.0:
|
||||
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 nv
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
|
||||
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
|
||||
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
|
||||
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
|
||||
; SDAG-REAL16-NEXT: s_endpgm
|
||||
@ -114,8 +114,8 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
|
||||
; SDAG-REAL16: ; %bb.0:
|
||||
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 nv
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
|
||||
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
|
||||
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
|
||||
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
|
||||
; SDAG-REAL16-NEXT: s_endpgm
|
||||
@ -212,8 +212,8 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
|
||||
; SDAG-REAL16: ; %bb.0:
|
||||
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 nv
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
|
||||
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
|
||||
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
|
||||
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
|
||||
; SDAG-REAL16-NEXT: s_endpgm
|
||||
@ -261,8 +261,8 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
|
||||
; SDAG-REAL16: ; %bb.0:
|
||||
; SDAG-REAL16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
||||
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 nv
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
|
||||
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
|
||||
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
|
||||
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
|
||||
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
|
||||
; SDAG-REAL16-NEXT: s_endpgm
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user