; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -mattr=-enable-flat-scratch < %s | FileCheck --check-prefixes=GFX90A,GFX90A-SDAG,GFX90A-MUBUF,GFX90A-SDAG-MUBUF %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX90A,GFX90A-SDAG,GFX90A-FLATSCR,GFX90A-SDAG-FLATSCR %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=-enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG,GFX10-MUBUF,GFX10-SDAG-MUBUF %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG,GFX10-FLATSCR,GFX10-SDAG-FLATSCR %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -mattr=-enable-flat-scratch < %s | FileCheck --check-prefixes=GFX90A,GFX90A-GISEL,GFX90A-MUBUF,GFX90A-GISEL-MUBUF %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX90A,GFX90A-GISEL,GFX90A-FLATSCR,GFX90A-GISEL-FLATSCR %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -mattr=-enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL,GFX10-MUBUF,GFX10-GISEL-MUBUF %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL,GFX10-FLATSCR,GFX10-GISEL-FLATSCR %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-GISEL %s

; This test checks memory addresses with constant offset components that should
; not be folded into memory accesses with immediate offsets.
; SeparateConstOffsetsFromGEP transforms the GEPs in a way that can lead to
; out-of-bounds or negative intermediate results in the address computation,
; which are problematic for flat and scratch instructions:
;   gep[inbounds](p, i + 3) -> gep(gep(p, i), 3)
;
; Every function below is compiled with both SelectionDAG and GlobalISel
; (-global-isel=1) for gfx90a, gfx1030, gfx942, gfx1100 and gfx1200; gfx90a and
; gfx1030 are additionally run with flat scratch disabled and enabled.

; FIXME: The offset here should not be folded: if %p points to the beginning of
; scratch or LDS and %i is -1, a folded offset crashes the program.
;
; State recorded by the assertions below: SelectionDAG folds the 12-byte
; constant into the immediate field of the flat load (offset:12) on every
; tested target, whereas GlobalISel materializes it with a separate 64-bit add
; and emits the flat load without an immediate offset.
define i32 @flat_offset_maybe_oob(ptr %p, i32 %i) {
; GFX90A-SDAG-LABEL: flat_offset_maybe_oob:
; GFX90A-SDAG:       ; %bb.0:
; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX90A-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX90A-SDAG-NEXT:    flat_load_dword v0, v[0:1] offset:12
; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: flat_offset_maybe_oob:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX10-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX10-SDAG-NEXT:    flat_load_dword v0, v[0:1] offset:12
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-SDAG-LABEL: flat_offset_maybe_oob:
; GFX942-SDAG:       ; %bb.0:
; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX942-SDAG-NEXT:    v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
; GFX942-SDAG-NEXT:    flat_load_dword v0, v[0:1] offset:12
; GFX942-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: flat_offset_maybe_oob:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_b32 v0, v[0:1] offset:12
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: flat_offset_maybe_oob:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffd
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_b32 v0, v[0:1] offset:12
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-LABEL: flat_offset_maybe_oob:
; GFX90A-GISEL:       ; %bb.0:
; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX90A-GISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 12, v0
; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX90A-GISEL-NEXT:    flat_load_dword v0, v[0:1]
; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: flat_offset_maybe_oob:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX10-GISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 12
; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX10-GISEL-NEXT:    flat_load_dword v0, v[0:1]
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-GISEL-LABEL: flat_offset_maybe_oob:
; GFX942-GISEL:       ; %bb.0:
; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX942-GISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX942-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
; GFX942-GISEL-NEXT:    s_nop 1
; GFX942-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX942-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 12, v0
; GFX942-GISEL-NEXT:    s_nop 1
; GFX942-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX942-GISEL-NEXT:    flat_load_dword v0, v[0:1]
; GFX942-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX942-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: flat_offset_maybe_oob:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 12
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX11-GISEL-NEXT:    flat_load_b32 v0, v[0:1]
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: flat_offset_maybe_oob:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffd
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX12-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 12
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffd
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
; GFX12-GISEL-NEXT:    flat_load_b32 v0, v[0:1]
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; The element index is %i + 3 on i32 elements, i.e. the address has a
  ; variable part (4 * %i) plus a constant part of 12 bytes.
  %idx = add nsw i32 %i, 3
  %arrayidx = getelementptr inbounds i32, ptr %p, i32 %idx
  ; Load through a generic (flat) pointer.
  %l = load i32, ptr %arrayidx
  ret i32 %l
}

; For MUBUF and for GFX12, folding the offset is okay.
; Same address pattern as a load from a private (addrspace(5)) pointer.
; State recorded by the assertions below: the buffer-instruction (MUBUF)
; configurations and both gfx1200 configurations fold the 12-byte constant into
; the load's immediate offset (offset:12); the remaining flat-scratch
; configurations (gfx90a and gfx1030 with flat scratch enabled, gfx942,
; gfx1100) add it into the address with v_add3_u32 and load without an
; immediate offset.
define i32 @private_offset_maybe_oob(ptr addrspace(5) %p, i32 %i) {
; GFX90A-SDAG-MUBUF-LABEL: private_offset_maybe_oob:
; GFX90A-SDAG-MUBUF:       ; %bb.0:
; GFX90A-SDAG-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-SDAG-MUBUF-NEXT:    v_lshl_add_u32 v0, v1, 2, v0
; GFX90A-SDAG-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:12
; GFX90A-SDAG-MUBUF-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-SDAG-MUBUF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-FLATSCR-LABEL: private_offset_maybe_oob:
; GFX90A-FLATSCR:       ; %bb.0:
; GFX90A-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX90A-FLATSCR-NEXT:    v_add3_u32 v0, v0, v1, 12
; GFX90A-FLATSCR-NEXT:    scratch_load_dword v0, v0, off
; GFX90A-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-MUBUF-LABEL: private_offset_maybe_oob:
; GFX10-SDAG-MUBUF:       ; %bb.0:
; GFX10-SDAG-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-MUBUF-NEXT:    v_lshl_add_u32 v0, v1, 2, v0
; GFX10-SDAG-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:12
; GFX10-SDAG-MUBUF-NEXT:    s_waitcnt vmcnt(0)
; GFX10-SDAG-MUBUF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-FLATSCR-LABEL: private_offset_maybe_oob:
; GFX10-FLATSCR:       ; %bb.0:
; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-FLATSCR-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX10-FLATSCR-NEXT:    v_add3_u32 v0, v0, v1, 12
; GFX10-FLATSCR-NEXT:    scratch_load_dword v0, v0, off
; GFX10-FLATSCR-NEXT:    s_waitcnt vmcnt(0)
; GFX10-FLATSCR-NEXT:    s_setpc_b64 s[30:31]
;
; GFX942-LABEL: private_offset_maybe_oob:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX942-NEXT:    v_add3_u32 v0, v0, v1, 12
; GFX942-NEXT:    scratch_load_dword v0, v0, off
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: private_offset_maybe_oob:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add3_u32 v0, v0, v1, 12
; GFX11-NEXT:    scratch_load_b32 v0, v0, off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: private_offset_maybe_oob:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_lshl_add_u32 v0, v1, 2, v0
; GFX12-SDAG-NEXT:    scratch_load_b32 v0, v0, off offset:12
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-GISEL-MUBUF-LABEL: private_offset_maybe_oob:
; GFX90A-GISEL-MUBUF:       ; %bb.0:
; GFX90A-GISEL-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-GISEL-MUBUF-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX90A-GISEL-MUBUF-NEXT:    v_add_u32_e32 v0, v0, v1
; GFX90A-GISEL-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:12
; GFX90A-GISEL-MUBUF-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-GISEL-MUBUF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-MUBUF-LABEL: private_offset_maybe_oob:
; GFX10-GISEL-MUBUF:       ; %bb.0:
; GFX10-GISEL-MUBUF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-MUBUF-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX10-GISEL-MUBUF-NEXT:    v_add_nc_u32_e32 v0, v0, v1
; GFX10-GISEL-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen offset:12
; GFX10-GISEL-MUBUF-NEXT:    s_waitcnt vmcnt(0)
; GFX10-GISEL-MUBUF-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: private_offset_maybe_oob:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_add_nc_u32_e32 v0, v0, v1
; GFX12-GISEL-NEXT:    scratch_load_b32 v0, v0, off offset:12
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; The element index is %i + 3 on i32 elements, i.e. the address has a
  ; variable part (4 * %i) plus a constant part of 12 bytes.
  %idx = add nsw i32 %i, 3
  %arrayidx = getelementptr inbounds i32, ptr addrspace(5) %p, i32 %idx
  ; Load through an addrspace(5) pointer.
  %l = load i32, ptr addrspace(5) %arrayidx
  ret i32 %l
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX10-GISEL-FLATSCR: {{.*}}
; GFX10-MUBUF: {{.*}}
; GFX10-SDAG-FLATSCR: {{.*}}
; GFX12: {{.*}}
; GFX90A: {{.*}}
; GFX90A-GISEL-FLATSCR: {{.*}}
; GFX90A-MUBUF: {{.*}}
; GFX90A-SDAG-FLATSCR: {{.*}}