llvm-project/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll
Sirish Pande abec9ff47d
[AMDGPU] Correctly merge noalias scopes during lowering of LDS data. (#131664)
Currently, if noalias metadata is already present on loads and stores, the
lower-module-LDS pass generates a more conservative aliasing set. This
inhibits scheduling intrinsics that would otherwise have produced a better
pipelined instruction sequence.

The fix is to not always intersect the already-existing noalias metadata
with the noalias scopes created for the lowering of LDS, but to intersect
only if the noalias scopes are from the same domain; otherwise, concatenate
the existing noalias sets with the LDS noalias scopes.
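
As a rough illustration, the merge rule could look like the sketch below
(the helper name and exact structure are assumptions for illustration, not
the actual code of this patch):

```cpp
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Hypothetical helper: merge an instruction's pre-existing !noalias list
// with the scope list created for the lowered LDS variables.
static MDNode *mergeNoAliasWithLDS(MDNode *Existing, MDNode *LDSNoAlias) {
  if (!Existing)
    return LDSNoAlias;
  if (!LDSNoAlias)
    return Existing;

  // Collect the domains referenced by the LDS noalias scopes.
  SmallPtrSet<const MDNode *, 8> LDSDomains;
  for (const MDOperand &Op : LDSNoAlias->operands())
    LDSDomains.insert(AliasScopeNode(cast<MDNode>(Op.get())).getDomain());

  // If any pre-existing scope shares a domain with the LDS scopes, fall back
  // to the conservative intersection; otherwise the two lists constrain
  // independent domains, so simply concatenate them.
  for (const MDOperand &Op : Existing->operands())
    if (LDSDomains.contains(AliasScopeNode(cast<MDNode>(Op.get())).getDomain()))
      return MDNode::intersect(Existing, LDSNoAlias);
  return MDNode::concatenate(Existing, LDSNoAlias);
}
```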

A few patches for scoped alias analysis (scopedAA) have landed in the past.
The following three should provide enough background information:
https://reviews.llvm.org/D91576
https://reviews.llvm.org/D108315
https://reviews.llvm.org/D110049

Essentially, after a pass that might change aliasing information, one should
check whether that pass changes the number of MayAlias or ModRef results,
using the following:
`opt -S -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval
-evaluate-aa-metadata -print-all-alias-modref-info -disable-output`
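
For example, a hypothetical before/after comparison for this test could look
like the following (the output file name `lowered.ll` is illustrative;
`amdgpu-lower-module-lds` is the AMDGPU pass that performs the LDS lowering):

```shell
# Run the LDS lowering pass, then evaluate the scoped-noalias metadata and
# compare the MayAlias/ModRef counts against a run on the original file.
opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds \
    llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll -o lowered.ll
opt -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval \
    -evaluate-aa-metadata -print-all-alias-modref-info -disable-output lowered.ll
```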
2025-04-28 14:02:18 -05:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s

@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4

define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
; GCN-LABEL: ds_load_stores_aainfo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: v_mov_b32_e32 v4, s0
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
; GCN-NEXT: s_endpgm
bb:
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
%val = add i64 %val.a, %val.b
store i64 %val, ptr addrspace(1) %arg, align 4
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
ret void
}
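
; Alias-scope metadata: !4 is the domain; !3 and !7 are scopes within it. The
; loads above use !alias.scope !6 (containing !7) and !noalias !5 (containing
; !3), while the store to @c uses !noalias !2 (also containing !3). !0 and !1
; form the TBAA metadata.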
!0 = !{!"omnipotent char", !1, i64 0}
!1 = !{!1}
!2 = !{!3}
!3 = distinct !{!3, !4}
!4 = distinct !{!4}
!5 = !{!3}
!6 = !{!7}
!7 = !{!7, !4}