
Currently, if noalias metadata is already present on loads and stores, the lower-module-LDS pass generates a more conservative aliasing set. This inhibits scheduling intrinsics that would otherwise have produced better-pipelined instructions. The fix is not to always intersect the already existing noalias metadata with the noalias metadata created for the lowering of LDS, but to intersect only if the noalias scopes are from the same domain, and otherwise to concatenate the existing noalias sets with the LDS noalias. There are a few patches that have been made for scoped AA in the past; the following three should be enough background information: https://reviews.llvm.org/D91576 https://reviews.llvm.org/D108315 https://reviews.llvm.org/D110049 Essentially, after a pass that might change aliasing info, one should check whether that pass changes the number of MayAlias or ModRef results, using the following: `opt -S -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output`
147 lines
7.6 KiB
LLVM
147 lines
7.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=module | FileCheck -check-prefixes=CHECK,MODULE %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefixes=CHECK,TABLE %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=kernel | FileCheck -check-prefixes=CHECK,K_OR_HY %s
|
|
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=hybrid | FileCheck -check-prefixes=CHECK,K_OR_HY %s
|
|
|
|
;; Same checks for kernel and for hybrid as an unambiguous reference to a variable - one where exactly one kernel
|
|
;; can reach it - is the case where hybrid lowering can always prefer the direct access.
|
|
|
|
;; Single kernel is sole user of single variable, all options codegen as direct access to kernel struct
|
|
|
|
;; i8 global in addrspace(3); in the visible part of this test it is referenced only by @k0.
@k0.lds = addrspace(3) global i8 poison
|
|
;; Kernel that loads @k0.lds, multiplies the value by 2 and stores it back.
;; The assertions below use the prefix shared by all four RUN lines, so every lowering
;; strategy is expected to redirect both accesses to @llvm.amdgcn.kernel.k0.lds (align 1).
define amdgpu_kernel void @k0() {
|
|
; CHECK-LABEL: @k0(
|
|
; CHECK-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 1
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 2
|
|
; CHECK-NEXT: store i8 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 1
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
;; Input IR given to the pass (the assertions above describe its expected output).
%ld = load i8, ptr addrspace(3) @k0.lds
|
|
%mul = mul i8 %ld, 2
|
|
store i8 %mul, ptr addrspace(3) @k0.lds
|
|
ret void
|
|
}
|
|
|
|
;; Function is reachable from one kernel. Variable goes in module lds or the kernel struct, but never both.
|
|
|
|
;; i16 global in addrspace(3); in the visible part of this test it is referenced only by @f0.
@f0.lds = addrspace(3) global i16 poison
|
|
;; Non-kernel function that loads @f0.lds, multiplies the value by 3 and stores it back.
;; Expected output per strategy, as asserted below:
;;  - module strategy: both accesses use a constant GEP (indices 0, 1) into
;;    @llvm.amdgcn.module.lds with align 4 and carry !alias.scope / !noalias metadata.
;;  - table strategy: @llvm.amdgcn.lds.kernel.id() is called once; for the load and again
;;    for the store, an i32 is read from @llvm.amdgcn.lds.offset.table at [kernel id][1]
;;    and turned into an addrspace(3) pointer with inttoptr.
;;  - kernel and hybrid strategies: both accesses go directly to
;;    @llvm.amdgcn.kernel.k_f0.lds with align 2.
define void @f0() {
|
|
; MODULE-LABEL: @f0(
|
|
; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1:![0-9]+]], !noalias [[META4:![0-9]+]]
|
|
; MODULE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
|
|
; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1]], !noalias [[META4]]
|
|
; MODULE-NEXT: ret void
|
|
;
|
|
; TABLE-LABEL: @f0(
|
|
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
|
; TABLE-NEXT: [[F0_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
|
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[F0_LDS2]], align 4
|
|
; TABLE-NEXT: [[F0_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
|
; TABLE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[F0_LDS3]], align 2
|
|
; TABLE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
|
|
; TABLE-NEXT: [[F0_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
|
|
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[F0_LDS]], align 4
|
|
; TABLE-NEXT: [[F0_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
|
; TABLE-NEXT: store i16 [[MUL]], ptr addrspace(3) [[F0_LDS1]], align 2
|
|
; TABLE-NEXT: ret void
|
|
;
|
|
; K_OR_HY-LABEL: @f0(
|
|
; K_OR_HY-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds, align 2
|
|
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
|
|
; K_OR_HY-NEXT: store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds, align 2
|
|
; K_OR_HY-NEXT: ret void
|
|
;
|
|
;; Input IR given to the pass (the assertions above describe its expected output).
%ld = load i16, ptr addrspace(3) @f0.lds
|
|
%mul = mul i16 %ld, 3
|
|
store i16 %mul, ptr addrspace(3) @f0.lds
|
|
ret void
|
|
}
|
|
|
|
;; Kernel whose input body only calls @f0; it does not reference @f0.lds itself.
;; Expected output per strategy, as asserted below: every strategy inserts a call to
;; @llvm.donothing with an "ExplicitUse" operand bundle ahead of the call to @f0.
;;  - module strategy: the bundle names @llvm.amdgcn.module.lds and the call carries
;;    !alias.scope / !noalias metadata.
;;  - table, kernel and hybrid strategies: the bundle names @llvm.amdgcn.kernel.k_f0.lds
;;    and no metadata is expected on the call.
define amdgpu_kernel void @k_f0() {
|
|
; MODULE-LABEL: @k_f0(
|
|
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]]
|
|
; MODULE-NEXT: call void @f0()
|
|
; MODULE-NEXT: ret void
|
|
;
|
|
; TABLE-LABEL: @k_f0(
|
|
; TABLE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds) ]
|
|
; TABLE-NEXT: call void @f0()
|
|
; TABLE-NEXT: ret void
|
|
;
|
|
; K_OR_HY-LABEL: @k_f0(
|
|
; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds) ]
|
|
; K_OR_HY-NEXT: call void @f0()
|
|
; K_OR_HY-NEXT: ret void
|
|
;
|
|
;; Input IR given to the pass (the assertions above describe its expected output).
call void @f0()
|
|
ret void
|
|
}
|
|
|
|
;; As above, but with the kernel also using the variable.
|
|
|
|
;; i32 global in addrspace(3); in the visible part of this test it is referenced by both
;; the function @f_both and the kernel @k0_both.
@both.lds = addrspace(3) global i32 poison
|
|
;; Non-kernel function that loads @both.lds, multiplies the value by 4 and stores it back.
;; Expected output per strategy, as asserted below:
;;  - module strategy: both accesses address @llvm.amdgcn.module.lds directly (no GEP),
;;    align 4, with !alias.scope / !noalias metadata.
;;  - table strategy: @llvm.amdgcn.lds.kernel.id() is called once; for the load and again
;;    for the store, an i32 is read from @llvm.amdgcn.lds.offset.table at [kernel id][0]
;;    and turned into an addrspace(3) pointer with inttoptr.
;;  - kernel and hybrid strategies: both accesses go directly to
;;    @llvm.amdgcn.kernel.k0_both.lds with align 4.
define void @f_both() {
|
|
; MODULE-LABEL: @f_both(
|
|
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]]
|
|
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
|
|
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]]
|
|
; MODULE-NEXT: ret void
|
|
;
|
|
; TABLE-LABEL: @f_both(
|
|
; TABLE-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
|
|
; TABLE-NEXT: [[BOTH_LDS2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
|
; TABLE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS2]], align 4
|
|
; TABLE-NEXT: [[BOTH_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
|
|
; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) [[BOTH_LDS3]], align 4
|
|
; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
|
|
; TABLE-NEXT: [[BOTH_LDS:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
|
|
; TABLE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[BOTH_LDS]], align 4
|
|
; TABLE-NEXT: [[BOTH_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
|
|
; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) [[BOTH_LDS1]], align 4
|
|
; TABLE-NEXT: ret void
|
|
;
|
|
; K_OR_HY-LABEL: @f_both(
|
|
; K_OR_HY-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
|
|
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
|
|
; K_OR_HY-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
|
|
; K_OR_HY-NEXT: ret void
|
|
;
|
|
;; Input IR given to the pass (the assertions above describe its expected output).
%ld = load i32, ptr addrspace(3) @both.lds
|
|
%mul = mul i32 %ld, 4
|
|
store i32 %mul, ptr addrspace(3) @both.lds
|
|
ret void
|
|
}
|
|
|
|
;; Kernel that loads @both.lds, multiplies the value by 5, stores it back and then calls
;; @f_both, which accesses the same variable.
;; Expected output per strategy, as asserted below: every strategy inserts a call to
;; @llvm.donothing with an "ExplicitUse" operand bundle at the top of the kernel.
;;  - module strategy: the bundle names @llvm.amdgcn.module.lds (no metadata expected on
;;    that call); the load and store address @llvm.amdgcn.module.lds directly and carry
;;    !alias.scope / !noalias metadata.
;;  - table, kernel and hybrid strategies: the bundle names
;;    @llvm.amdgcn.kernel.k0_both.lds and the load and store address that variable
;;    directly, with no metadata expected.
define amdgpu_kernel void @k0_both() {
|
|
; MODULE-LABEL: @k0_both(
|
|
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
|
|
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
|
|
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
|
|
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
|
|
; MODULE-NEXT: call void @f_both()
|
|
; MODULE-NEXT: ret void
|
|
;
|
|
; TABLE-LABEL: @k0_both(
|
|
; TABLE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds) ]
|
|
; TABLE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
|
|
; TABLE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
|
|
; TABLE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
|
|
; TABLE-NEXT: call void @f_both()
|
|
; TABLE-NEXT: ret void
|
|
;
|
|
; K_OR_HY-LABEL: @k0_both(
|
|
; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds) ]
|
|
; K_OR_HY-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
|
|
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
|
|
; K_OR_HY-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
|
|
; K_OR_HY-NEXT: call void @f_both()
|
|
; K_OR_HY-NEXT: ret void
|
|
;
|
|
;; Input IR given to the pass (the assertions above describe its expected output).
%ld = load i32, ptr addrspace(3) @both.lds
|
|
%mul = mul i32 %ld, 5
|
|
store i32 %mul, ptr addrspace(3) @both.lds
|
|
call void @f_both()
|
|
ret void
|
|
}
|