
This PR thoroughly reworks the duplicate tracker implementation and the tracking of IR entities and types. These are legacy parts of the project that result in an extremely bloated intermediate representation and in computational delays caused by inefficient data flow and data structure choices. Main results of the rework: 1) Improved compile-time performance. The reference binary LLVM IR used to measure speed gains in https://github.com/llvm/llvm-project/pull/120415 shows a further ~5x speed-up with this PR: the timing is ~42s before this PR and ~7.5s after it. In total, this PR and the previous overhaul of the module analysis in https://github.com/llvm/llvm-project/pull/120415 result in a ~25x speed improvement. ``` $ time llc -O0 -mtriple=spirv64v1.6-unknown-unknown _group_barrier_phi.bc -o 1 --filetype=obj real 0m7.545s user 0m6.685s sys 0m0.859s ``` 2) A less bloated intermediate representation of the internal translation steps. Eliminating `spv_track_constant` intrinsic usage for scalar constants, reworking `spv_assign_name`, removing the gMIR `GET_XXX` pseudo codes, and generating fewer `ASSIGN_TYPE` pseudo codes substantially decrease the volume of data generated during translation. 3) Simpler code and easier maintenance. The duplicate tracker implementation is simplified, as are other features. 4) Numerous fixes of issues and logical flaws in different passes. The main achievement is the rework of the duplicate tracker itself, which had never guaranteed correct caching of LLVM IR entities and would rarely and randomly return stale/incorrect records (for example, an instruction removed from gMIR could still be referred to). Other fixes comprise consistent generation of OpConstantNull, assigning types to newly created registers, creation of integer/bool types, and other minor fixes. 
5) Numerous fixes of LIT tests: mainly CHECK-DAG to properly reflect SPIR-V spec guarantees, `{{$}}` at the end of constants to avoid matching substrings, and XFAILs for the `SPV_INTEL_long_composites` test cases, because the feature is not yet fully implemented and doesn't generate the sequence of instructions requested by the extension. 6) New test cases are added.
316 lines
14 KiB
LLVM
316 lines
14 KiB
LLVM
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[#intv2:]] = OpTypeVector %[[#int]] 2
; CHECK-SPIRV-DAG: %[[#intv3:]] = OpTypeVector %[[#int]] 3
; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
; CHECK-SPIRV-DAG: %[[#ScopeCrossWorkgroup:]] = OpConstantNull %[[#int]]
; CHECK-SPIRV-DAG: %[[#ScopeWorkgroup:]] = OpConstant %[[#int]] 2{{$}}
; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3{{$}}

;; Work-group max reduction on float: the call to the mangled OpenCL builtin
;; work_group_reduce_max(float) is expected to become OpGroupFMax with the
;; work-group scope constant and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupFMax(float a, global float *res) {
;;   res[0] = work_group_reduce_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupFMax(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func float @_Z21work_group_reduce_maxf(float noundef %a)
  store float %call, float addrspace(1)* %res, align 4
  ret void
}

declare spir_func float @_Z21work_group_reduce_maxf(float noundef) local_unnamed_addr

;; Work-group min reduction on float: work_group_reduce_min(float) is expected
;; to become OpGroupFMin with the work-group scope constant and the Reduce
;; group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupFMin %[[#float]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupFMin(float a, global float *res) {
;;   res[0] = work_group_reduce_min(a);
;; }

define dso_local spir_kernel void @testWorkGroupFMin(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func float @_Z21work_group_reduce_minf(float noundef %a)
  store float %call, float addrspace(1)* %res, align 4
  ret void
}

declare spir_func float @_Z21work_group_reduce_minf(float noundef) local_unnamed_addr

;; Work-group add reduction on float: work_group_reduce_add(float) is expected
;; to become OpGroupFAdd with the work-group scope constant and the Reduce
;; group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupFAdd %[[#float]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupFAdd(float a, global float *res) {
;;   res[0] = work_group_reduce_add(a);
;; }

define dso_local spir_kernel void @testWorkGroupFAdd(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func float @_Z21work_group_reduce_addf(float noundef %a)
  store float %call, float addrspace(1)* %res, align 4
  ret void
}

declare spir_func float @_Z21work_group_reduce_addf(float noundef) local_unnamed_addr

;; Work-group inclusive max scan on float: work_group_scan_inclusive_max(float)
;; is expected to become OpGroupFMax with the work-group scope constant and the
;; InclusiveScan group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeWorkgroup]] InclusiveScan
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupScanInclusiveFMax(float a, global float *res) {
;;   res[0] = work_group_scan_inclusive_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupScanInclusiveFMax(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func float @_Z29work_group_scan_inclusive_maxf(float noundef %a)
  store float %call, float addrspace(1)* %res, align 4
  ret void
}

declare spir_func float @_Z29work_group_scan_inclusive_maxf(float noundef) local_unnamed_addr

;; Work-group exclusive max scan on float: work_group_scan_exclusive_max(float)
;; is expected to become OpGroupFMax with the work-group scope constant and the
;; ExclusiveScan group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeWorkgroup]] ExclusiveScan
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupScanExclusiveFMax(float a, global float *res) {
;;   res[0] = work_group_scan_exclusive_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupScanExclusiveFMax(float noundef %a, float addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func float @_Z29work_group_scan_exclusive_maxf(float noundef %a)
  store float %call, float addrspace(1)* %res, align 4
  ret void
}

declare spir_func float @_Z29work_group_scan_exclusive_maxf(float noundef) local_unnamed_addr

;; Work-group max reduction on signed int: the `i`-mangled
;; work_group_reduce_max overload is expected to become OpGroupSMax with the
;; work-group scope constant and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#int]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupSMax(int a, global int *res) {
;;   res[0] = work_group_reduce_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupSMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z21work_group_reduce_maxi(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z21work_group_reduce_maxi(i32 noundef) local_unnamed_addr

;; Work-group min reduction on signed int: the `i`-mangled
;; work_group_reduce_min overload is expected to become OpGroupSMin with the
;; work-group scope constant and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#int]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupSMin(int a, global int *res) {
;;   res[0] = work_group_reduce_min(a);
;; }

define dso_local spir_kernel void @testWorkGroupSMin(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z21work_group_reduce_mini(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z21work_group_reduce_mini(i32 noundef) local_unnamed_addr

;; Work-group add reduction on signed int: the `i`-mangled
;; work_group_reduce_add overload is expected to become OpGroupIAdd with the
;; work-group scope constant and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#int]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupIAddSigned(int a, global int *res) {
;;   res[0] = work_group_reduce_add(a);
;; }

define dso_local spir_kernel void @testWorkGroupIAddSigned(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z21work_group_reduce_addi(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z21work_group_reduce_addi(i32 noundef) local_unnamed_addr

;; Work-group add reduction on unsigned int: the `j`-mangled
;; work_group_reduce_add overload is expected to become the same OpGroupIAdd
;; form as the signed variant (work-group scope constant, Reduce).
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#int]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupIAddUnsigned(uint a, global uint *res) {
;;   res[0] = work_group_reduce_add(a);
;; }

define dso_local spir_kernel void @testWorkGroupIAddUnsigned(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z21work_group_reduce_addj(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z21work_group_reduce_addj(i32 noundef) local_unnamed_addr

;; Work-group max reduction on unsigned int: the `j`-mangled
;; work_group_reduce_max overload is expected to become OpGroupUMax with the
;; work-group scope constant and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupUMax(uint a, global uint *res) {
;;   res[0] = work_group_reduce_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z21work_group_reduce_maxj(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z21work_group_reduce_maxj(i32 noundef) local_unnamed_addr

;; Sub-group max reduction on unsigned int: sub_group_reduce_max(uint) is
;; expected to become OpGroupUMax with the sub-group scope constant (instead of
;; the work-group one) and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeSubgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; #pragma OPENCL EXTENSION cl_khr_subgroups: enable
;; kernel void testSubGroupUMax(uint a, global uint *res) {
;;   res[0] = sub_group_reduce_max(a);
;; }
;; #pragma OPENCL EXTENSION cl_khr_subgroups: disable

define dso_local spir_kernel void @testSubGroupUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z20sub_group_reduce_maxj(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z20sub_group_reduce_maxj(i32 noundef) local_unnamed_addr

;; Work-group inclusive max scan on unsigned int:
;; work_group_scan_inclusive_max(uint) is expected to become OpGroupUMax with
;; the work-group scope constant and the InclusiveScan group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeWorkgroup]] InclusiveScan
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupScanInclusiveUMax(uint a, global uint *res) {
;;   res[0] = work_group_scan_inclusive_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupScanInclusiveUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z29work_group_scan_inclusive_maxj(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z29work_group_scan_inclusive_maxj(i32 noundef) local_unnamed_addr

;; Work-group exclusive max scan on unsigned int:
;; work_group_scan_exclusive_max(uint) is expected to become OpGroupUMax with
;; the work-group scope constant and the ExclusiveScan group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeWorkgroup]] ExclusiveScan
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupScanExclusiveUMax(uint a, global uint *res) {
;;   res[0] = work_group_scan_exclusive_max(a);
;; }

define dso_local spir_kernel void @testWorkGroupScanExclusiveUMax(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z29work_group_scan_exclusive_maxj(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z29work_group_scan_exclusive_maxj(i32 noundef) local_unnamed_addr

;; Work-group min reduction on unsigned int: the `j`-mangled
;; work_group_reduce_min overload is expected to become OpGroupUMin with the
;; work-group scope constant and the Reduce group operation.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#int]] %[[#ScopeWorkgroup]] Reduce
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupUMin(uint a, global uint *res) {
;;   res[0] = work_group_reduce_min(a);
;; }

define dso_local spir_kernel void @testWorkGroupUMin(i32 noundef %a, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %call = call spir_func i32 @_Z21work_group_reduce_minj(i32 noundef %a)
  store i32 %call, i32 addrspace(1)* %res, align 4
  ret void
}

declare spir_func i32 @_Z21work_group_reduce_minj(i32 noundef) local_unnamed_addr

;; Work-group broadcast with one, two and three local-id operands, followed by
;; a direct call to the __spirv_GroupBroadcast builtin:
;;   - one id:    the id is passed to OpGroupBroadcast as-is;
;;   - two ids:   the ids are first packed into a 2-component vector with
;;                OpCompositeConstruct;
;;   - three ids: the ids are first packed into a 3-component vector;
;;   - __spirv_GroupBroadcast with a literal scope of 0 uses the null integer
;;     constant as its scope operand.
;; Each call targets the overload whose mangled parameter list (`jj`, `jjj`,
;; `jjjj`) matches the number of arguments actually passed.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeWorkgroup]] %[[#BroadcastValue:]] %[[#BroadcastLocalId:]]
; CHECK-SPIRV: %[[#BroadcastVec2:]] = OpCompositeConstruct %[[#intv2]] %[[#BroadcastLocalId]] %[[#BroadcastLocalId]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeWorkgroup]] %[[#BroadcastValue]] %[[#BroadcastVec2]]
; CHECK-SPIRV: %[[#BroadcastVec3:]] = OpCompositeConstruct %[[#intv3]] %[[#BroadcastLocalId]] %[[#BroadcastLocalId]] %[[#BroadcastLocalId]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeWorkgroup]] %[[#BroadcastValue]] %[[#BroadcastVec3]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#int]] %[[#ScopeCrossWorkgroup]] %[[#BroadcastValue]] %[[#BroadcastLocalId]]
; CHECK-SPIRV: OpFunctionEnd

;; kernel void testWorkGroupBroadcast(uint a, global size_t *id, global int *res) {
;;   res[0] = work_group_broadcast(a, *id);
;; }
;; The IR below additionally calls the two- and three-id overloads and the
;; __spirv_GroupBroadcast builtin; only the result of the one-id call is stored.

define dso_local spir_kernel void @testWorkGroupBroadcast(i32 noundef %a, i32 addrspace(1)* nocapture noundef readonly %id, i32 addrspace(1)* nocapture noundef writeonly %res) local_unnamed_addr {
entry:
  %0 = load i32, i32 addrspace(1)* %id, align 4
  %call = call spir_func i32 @_Z20work_group_broadcastjj(i32 noundef %a, i32 noundef %0)
  %call_v2 = call spir_func i32 @_Z20work_group_broadcastjjj(i32 noundef %a, i32 noundef %0, i32 noundef %0)
  %call_v3 = call spir_func i32 @_Z20work_group_broadcastjjjj(i32 noundef %a, i32 noundef %0, i32 noundef %0, i32 noundef %0)
  store i32 %call, i32 addrspace(1)* %res, align 4
  %call1 = call spir_func i32 @__spirv_GroupBroadcast(i32 0, i32 noundef %a, i32 noundef %0)
  ret void
}

declare spir_func i32 @_Z20work_group_broadcastjj(i32 noundef, i32 noundef) local_unnamed_addr
declare spir_func i32 @_Z20work_group_broadcastjjj(i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
declare spir_func i32 @_Z20work_group_broadcastjjjj(i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
declare spir_func i32 @__spirv_GroupBroadcast(i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr

;; Direct calls to the __spirv_Group* builtins. In every call the first i32 is
;; the scope and the second i32 is the group operation; the expectations below
;; pair them as follows:
;;   scope 0 -> the null integer constant, 2 -> the constant 2, 3 -> the constant 3
;;   group operation 0 -> Reduce, 1 -> InclusiveScan, 2 -> ExclusiveScan
;; The third argument is the value operand, captured once for the float calls
;; and once for the integer calls.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupFAdd %[[#float]] %[[#ScopeCrossWorkgroup]] Reduce %[[#FValue:]]
; CHECK-SPIRV: %[[#]] = OpGroupFMin %[[#float]] %[[#ScopeWorkgroup]] InclusiveScan %[[#FValue]]
; CHECK-SPIRV: %[[#]] = OpGroupFMax %[[#float]] %[[#ScopeSubgroup]] ExclusiveScan %[[#FValue]]
; CHECK-SPIRV: %[[#]] = OpGroupIAdd %[[#int]] %[[#ScopeCrossWorkgroup]] Reduce %[[#IValue:]]
; CHECK-SPIRV: %[[#]] = OpGroupUMin %[[#int]] %[[#ScopeWorkgroup]] InclusiveScan %[[#IValue]]
; CHECK-SPIRV: %[[#]] = OpGroupSMin %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#IValue]]
; CHECK-SPIRV: %[[#]] = OpGroupUMax %[[#int]] %[[#ScopeCrossWorkgroup]] Reduce %[[#IValue]]
; CHECK-SPIRV: %[[#]] = OpGroupSMax %[[#int]] %[[#ScopeWorkgroup]] InclusiveScan %[[#IValue]]
; CHECK-SPIRV: OpFunctionEnd

define spir_kernel void @foo(float %a, i32 %b) {
entry:
  %f1 = call spir_func float @__spirv_GroupFAdd(i32 0, i32 0, float %a)
  %f2 = call spir_func float @__spirv_GroupFMin(i32 2, i32 1, float %a)
  %f3 = call spir_func float @__spirv_GroupFMax(i32 3, i32 2, float %a)
  %i1 = call spir_func i32 @__spirv_GroupIAdd(i32 0, i32 0, i32 %b)
  %i2 = call spir_func i32 @__spirv_GroupUMin(i32 2, i32 1, i32 %b)
  %i3 = call spir_func i32 @__spirv_GroupSMin(i32 3, i32 2, i32 %b)
  %i4 = call spir_func i32 @__spirv_GroupUMax(i32 0, i32 0, i32 %b)
  %i5 = call spir_func i32 @__spirv_GroupSMax(i32 2, i32 1, i32 %b)
  ret void
}

;; Declarations of the __spirv_Group* builtins called from @foo: two i32
;; operands followed by the float or i32 value operand.
declare spir_func float @__spirv_GroupFAdd(i32, i32, float)
declare spir_func float @__spirv_GroupFMin(i32, i32, float)
declare spir_func float @__spirv_GroupFMax(i32, i32, float)
declare spir_func i32 @__spirv_GroupIAdd(i32, i32, i32)
declare spir_func i32 @__spirv_GroupUMin(i32, i32, i32)
declare spir_func i32 @__spirv_GroupSMin(i32, i32, i32)
declare spir_func i32 @__spirv_GroupUMax(i32, i32, i32)
declare spir_func i32 @__spirv_GroupSMax(i32, i32, i32)