Vyacheslav Levytskyy 83c1d00311
[SPIR-V] Overhaul module analysis to improve translation speed and simplify the underlying logics (#120415)
This PR addresses legacy issues with module analysis, which currently
uses a complicated and inefficient approach to trace dependencies
between SPIR-V ids via duplicate tracker data structures and an
explicitly built dependency graph. Even a quick performance check
without any specialized benchmarks points to this part of the
implementation as the biggest bottleneck.

This PR specifically:
* eliminates the need to build a dependency graph as a data structure,
* updates the test suite (mainly, by fixing incorrect CHECK's referring
to a hardcoded order of definitions, contradicting the spec requirement
to allow certain definitions to go "in any order", see
https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_logical_layout_of_a_module),
* improves the function pointers implementation so that it now passes
EXPENSIVE_CHECKS (thus removing 3 XFAILs in the test suite).

As a quick sanity check of whether the goals of the PR are achieved, we
can measure the translation time for any big LLVM IR input. While testing
the PR in a local development environment, speedups of about 5x have
been observed.

For example, the SYCL test case "group barrier", a ~1 MB binary IR
input, shows the following values of a naive performance metric that we
can nevertheless use here to roughly estimate the effect of the PR.

before the PR:
```
$ time llc -O0 -mtriple=spirv64v1.6-unknown-unknown _group_barrier_phi.bc -o 1 --filetype=obj

real    3m33.241s
user    3m14.688s
sys     0m18.530s
```

after the PR

```
$ time llc -O0 -mtriple=spirv64v1.6-unknown-unknown _group_barrier_phi.bc -o 1 --filetype=obj

real    0m42.031s
user    0m38.834s
sys     0m3.193s
```

Follow-up work should probably address the Duplicate Tracker further:
now that the approach to implementing the module analysis step has
changed, it needs to be analyzed to determine which parts of it are no
longer necessary.
2025-01-07 10:42:23 +01:00

89 lines
6.1 KiB
LLVM

; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
;; constant sampler_t constSampl = CLK_FILTER_LINEAR;
;;
;; __kernel
;; void sample_kernel_float(image2d_t input, float2 coords, global float4 *results, sampler_t argSampl) {
;; *results = read_imagef(input, constSampl, coords);
;; *results = read_imagef(input, argSampl, coords);
;; *results = read_imagef(input, CLK_FILTER_NEAREST|CLK_ADDRESS_REPEAT, coords);
;; }
;;
;; __kernel
;; void sample_kernel_int(image2d_t input, float2 coords, global int4 *results, sampler_t argSampl) {
;; *results = read_imagei(input, constSampl, coords);
;; *results = read_imagei(input, argSampl, coords);
;; *results = read_imagei(input, CLK_FILTER_NEAREST|CLK_ADDRESS_REPEAT, coords);
;; }
; CHECK-SPIRV: OpCapability LiteralSampler
; CHECK-SPIRV: OpName %[[#sample_kernel_float:]] "sample_kernel_float"
; CHECK-SPIRV: OpName %[[#sample_kernel_int:]] "sample_kernel_int"
; CHECK-SPIRV-DAG: %[[#TypeSampler:]] = OpTypeSampler
; CHECK-SPIRV-DAG: %[[#SampledImageTy:]] = OpTypeSampledImage
; CHECK-SPIRV-DAG: %[[#ConstSampler1:]] = OpConstantSampler %[[#TypeSampler]] None 0 Linear
; CHECK-SPIRV-DAG: %[[#ConstSampler2:]] = OpConstantSampler %[[#TypeSampler]] Repeat 0 Nearest
; CHECK-SPIRV: %[[#sample_kernel_float]] = OpFunction %{{.*}}
; CHECK-SPIRV: %[[#InputImage:]] = OpFunctionParameter %{{.*}}
; CHECK-SPIRV: %[[#argSampl:]] = OpFunctionParameter %[[#TypeSampler]]
; CHECK-SPIRV: %[[#SampledImage1:]] = OpSampledImage %[[#SampledImageTy]] %[[#InputImage]] %[[#ConstSampler1]]
; CHECK-SPIRV: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#SampledImage1]]
; CHECK-SPIRV: %[[#SampledImage2:]] = OpSampledImage %[[#SampledImageTy]] %[[#InputImage]] %[[#argSampl]]
; CHECK-SPIRV: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#SampledImage2]]
; CHECK-SPIRV: %[[#SampledImage3:]] = OpSampledImage %[[#SampledImageTy]] %[[#InputImage]] %[[#ConstSampler2]]
; CHECK-SPIRV: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#SampledImage3]]
;; Kernel under test (float variant). It performs three read_imagef calls on
;; the same image, each with a different sampler source, mirroring the OpenCL C
;; source quoted at the top of this file:
;;   1. the program-scope constant sampler (initializer 32, CLK_FILTER_LINEAR),
;;   2. the sampler passed in as the kernel argument %argSampl,
;;   3. an inline literal sampler (initializer 22,
;;      CLK_FILTER_NEAREST|CLK_ADDRESS_REPEAT).
;; Keep the calls in this order: the FileCheck patterns for this function match
;; the three sampled-image instructions sequentially.
;; %results is spelled with the opaque pointer type `ptr addrspace(1)` instead
;; of the legacy typed-pointer form; both denote the same IR type.
define dso_local spir_kernel void @sample_kernel_float(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, <2 x float> noundef %coords, ptr addrspace(1) nocapture noundef writeonly %results, target("spirv.Sampler") %argSampl) local_unnamed_addr {
entry:
  ;; Case 1: sampler materialized from the constant initializer 32.
  %0 = tail call spir_func target("spirv.Sampler") @__translate_sampler_initializer(i32 32)
  %call = tail call spir_func <4 x float> @_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, target("spirv.Sampler") %0, <2 x float> noundef %coords)
  store <4 x float> %call, ptr addrspace(1) %results, align 16
  ;; Case 2: sampler taken directly from the kernel argument.
  %call1 = tail call spir_func <4 x float> @_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, target("spirv.Sampler") %argSampl, <2 x float> noundef %coords)
  store <4 x float> %call1, ptr addrspace(1) %results, align 16
  ;; Case 3: sampler materialized from the inline literal 22.
  %1 = tail call spir_func target("spirv.Sampler") @__translate_sampler_initializer(i32 22)
  %call2 = tail call spir_func <4 x float> @_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, target("spirv.Sampler") %1, <2 x float> noundef %coords)
  store <4 x float> %call2, ptr addrspace(1) %results, align 16
  ret void
}
;; External declaration of the builtin called by @sample_kernel_float. The
;; Itanium-mangled name encodes read_imagef(ocl_image2d_ro, ocl_sampler, float2);
;; it takes the image, a sampler and the coordinates and returns <4 x float>.
declare spir_func <4 x float> @_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), target("spirv.Sampler"), <2 x float> noundef) local_unnamed_addr
;; Helper that takes an i32 sampler initializer and returns a spirv.Sampler
;; value. Both kernels call it with the literals 32 and 22; the FileCheck
;; patterns in this file expect those samplers to show up as OpConstantSampler
;; instructions ("None 0 Linear" and "Repeat 0 Nearest").
declare spir_func target("spirv.Sampler") @__translate_sampler_initializer(i32) local_unnamed_addr
; CHECK-SPIRV: %[[#sample_kernel_int]] = OpFunction %{{.*}}
; CHECK-SPIRV: %[[#InputImage:]] = OpFunctionParameter %{{.*}}
; CHECK-SPIRV: %[[#argSampl:]] = OpFunctionParameter %[[#TypeSampler]]
; CHECK-SPIRV: %[[#SampledImage4:]] = OpSampledImage %[[#SampledImageTy]] %[[#InputImage]] %[[#ConstSampler1]]
; CHECK-SPIRV: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#SampledImage4]]
; CHECK-SPIRV: %[[#SampledImage5:]] = OpSampledImage %[[#SampledImageTy]] %[[#InputImage]] %[[#argSampl]]
; CHECK-SPIRV: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#SampledImage5]]
; CHECK-SPIRV: %[[#SampledImage6:]] = OpSampledImage %[[#SampledImageTy]] %[[#InputImage]] %[[#ConstSampler2]]
; CHECK-SPIRV: %[[#]] = OpImageSampleExplicitLod %[[#]] %[[#SampledImage6]]
;; Kernel under test (integer variant). Same structure as the float kernel but
;; using read_imagei and a <4 x i32> result: three image reads on one image,
;; each with a different sampler source, mirroring the OpenCL C source quoted
;; at the top of this file:
;;   1. the program-scope constant sampler (initializer 32, CLK_FILTER_LINEAR),
;;   2. the sampler passed in as the kernel argument %argSampl,
;;   3. an inline literal sampler (initializer 22,
;;      CLK_FILTER_NEAREST|CLK_ADDRESS_REPEAT).
;; Keep the calls in this order: the FileCheck patterns for this function match
;; the three sampled-image instructions sequentially.
;; %results is spelled with the opaque pointer type `ptr addrspace(1)` instead
;; of the legacy typed-pointer form; both denote the same IR type.
define dso_local spir_kernel void @sample_kernel_int(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, <2 x float> noundef %coords, ptr addrspace(1) nocapture noundef writeonly %results, target("spirv.Sampler") %argSampl) local_unnamed_addr {
entry:
  ;; Case 1: sampler materialized from the constant initializer 32.
  %0 = tail call spir_func target("spirv.Sampler") @__translate_sampler_initializer(i32 32)
  %call = tail call spir_func <4 x i32> @_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, target("spirv.Sampler") %0, <2 x float> noundef %coords)
  store <4 x i32> %call, ptr addrspace(1) %results, align 16
  ;; Case 2: sampler taken directly from the kernel argument.
  %call1 = tail call spir_func <4 x i32> @_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, target("spirv.Sampler") %argSampl, <2 x float> noundef %coords)
  store <4 x i32> %call1, ptr addrspace(1) %results, align 16
  ;; Case 3: sampler materialized from the inline literal 22.
  %1 = tail call spir_func target("spirv.Sampler") @__translate_sampler_initializer(i32 22)
  %call2 = tail call spir_func <4 x i32> @_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %input, target("spirv.Sampler") %1, <2 x float> noundef %coords)
  store <4 x i32> %call2, ptr addrspace(1) %results, align 16
  ret void
}
;; External declaration of the builtin called by @sample_kernel_int. The
;; Itanium-mangled name encodes read_imagei(ocl_image2d_ro, ocl_sampler, float2);
;; it takes the image, a sampler and the coordinates and returns <4 x i32>.
declare spir_func <4 x i32> @_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), target("spirv.Sampler"), <2 x float> noundef) local_unnamed_addr