
This PR is to address legacy issues with module analysis that currently uses a complicated and not so efficient approach to trace dependencies between SPIR-V id's via a duplicate tracker data structures and an explicitly built dependency graph. Even a quick performance check without any specialized benchmarks points to this part of the implementation as a biggest bottleneck. This PR specifically: * eliminates a need to build a dependency graph as a data structure, * updates the test suite (mainly, by fixing incorrect CHECK's referring to a hardcoded order of definitions, contradicting the spec requirement to allow certain definitions to go "in any order", see https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_logical_layout_of_a_module), * improves function pointers implementation so that it now passes EXPENSIVE_CHECKS (thus removing 3 XFAIL's in the test suite). As a quick sanity check of whether goals of the PR are achieved, we can measure time of translation for any big LLVM IR. While testing the PR in the local development environment, improvements of the x5 order have been observed. For example, the SYCL test case "group barrier" that is a ~1Mb binary IR input shows the following values of the naive performance metric that we can nevertheless apply here to roughly estimate effects of the PR. before the PR: ``` $ time llc -O0 -mtriple=spirv64v1.6-unknown-unknown _group_barrier_phi.bc -o 1 --filetype=obj real 3m33.241s user 3m14.688s sys 0m18.530s ``` after the PR ``` $ time llc -O0 -mtriple=spirv64v1.6-unknown-unknown _group_barrier_phi.bc -o 1 --filetype=obj real 0m42.031s user 0m38.834s sys 0m3.193s ``` Next work should probably address Duplicate Tracker further, as it needs analysis now from the perspective of what parts of it are not necessary now, after changing the approach to implementation of the module analysis step.
87 lines
4.6 KiB
LLVM
87 lines
4.6 KiB
LLVM
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
|
|
|
|
; CHECK-DAG: %[[#TypeImage:]] = OpTypeImage
|
|
; CHECK-DAG: %[[#TypeSampler:]] = OpTypeSampler
|
|
; CHECK-DAG: %[[#TypeImagePtr:]] = OpTypePointer {{.*}} %[[#TypeImage]]
|
|
; CHECK-DAG: %[[#TypeSamplerPtr:]] = OpTypePointer {{.*}} %[[#TypeSampler]]
|
|
|
|
; CHECK: %[[#srcimg:]] = OpFunctionParameter %[[#TypeImage]]
|
|
; CHECK: %[[#sampler:]] = OpFunctionParameter %[[#TypeSampler]]
|
|
|
|
; CHECK: %[[#srcimg_addr:]] = OpVariable %[[#TypeImagePtr]]
|
|
; CHECK: %[[#sampler_addr:]] = OpVariable %[[#TypeSamplerPtr]]
|
|
|
|
; CHECK: OpStore %[[#srcimg_addr]] %[[#srcimg]]
|
|
; CHECK: OpStore %[[#sampler_addr]] %[[#sampler]]
|
|
|
|
; CHECK: %[[#srcimg_val:]] = OpLoad %[[#]] %[[#srcimg_addr]]
|
|
; CHECK: %[[#sampler_val:]] = OpLoad %[[#]] %[[#sampler_addr]]
|
|
|
|
; CHECK: %[[#]] = OpSampledImage %[[#]] %[[#srcimg_val]] %[[#sampler_val]]
|
|
; CHECK-NEXT: OpImageSampleExplicitLod
|
|
|
|
; CHECK: %[[#srcimg_val:]] = OpLoad %[[#]] %[[#srcimg_addr]]
|
|
; CHECK: %[[#]] = OpImageQuerySizeLod %[[#]] %[[#srcimg_val]]
|
|
|
|
;; Excerpt from opencl-c-base.h
|
|
;; typedef float float4 __attribute__((ext_vector_type(4)));
|
|
;; typedef int int2 __attribute__((ext_vector_type(2)));
|
|
;; typedef __SIZE_TYPE__ size_t;
|
|
;;
|
|
;; Excerpt from opencl-c.h to speed up compilation.
|
|
;; #define __ovld __attribute__((overloadable))
|
|
;; #define __purefn __attribute__((pure))
|
|
;; #define __cnfn __attribute__((const))
|
|
;; size_t __ovld __cnfn get_global_id(unsigned int dimindx);
|
|
;; int __ovld __cnfn get_image_width(read_only image2d_t image);
|
|
;; float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);
|
|
;;
|
|
;;
|
|
;; __kernel void test_fn(image2d_t srcimg, sampler_t sampler, global float4 *results) {
|
|
;; int tid_x = get_global_id(0);
|
|
;; int tid_y = get_global_id(1);
|
|
;; results[tid_x + tid_y * get_image_width(srcimg)] = read_imagef(srcimg, sampler, (int2){tid_x, tid_y});
|
|
;; }
|
|
|
|
define dso_local spir_kernel void @test_fn(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %srcimg, target("spirv.Sampler") %sampler, <4 x float> addrspace(1)* noundef %results) {
|
|
entry:
|
|
%srcimg.addr = alloca target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), align 4
|
|
%sampler.addr = alloca target("spirv.Sampler"), align 4
|
|
%results.addr = alloca <4 x float> addrspace(1)*, align 4
|
|
%tid_x = alloca i32, align 4
|
|
%tid_y = alloca i32, align 4
|
|
%.compoundliteral = alloca <2 x i32>, align 8
|
|
store target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %srcimg, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)* %srcimg.addr, align 4
|
|
store target("spirv.Sampler") %sampler, target("spirv.Sampler")* %sampler.addr, align 4
|
|
store <4 x float> addrspace(1)* %results, <4 x float> addrspace(1)** %results.addr, align 4
|
|
%call = call spir_func i32 @_Z13get_global_idj(i32 noundef 0)
|
|
store i32 %call, i32* %tid_x, align 4
|
|
%call1 = call spir_func i32 @_Z13get_global_idj(i32 noundef 1)
|
|
store i32 %call1, i32* %tid_y, align 4
|
|
%0 = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)* %srcimg.addr, align 4
|
|
%1 = load target("spirv.Sampler"), target("spirv.Sampler")* %sampler.addr, align 4
|
|
%2 = load i32, i32* %tid_x, align 4
|
|
%vecinit = insertelement <2 x i32> undef, i32 %2, i32 0
|
|
%3 = load i32, i32* %tid_y, align 4
|
|
%vecinit2 = insertelement <2 x i32> %vecinit, i32 %3, i32 1
|
|
store <2 x i32> %vecinit2, <2 x i32>* %.compoundliteral, align 8
|
|
%4 = load <2 x i32>, <2 x i32>* %.compoundliteral, align 8
|
|
%call3 = call spir_func <4 x float> @_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %0, target("spirv.Sampler") %1, <2 x i32> noundef %4)
|
|
%5 = load <4 x float> addrspace(1)*, <4 x float> addrspace(1)** %results.addr, align 4
|
|
%6 = load i32, i32* %tid_x, align 4
|
|
%7 = load i32, i32* %tid_y, align 4
|
|
%8 = load target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)* %srcimg.addr, align 4
|
|
%call4 = call spir_func i32 @_Z15get_image_width14ocl_image2d_ro(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %8)
|
|
%mul = mul nsw i32 %7, %call4
|
|
%add = add nsw i32 %6, %mul
|
|
%arrayidx = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %5, i32 %add
|
|
store <4 x float> %call3, <4 x float> addrspace(1)* %arrayidx, align 16
|
|
ret void
|
|
}
|
|
|
|
declare spir_func i32 @_Z13get_global_idj(i32 noundef)
|
|
|
|
declare spir_func <4 x float> @_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0), target("spirv.Sampler"), <2 x i32> noundef)
|
|
|
|
declare spir_func i32 @_Z15get_image_width14ocl_image2d_ro(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0))
|