Prior to this patch, when `NumElems` was 0, `OpTypeRuntimeArray` was generated directly. However, `OpTypeRuntimeArray` requires the `Shader` capability, so it can only be generated when the `Shader` environment is in use. We have observed a pattern of using unbound arrays that translate into `[0 x ...]` types in OpenCL, which implies the `Kernel` capability, so `OpTypeRuntimeArray` should not be used there. To prevent this scenario, this patch simplifies GEP instructions where the type is a 0-length array and the first index is also 0. In such a scenario, we effectively drop the 0-length array and the first index. Additionally, prior to this patch, the newly added test generated a module with both the `Shader` and `Kernel` capabilities at the same time, even though they are incompatible. This patch also fixes that. Finally, prior to this patch, the newly added test added the `Shader` capability to the module even when the command-line flag `--avoid-spirv-capabilities=Shader` was given. This patch fixes that as well.
27 lines
1.3 KiB
LLVM
27 lines
1.3 KiB
LLVM
; First invocation: compile this file with llc at -O0 for the
; spirv64-unknown-unknown triple (with machine-instruction verification
; enabled) and match the textual output against the FileCheck directives below.
; NOTE(review): the bare `|` lines throughout this file look like
; table-separator residue from a web rendering and are not LLVM IR; confirm
; and strip them before running the test.
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
|
|
; Second invocation (only when the spirv-tools feature is available): emit an
; object file for the spirv64v1.2 triple and validate it with spirv-val using
; the opencl2.2 target environment.
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64v1.2-unknown-unknown %s -o - -filetype=obj | spirv-val --target-env opencl2.2 %}
|
|
|
|
; Expected SPIR-V for the global @Ptr. The DAG-style directives below may
; match in any order in the output; each one captures a result id into a
; FileCheck numeric variable that later directives refer to.
; U8: the 8-bit integer type.
; CHECK-DAG: %[[#U8:]] = OpTypeInt 8 0
|
|
; U32: the 32-bit integer type (used by the expectations for @Init).
; CHECK-DAG: %[[#U32:]] = OpTypeInt 32 0
|
|
|
|
; TYPE: a CrossWorkgroup pointer to the 8-bit integer type.
; CHECK-DAG: %[[#TYPE:]] = OpTypePointer CrossWorkgroup %[[#U8]]
|
|
; VAL: the null constant of that pointer type, expected for the `null`
; initializer of @Ptr.
; CHECK-DAG: %[[#VAL:]] = OpConstantNull %[[#TYPE]]
|
|
; VTYPE: a CrossWorkgroup pointer to TYPE, i.e. the type of the variable itself.
; CHECK-DAG: %[[#VTYPE:]] = OpTypePointer CrossWorkgroup %[[#TYPE]]
|
|
; PTR: the variable expected for @Ptr, initialised with the null constant.
; CHECK-DAG: %[[#PTR:]] = OpVariable %[[#VTYPE]] CrossWorkgroup %[[#VAL]]
|
|
; Global in address space 1 holding an address-space-1 pointer, initialised to null.
@Ptr = addrspace(1) global ptr addrspace(1) null
|
|
|
|
; Expected SPIR-V for the constant @Init. Note that VAL and VTYPE are rebound
; here: from this point on they refer to the ids captured below, not to the
; ids captured under the same names earlier in this file. U32 is the 32-bit
; integer type captured earlier.
; VAL: the 32-bit constant 123.
; CHECK-DAG: %[[#VAL:]] = OpConstant %[[#U32]] 123
|
|
; VTYPE: a UniformConstant pointer to the 32-bit integer type.
; CHECK-DAG: %[[#VTYPE:]] = OpTypePointer UniformConstant %[[#U32]]
|
|
; INIT: the variable expected for @Init, initialised with the constant 123.
; CHECK-DAG: %[[#INIT:]] = OpVariable %[[#VTYPE]] UniformConstant %[[#VAL]]
|
|
; Private constant i32 in address space 2 with value 123; it is the source of
; the memcpy in @Foo.
@Init = private addrspace(2) constant i32 123
|
|
|
|
; @Foo loads the pointer stored in @Ptr and copies 4 bytes from @Init to the
; loaded address via the memcpy intrinsic. The ordered directives inside the
; body pin the two SPIR-V instructions expected for those operations.
define internal spir_func void @Foo() {
|
|
; The load of @Ptr is expected to become an OpLoad from the variable captured
; as PTR, carrying the 8-byte alignment of the IR load.
; CHECK: %[[#]] = OpLoad %[[#]] %[[#PTR]] Aligned 8
|
|
%l = load ptr addrspace(1), ptr addrspace(1) @Ptr, align 8
|
|
; The memcpy is expected to become an OpCopyMemorySized whose second operand
; is the variable captured as INIT; the first and third operands are matched
; as arbitrary ids. The expected alignment operand is 4.
; CHECK: OpCopyMemorySized %[[#]] %[[#INIT]] %[[#]] Aligned 4
|
|
call void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) align 4 %l, ptr addrspace(2) align 1 @Init, i64 4, i1 false)
|
|
ret void
|
|
}
|
|
|
|
; Declaration of the memcpy intrinsic overload used by @Foo: destination
; pointer in address space 1, source pointer in address space 2, and an i64
; byte count. The trailing i1 is an immediate argument (the volatile flag per
; the LLVM Language Reference).
declare void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(2) noalias nocapture readonly, i64, i1 immarg)
|