This issue was discovered during some downstream work around Vulkan CTS tests, specifically `dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float` --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
65 lines
3.7 KiB
LLVM
65 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
|
|
; Test the generation of the attribute amdgpu-no-wwm
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK %s
|
|
|
|
; Kernel with no WWM intrinsic call: it only stores %input to %out.
; The expected attribute group is ATTR0, the only group in the attribute checks
; at the end of this file that lists "amdgpu-no-wwm".
define amdgpu_kernel void @test_no_wwm(i32 %input, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @test_no_wwm(
; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT: ret void
;
  store i32 %input, ptr addrspace(1) %out
  ret void
}

; Kernel that calls the llvm.amdgcn.wwm intrinsic directly and stores the result.
; The expected attribute group is ATTR1, which the attribute checks at the end
; of this file define without "amdgpu-no-wwm".
define amdgpu_kernel void @test_old_wwm(i32 %input, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @test_old_wwm(
; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
; CHECK-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT: ret void
;
  %wwm = call i32 @llvm.amdgcn.wwm.i32(i32 %input)
  store i32 %wwm, ptr addrspace(1) %out
  ret void
}

; Kernel that calls the llvm.amdgcn.strict.wwm intrinsic directly and stores the
; result. The CHECK-SAME line reuses ATTR1 (first bound in @test_old_wwm), the
; attribute group that the checks at the end of this file define without
; "amdgpu-no-wwm".
define amdgpu_kernel void @test_strict_wwm(i32 %input, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @test_strict_wwm(
; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
; CHECK-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT: ret void
;
  %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %input)
  store i32 %wwm, ptr addrspace(1) %out
  ret void
}

; Non-kernel function (no amdgpu_kernel calling convention) that calls
; llvm.amdgcn.wwm and returns the result. It is the callee of @test_nested.
; The expected attribute group is ATTR1, i.e. the one without "amdgpu-no-wwm".
define i32 @test_calls_wwm(i32 %input) {
; CHECK-LABEL: define i32 @test_calls_wwm(
; CHECK-SAME: i32 [[INPUT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
; CHECK-NEXT: ret i32 [[WWM]]
;
  %wwm = call i32 @llvm.amdgcn.wwm.i32(i32 %input)
  ret i32 %wwm
}

; Kernel that contains no WWM intrinsic call itself; it only calls
; @test_calls_wwm, which does. The expected attribute group is still ATTR1
; (without "amdgpu-no-wwm"), so the checks require that the attribute is not
; added when WWM is used through a callee.
define amdgpu_kernel void @test_nested(i32 %input, ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @test_nested(
; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[RES:%.*]] = call i32 @test_calls_wwm(i32 [[INPUT]])
; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT: ret void
;
  %res = call i32 @test_calls_wwm(i32 %input)
  store i32 %res, ptr addrspace(1) %out
  ret void
}
;.
|
|
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1200" }
|
|
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1200" }
|
|
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1200" }
|
|
;.
|