llvm-project/llvm/test/CodeGen/NVPTX/cp-async-bulk-tensor-g2s-invalid.ll
Durgadoss R 3e5d50f9c6
[NVPTX] Add cta_group support to TMA G2S intrinsics (#143178)
This patch extends the TMA G2S intrinsics with the
support for cta_group::1/2 available from Blackwell onwards.
The existing intrinsics are auto-upgraded with a default
value of '0' for the `cta_group` flag operand.

* lit tests are added for all combinations of the newer variants.
* Negative tests are added to validate the error-handling 
   when the value of the cta_group flag falls out-of-range.
* The generated PTX is verified with a 12.8 ptxas executable.

Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
2025-06-12 15:20:39 +05:30

16 lines
875 B
LLVM

; RUN: not llc < %s -mtriple=nvptx64 -mcpu=sm_100a -o /dev/null 2>&1 | FileCheck %s
target triple = "nvptx64-nvidia-cuda"
declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) writeonly, ptr addrspace(3), ptr readonly, i32, i16, i64, i1 immarg, i1 immarg, i32 immarg range(i32 0, 3))
define void @test_cp_async_bulk_tensor_g2s_tile_1d(ptr addrspace(7) %d, ptr addrspace(3) %bar, ptr %tmap, i32 %d0, i16 %mc, i64 %ch) {
; CHECK: immarg value 3 out of range [0, 3)
tail call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) %d, ptr addrspace(3) %bar, ptr %tmap, i32 %d0, i16 %mc, i64 %ch, i1 0, i1 0, i32 3)
; CHECK: immarg value -1 out of range [0, 3)
tail call void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(ptr addrspace(7) %d, ptr addrspace(3) %bar, ptr %tmap, i32 %d0, i16 %mc, i64 %ch, i1 0, i1 0, i32 -1)
ret void
}