llvm-project/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
Alexander Richardson 07e2ba445d
[AMDGPU] Set AS8 address width to 48 bits
Of the 128-bits of buffer descriptor only 48 bits are address bits, so
following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logic conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.

Reviewed By: krzysz00

Pull Request: https://github.com/llvm/llvm-project/pull/139419
2025-05-19 17:26:05 -07:00

52 lines
3.0 KiB
MLIR

// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// The aim of the test is to check the GPU LLVM IR codegen
// for nested omp do loop inside omp target region
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_wsloop(%arg0: !llvm.ptr ) attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : i32) : i32
omp.wsloop {
omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
%gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
llvm.store %loop_cnt, %gep : i32, !llvm.ptr
omp.yield
}
}
llvm.return
}
llvm.func @target_empty_wsloop() attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to)>} {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : i32) : i32
omp.wsloop {
omp.loop_nest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
omp.yield
}
}
llvm.return
}
}
// CHECK: define void @[[FUNC0:.*]](ptr %[[ARG0:.*]])
// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
// CHECK: %[[GEP:.*]] = getelementptr { ptr }, ptr addrspace(5) %[[STRUCTARG]], i32 0, i32 0
// CHECK: store ptr %[[ARG0]], ptr addrspace(5) %[[GEP]], align 8
// CHECK: %[[NUM_THREADS:.*]] = call i32 @omp_get_num_threads()
// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), ptr @[[LOOP_BODY_FN:.*]], ptr %[[STRUCTARG_ASCAST]], i32 10, i32 %[[NUM_THREADS]], i32 0)
// CHECK: define internal void @[[LOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]])
// CHECK: %[[GEP2:.*]] = getelementptr { ptr }, ptr %[[LOOP_BODY_ARG]], i32 0, i32 0
// CHECK: %[[LOADGEP:.*]] = load ptr, ptr %[[GEP2]], align 8
// CHECK: %[[GEP3:.*]] = getelementptr [10 x i32], ptr %[[LOADGEP]], i32 0, i32 %[[TMP2:.*]]
// CHECK: store i32 %[[VAL0:.*]], ptr %[[GEP3]], align 4
// CHECK: define void @[[FUNC_EMPTY_WSLOOP:.*]]()
// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), ptr @[[LOOP_EMPTY_BODY_FN:.*]], ptr null, i32 10, i32 %[[NUM_THREADS:.*]], i32 0)
// CHECK: define internal void @[[LOOP_EMPTY_BODY_FN]](i32 %[[LOOP_CNT:.*]])