diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 9a935948882f..95a308dc2d1e 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" - "32-v48:64-v96:128" - "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" - "-ni:7:8:9"; + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" + "v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d..41a3f59b0fc8 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 diff --git a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl index bb52f8761521..713ae48648aa 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" void foo(void) {} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3ebd3a4b8809..e32fcfe6148d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7966,17 +7966,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // On arm64_32, pointers are 32 bits when stored in memory, but // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to - // match the index type, but the pointer is 64 bits, so the the mask must be + // match the index type, but the pointer is 64 bits, so the mask must be // zero-extended up to 64 bits to match the pointer. EVT PtrVT = TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); assert(PtrVT == Ptr.getValueType()); - assert(MemVT == Mask.getValueType()); - if (MemVT != PtrVT) + if (Mask.getValueType().getFixedSizeInBits() < MemVT.getFixedSizeInBits()) { + // For AMDGPU buffer descriptors the mask is 48 bits, but the pointer is + // 128-bit, so we have to pad the mask with ones for unused bits. + auto HighOnes = DAG.getNode( + ISD::SHL, sdl, PtrVT, DAG.getAllOnesConstant(sdl, PtrVT), + DAG.getShiftAmountConstant(Mask.getValueType().getFixedSizeInBits(), + PtrVT, sdl)); + Mask = DAG.getNode(ISD::OR, sdl, PtrVT, + DAG.getZExtOrTrunc(Mask, sdl, PtrVT), HighOnes); + } else if (Mask.getValueType() != PtrVT) Mask = DAG.getPtrExtOrTrunc(Mask, sdl, PtrVT); + assert(Mask.getValueType() == PtrVT); setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 41aa06add6ab..7157baf394e3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5781,7 +5781,10 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { if (!DL.contains("-p7") && !DL.starts_with("p7")) Res.append("-p7:160:256:256:32"); if (!DL.contains("-p8") && !DL.starts_with("p8")) - Res.append("-p8:128:128"); + Res.append("-p8:128:128:128:48"); + constexpr StringRef OldP8("-p8:128:128-"); + if (DL.contains(OldP8)) + Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-"); if (!DL.contains("-p9") && !DL.starts_with("p9")) Res.append("-p9:192:256:256:32"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ccb251b730f1..e24d8481408a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -688,10 +688,9 @@ static StringRef computeDataLayout(const Triple &TT) { // space 8) which cannot be non-trivilally accessed by LLVM memory operations // like getelementptr. return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" - "v32:32-v48:64-v96:" - "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8:9"; + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-" + "v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; } LLVM_READNONE diff --git a/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll b/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll index 2bfe32c654ff..39f8ffedc219 100644 --- a/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: x[]: full-set ; CHECK-NEXT: allocas uses: -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" define void @a(ptr addrspace(5) %x) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll index 82a15f7497f5..316fe65361f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll @@ -1,14 +1,13 @@ ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - < %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s128) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p8) = G_PTR_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_(s48) define float @gep_on_rsrc(ptr addrspace(8) %rsrc) { body: - %next = getelementptr float, ptr addrspace(8) %rsrc, i128 1 + %next = getelementptr float, ptr addrspace(8) %rsrc, i48 1 %res = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %next, i32 0, i32 0, i32 0) ret float %res } declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg) - diff --git a/llvm/test/CodeGen/AMDGPU/ptrmask.ll b/llvm/test/CodeGen/AMDGPU/ptrmask.ll index 9ad9c80d82ff..17c4eaa2736f 100644 --- a/llvm/test/CodeGen/AMDGPU/ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/ptrmask.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s @@ -145,64 +145,64 @@ define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspa ret ptr addrspace(7) %masked } -define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) %ptr, i128 %mask) { -; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128: +define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) %ptr, i48 %mask) { +; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v5, 0xffff0000, v5 ; GCN-NEXT: v_and_b32_e32 v1, v1, v5 ; GCN-NEXT: v_and_b32_e32 v0, v0, v4 -; GCN-NEXT: v_and_b32_e32 v3, v3, v7 -; GCN-NEXT: v_and_b32_e32 v2, v2, v6 ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128: +; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_or_b32_e32 v5, 0xffff0000, v5 ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4 ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5 -; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6 -; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask) ret ptr addrspace(8) %masked } -define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) %ptr) { -; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8: +define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) %ptr) { +; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v0, -8, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8: +; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_and_b32_e32 v0, -8, v0 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8) ret ptr addrspace(8) %masked } -define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) inreg %ptr, i128 inreg %mask) { -; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128: +define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) inreg %ptr, i48 inreg %mask) { +; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48: ; GCN: ; %bb.0: -; GCN-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT: s_or_b32 s7, s7, 0xffff0000 ; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7] ; GCN-NEXT: s_mov_b32 s2, s4 ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: ; return to shader part epilog ; -; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128: +; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i48: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_or_b32 s7, s7, 0xffff0000 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7] -; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[8:9] +; GFX10PLUS-NEXT: s_mov_b32 s2, s4 +; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: ; return to shader part epilog - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask) ret ptr addrspace(8) %masked } -define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) inreg %ptr) { -; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8: +define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) inreg %ptr) { +; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48_neg8: ; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s1, s3 ; GCN-NEXT: s_and_b32 s0, s2, -8 @@ -210,14 +210,14 @@ define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8( ; GCN-NEXT: s_mov_b32 s3, s5 ; GCN-NEXT: ; return to shader part epilog ; -; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8: +; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i48_neg8: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_mov_b32 s1, s3 ; GFX10PLUS-NEXT: s_and_b32 s0, s2, -8 ; GFX10PLUS-NEXT: s_mov_b32 s2, s4 ; GFX10PLUS-NEXT: s_mov_b32 s3, s5 ; GFX10PLUS-NEXT: ; return to shader part epilog - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8) ret ptr addrspace(8) %masked } diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll b/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll index 2c5286edbfe0..6cef895aaf42 100644 --- a/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll @@ -1,7 +1,7 @@ ; Test that we don't crash. ; RUN: opt < %s -passes=alignment-from-assumptions -S -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" %"core::str::CharIndices.29.66.90.114.138.149.165.173.181.197.205.213.229.387.398" = type { [0 x i64], i64, [0 x i64], { ptr, ptr }, [0 x i64] } %"unwind::libunwind::_Unwind_Exception.9.51.75.99.123.147.163.171.179.195.203.211.227.385.396" = type { [0 x i64], i64, [0 x i64], ptr, [0 x i64], [6 x i64], [0 x i64] } diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll index 6d313a6cb417..e603ee980ef3 100644 --- a/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll +++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='early-cse' -earlycse-debug-hash < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; CHECK-LABEL: @memrealtime( ; CHECK: call i64 @llvm.amdgcn.s.memrealtime() diff --git a/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll b/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll index 9ef153183cc9..f09a51c48a52 100644 --- a/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll +++ b/llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll @@ -3,7 +3,7 @@ ; RUN: opt -passes=attributor-light -S < %s | FileCheck --check-prefixes=COMMON,ATTRIBUTOR %s ;; target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" define amdgpu_kernel void @test_make_buffer_rsrc(ptr %p, ptr %q) { ; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll index 422ac0dfd2cd..f6619b791e7e 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll @@ -1,7 +1,7 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -passes=infer-address-spaces %s | FileCheck -check-prefixes=COMMON,AMDGCN %s ; RUN: opt -S -o - -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck -check-prefixes=COMMON,NOTTI %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" ; COMMON-LABEL: @noop_ptrint_pair( ; AMDGCN-NEXT: store i32 0, ptr addrspace(1) %{{.*}} diff --git a/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll index 0eaf6e32e5a9..fbdac1e07ca1 100644 --- a/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll +++ b/llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll @@ -2,7 +2,7 @@ ; Check that assert in X86TargetMachine::isNoopAddrSpaceCast is not triggered. -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" ; CHECK-LABEL: @noop_ptrint_pair( ; CHECK: addrspacecast ptr addrspace(1) %x to ptr addrspace(4) diff --git a/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll b/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll index 83379f6a54fc..f8a6f843cc24 100644 --- a/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll +++ b/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll @@ -3,7 +3,7 @@ ; RUN: opt -passes='require,loop-simplify,loop-load-elim' -S %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7:8" target triple = "amdgcn-amd-amdhsa" %struct.foo = type { %struct.pluto, i8, ptr, i32 } diff --git a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll index 4dfa8cc82824..1341a5ec57b0 100644 --- a/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll +++ b/llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll @@ -2,7 +2,7 @@ ; Verify the address space cast doesn't cause a crash -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr } %"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } diff --git a/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll b/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll index 19d41f9d1e30..3f4077bd7e02 100644 --- a/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll +++ b/llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -passes=openmp-opt < %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr } diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll index 63b54bfddf9d..70c0d046dad4 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -8,7 +8,7 @@ ; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode ; -target datalayout = "A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32" +target datalayout = "A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32" target triple = "amdgcn-amd-amdhsa" %struct.ident_t = type { i32, i32, i32, i32, ptr } diff --git a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll index 52be16c41f87..d3e8e98b6f51 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy.8, ptr, ptr } diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll index 74871a2babcb..4e8bae08c87c 100644 --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" @.__omp_offloading_heavyComputation.region_id = weak constant i8 0 @.offload_maptypes. = private unnamed_addr constant [2 x i64] [i64 35, i64 35] diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index 5eef8ee87b4d..3ab2caf702f6 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -41,12 +41,16 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { // Check that AMDGPU targets add -G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1"); // and that ANDGCN adds p7 and p8 as well. - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), - "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G1", "amdgcn"), + "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + // Check that the old AMDGCN p8:128:128 definition is upgraded + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p8:128:128-G1", "amdgcn"), + "e-p:64:64-p8:128:128:128:48-G1-ni:7:8:9-p7:160:256:256:32-" + "p9:192:256:256:32"); // but that r600 does not. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G1", "r600"), "e-p:32:32-G1"); @@ -60,7 +64,8 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { "amdgcn"), "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-" "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:" - "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-" + "1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:" + "128:48-" "p9:192:256:256:32"); // Check that RISCV64 upgrades -n64 to -n32:64. @@ -144,23 +149,26 @@ TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { // Check that AMDGPU targets don't add -G1 if there is already a -G flag. EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2"); EXPECT_EQ(UpgradeDataLayoutString("G2", "r600"), "G2"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), - "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), - "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ( - UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), - "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G2", "amdgcn"), + "e-p:64:64-G2-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("G2-e-p:64:64", "amdgcn"), + "G2-e-p:64:64-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); + EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-G0", "amdgcn"), + "e-p:64:64-G0-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:" + "256:256:32"); // Check that AMDGCN targets don't add already declared address space 7. - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ(UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), - "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128-p9:192:256:256:32"); - EXPECT_EQ(UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), - "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("e-p:64:64-p7:64:64", "amdgcn"), + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128:128:48-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("p7:64:64-G2-e-p:64:64", "amdgcn"), + "p7:64:64-G2-e-p:64:64-ni:7:8:9-p8:128:128:128:48-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("e-p:64:64-p7:64:64-G1", "amdgcn"), + "e-p:64:64-p7:64:64-G1-ni:7:8:9-p8:128:128:128:48-p9:192:256:256:32"); // Check that SPIR & SPIRV targets don't add -G1 if there is already a -G // flag. @@ -191,8 +199,9 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) { // Check that AMDGPU targets add G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("", "r600"), "G1"); - EXPECT_EQ(UpgradeDataLayoutString("", "amdgcn"), - "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"); + EXPECT_EQ( + UpgradeDataLayoutString("", "amdgcn"), + "G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32"); // Check that SPIR & SPIRV targets add G1 if it's not present. EXPECT_EQ(UpgradeDataLayoutString("", "spir"), "G1"); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index b98dc0c9ae1a..be98be260c9d 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -644,7 +644,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { std::string oldDLStr = M->getDataLayoutStr(); M->setDataLayout( "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" - "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" + "256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-" + "v192:" "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; @@ -2349,7 +2350,8 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; M->setDataLayout( "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" - "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" + "256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-" + "v192:" "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.Config.IsTargetDevice = true; diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 239e84d6024f..9ea8de3da1e5 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -677,7 +677,7 @@ TEST(CodeExtractor, OpenMPAggregateArgs) { LLVMContext Ctx; SMDiagnostic Err; std::unique_ptr M(parseAssemblyString(R"ir( - target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" target triple = "amdgcn-amd-amdhsa" define void @foo(ptr %0) { diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 4ad874659620..e543ca98d8d4 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -95,7 +95,8 @@ static Value getLaneId(ConversionPatternRewriter &rewriter, Location loc, } static constexpr StringLiteral amdgcnDataLayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" + "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:" + "32-v32:" "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:" "64-S32-A5-G1-ni:7:8:9"; diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index d28aa9e34c22..79d0f5dd3e61 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -3,7 +3,7 @@ // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module -// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir index 633df9686688..13c18401cafa 100644 --- a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir @@ -4,7 +4,7 @@ // alignment of loaded objects is passed to outlined // functions. -module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { +module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { omp.private {type = private} @_QFEk_private_i32 : i32 llvm.func @_QQmain() { %0 = llvm.mlir.constant(1 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir index 1a4c081414c9..b8b7c780a74d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir @@ -3,7 +3,7 @@ // Only check the overall shape of the code and the presence of relevant // runtime calls. Actual IR checking is done at the OpenMPIRBuilder level. -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { omp.private {type = private} @_QFEj_private_i32 : i32 omp.declare_reduction @add_reduction_f32 : f32 init { ^bb0(%arg0: f32): diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index 593d8010f55d..60c6fa4dd8f1 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the LLVM IR codegen for the device // for omp target parallel construct -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} omp.target map_entries(%0 -> %arg2 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir index 649210795ff5..830610f12a5d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the GPU LLVM IR codegen // for nested omp do loop inside omp target region -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget, target_cpu = "gfx90a", target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>} diff --git a/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir index a2500f3a579d..b978354e2532 100644 --- a/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir @@ -3,7 +3,7 @@ // Regression tset for calling a function using pointer alloca'ed on // device for private variable -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { omp.private {type = private} @_QMmodFfailingEi_private_i32 : i32 llvm.func @_QMotherProutine(%arg0: !llvm.ptr {fir.bindc_name = "i", llvm.nocapture}) attributes {frame_pointer = #llvm.framePointerKind, omp.declare_target = #omp.declaretarget, target_cpu = "gfx90a", target_features = #llvm.target_features<["+16-bit-insts", "+atomic-buffer-global-pk-add-f16-insts", "+atomic-fadd-rtn-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx8-insts", "+gfx9-insts", "+gfx90a-insts", "+gws", "+image-insts", "+mai-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>} { llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir index af8fe7aacc33..9aba72dabf13 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir @@ -3,7 +3,7 @@ // Only check the overall shape of the code and the presence of relevant // runtime calls. Actual IR checking is done at the OpenMPIRBuilder level. -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { omp.private {type = private} @_QFsimple_target_teams_only_reductionEindex__private_i32 : i32 omp.declare_reduction @add_reduction_i32 : i32 init { ^bb0(%arg0: i32): diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir index edfb2839d660..dc22fe11666c 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir @@ -3,7 +3,7 @@ // Only check the overall shape of the code and the presence of relevant // runtime calls. Actual IR checking is done at the OpenMPIRBuilder level. -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { omp.declare_reduction @add_reduction_i32 : i32 init { ^bb0(%arg0: i32): %0 = llvm.mlir.constant(0 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index b7aecec308ef..0ebcec0e0ec3 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the GPU LLVM IR codegen // for nested omp do loop with collapse clause inside omp target region -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(99 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir index e2a8d88bd181..a9f913b74448 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the GPU LLVM IR codegen // for nested omp do loop inside omp target region -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_wsloop(%arg0: !llvm.ptr ) attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32