From 79cf877627ec341c62f64e25a44f3ba340edad1e Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Fri, 15 Aug 2025 11:49:35 -0700 Subject: [PATCH 001/214] [Offload] Introduce dataFence plugin interface. (#153793) The purpose of this fence is to ensure that any `dataSubmit`s inserted into a queue before a `dataFence` finish before any `dataSubmit`s inserted after it begin. This is a no-op for most queues, since they are in-order, and by design any operations inserted into them occur in order. But the interface is supposed to be functional for out-of-order queues. The addition of the interface means that any operations that rely on such ordering (like ATTACH map-type support in #149036) can invoke it, without worrying about whether the underlying queue is in-order or out-of-order. Once a plugin supports out-of-order queues, the plugin can implement this function, without requiring any change at the libomptarget level. --------- Co-authored-by: Alex Duran --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 7 +++++++ .../plugins-nextgen/common/include/PluginInterface.h | 8 ++++++++ .../plugins-nextgen/common/src/PluginInterface.cpp | 12 ++++++++++++ offload/plugins-nextgen/cuda/src/rtl.cpp | 7 +++++++ offload/plugins-nextgen/host/src/rtl.cpp | 7 +++++++ 5 files changed, 41 insertions(+) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 536c662451df..83280fe0a49c 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2576,6 +2576,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { getAgent(), (uint64_t)Size); } + /// Insert a data fence between previous data operations and the following + /// operations. This is a no-op for AMDGPU devices as operations inserted into + /// a queue are in-order. 
+ Error dataFence(__tgt_async_info *Async) override { + return Plugin::success(); + } + /// Initialize the async info for interoperability purposes. Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override { // TODO: Implement this function. diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 5e32a1a76d96..a448721755a6 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -944,6 +944,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { virtual Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; + /// Insert a data fence between previous data operations and the following + /// operations if necessary for the device + virtual Error dataFence(__tgt_async_info *AsyncInfo) = 0; + /// Exchange data between devices (device to device transfer). Calling this /// function is only valid if GenericPlugin::isDataExchangable() passing the /// two devices returns true. @@ -1454,6 +1458,10 @@ public: int DstDeviceId, void *DstPtr, int64_t Size, __tgt_async_info *AsyncInfo); + /// Places a fence between previous data movements and following data + /// movements if necessary on the device + int32_t data_fence(int32_t DeviceId, __tgt_async_info *AsyncInfo); + /// Begin executing a kernel on the given device. 
int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs, diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index f177c5bc9f48..c06c35e1e6a5 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -2333,3 +2333,15 @@ int32_t GenericPluginTy::async_barrier(omp_interop_val_t *Interop) { } return OFFLOAD_SUCCESS; } + +int32_t GenericPluginTy::data_fence(int32_t DeviceId, + __tgt_async_info *AsyncInfo) { + auto Err = getDevice(DeviceId).dataFence(AsyncInfo); + if (Err) { + REPORT("failure to place data fence on device %d: %s\n", DeviceId, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 155a580faee2..a99357a3adea 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -856,6 +856,13 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::success(); } + /// Insert a data fence between previous data operations and the following + /// operations. This is a no-op for CUDA devices as operations inserted into + /// a queue are in-order. + Error dataFence(__tgt_async_info *Async) override { + return Plugin::success(); + } + /// Initialize the device info for interoperability purposes. 
Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override { assert(Context && "Context is null"); diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index f8ddc6713c01..25443fd1ac0b 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -295,6 +295,13 @@ struct GenELF64DeviceTy : public GenericDeviceTy { "dataExchangeImpl not supported"); } + /// Insert a data fence between previous data operations and the following + /// operations. This is a no-op for Host devices as operations inserted into + /// a queue are in-order. + Error dataFence(__tgt_async_info *Async) override { + return Plugin::success(); + } + /// All functions are already synchronous. No need to do anything on this /// synchronization function. Error synchronizeImpl(__tgt_async_info &AsyncInfo, From d7a29e5d5605f277d991b03a3923597a033d73ed Mon Sep 17 00:00:00 2001 From: Jasmine Tang Date: Fri, 15 Aug 2025 12:06:47 -0700 Subject: [PATCH 002/214] [WebAssembly] Reapply #149461 with correct CondCode in combine of SETCC (#153703) This PR reapplies https://github.com/llvm/llvm-project/pull/149461 In the original `combineVectorSizedSetCCEquality`, the result of setcc is being negated by returning setcc with the same cond code, leading to wrong logic. For example, with ```llvm %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 ``` the original PR produces all_true and then also compares the result equal to 0 (using the same SETEQ in the returning setcc), meaning that semantically, it effectively is calling icmp ne. Instead, the PR should have used SETNE in the returning setcc; this way, all_true returns 1, which is then compared ne 0, which is equivalent to icmp eq. 
--- .../WebAssembly/WebAssemblyISelLowering.cpp | 55 +++++++++++- .../WebAssemblyTargetTransformInfo.cpp | 3 +- .../test/CodeGen/WebAssembly/memcmp-expand.ll | 21 ++--- llvm/test/CodeGen/WebAssembly/simd-setcc.ll | 87 +++++++++++++++++++ 4 files changed, 148 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-setcc.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 8baca2ad3133..35d5c3ed90c9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3386,8 +3386,56 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); } +/// Try to convert a i128 comparison to a v16i8 comparison before type +/// legalization splits it up into chunks +static SDValue +combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + const WebAssemblySubtarget *Subtarget) { + + SDLoc DL(N); + SDValue X = N->getOperand(0); + SDValue Y = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT OpVT = X.getValueType(); + + SelectionDAG &DAG = DCI.DAG; + if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat)) + return SDValue(); + + ISD::CondCode CC = cast(N->getOperand(2))->get(); + // We're looking for an oversized integer equality comparison with SIMD + if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 || + !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC)) + return SDValue(); + + // Don't perform this combine if constructing the vector will be expensive. 
+ auto IsVectorBitCastCheap = [](SDValue X) { + X = peekThroughBitcasts(X); + return isa(X) || X.getOpcode() == ISD::LOAD; + }; + + if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) + return SDValue(); + + SDValue VecX = DAG.getBitcast(MVT::v16i8, X); + SDValue VecY = DAG.getBitcast(MVT::v16i8, Y); + SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC); + + SDValue Intr = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue + : Intrinsic::wasm_anytrue, + DL, MVT::i32), + Cmp}); + + return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32), + ISD::SETNE); +} + static SDValue performSETCCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const WebAssemblySubtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -3395,6 +3443,9 @@ static SDValue performSETCCCombine(SDNode *N, if (!VT.isScalarInteger()) return SDValue(); + if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget)) + return V; + SDValue LHS = N->getOperand(0); if (LHS->getOpcode() != ISD::BITCAST) return SDValue(); @@ -3574,7 +3625,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::BITCAST: return performBitcastCombine(N, DCI); case ISD::SETCC: - return performSETCCCombine(N, DCI); + return performSETCCCombine(N, DCI, Subtarget); case ISD::VECTOR_SHUFFLE: return performVECTOR_SHUFFLECombine(N, DCI); case ISD::SIGN_EXTEND: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 52e706514226..08fb7586d215 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -147,7 +147,8 @@ WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.AllowOverlappingLoads = true; - // TODO: Teach WebAssembly backend about load v128. 
+ if (ST->hasSIMD128()) + Options.LoadSizes.push_back(16); Options.LoadSizes.append({8, 4, 2, 1}); Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll index 8030438645f8..4357dc5631eb 100644 --- a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -127,24 +127,15 @@ define i1 @memcmp_expand_8(ptr %a, ptr %b) { ret i1 %res } -; TODO: Should be using a single load i64x2 or equivalent in bitsizes define i1 @memcmp_expand_16(ptr %a, ptr %b) { ; CHECK-LABEL: memcmp_expand_16: ; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 -; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 -; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 -; CHECK-NEXT: i32.const $push0=, 8 -; CHECK-NEXT: i32.add $push3=, $0, $pop0 -; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 -; CHECK-NEXT: i32.const $push11=, 8 -; CHECK-NEXT: i32.add $push1=, $1, $pop11 -; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 -; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 -; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 -; CHECK-NEXT: i64.eqz $push10=, $pop9 -; CHECK-NEXT: return $pop10 +; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: return $pop3 %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) %res = icmp eq i32 %cmp_16, 0 
ret i1 %res diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll new file mode 100644 index 000000000000..127fd4e96303 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare i32 @memcmp(ptr, ptr, i32) + +define i1 @setcc_load(ptr %a, ptr %b) { +; CHECK-LABEL: setcc_load: +; CHECK: .functype setcc_load (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: return $pop3 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +} + +; INFO: Negative test: noimplicitfloat disables simd +define i1 @setcc_load_should_not_vectorize(ptr %a, ptr %b) noimplicitfloat { +; CHECK-LABEL: setcc_load_should_not_vectorize: +; CHECK: .functype setcc_load_should_not_vectorize (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push4=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push3=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop3 +; CHECK-NEXT: i64.load $push1=, 8($0):p2align=0 +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.xor $push2=, $pop1, $pop0 +; CHECK-NEXT: i64.or $push6=, $pop5, $pop2 +; CHECK-NEXT: i64.eqz $push7=, $pop6 +; CHECK-NEXT: return $pop7 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +} + +define i1 @setcc_eq_const_i128(ptr %ptr) { +; CHECK-LABEL: setcc_eq_const_i128: +; CHECK: .functype setcc_eq_const_i128 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.load 
$push0=, 0($0) +; CHECK-NEXT: v128.const $push1=, 6, 0 +; CHECK-NEXT: i8x16.eq $push2=, $pop0, $pop1 +; CHECK-NEXT: i8x16.all_true $push3=, $pop2 +; CHECK-NEXT: return $pop3 + %l = load i128, ptr %ptr + %res = icmp eq i128 %l, 6 + ret i1 %res +} + +define i1 @setcc_ne_const_i128(ptr %ptr) { +; CHECK-LABEL: setcc_ne_const_i128: +; CHECK: .functype setcc_ne_const_i128 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: v128.load $push0=, 0($0) +; CHECK-NEXT: v128.const $push1=, 16, 0 +; CHECK-NEXT: i8x16.ne $push2=, $pop0, $pop1 +; CHECK-NEXT: v128.any_true $push3=, $pop2 +; CHECK-NEXT: return $pop3 + %l = load i128, ptr %ptr + %res = icmp ne i128 %l, 16 + ret i1 %res +} + +; INFO: Negative test: only eq and ne works +define i1 @setcc_slt_const_i128(ptr %ptr) { +; CHECK-LABEL: setcc_slt_const_i128: +; CHECK: .functype setcc_slt_const_i128 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push2=, 0($0) +; CHECK-NEXT: i64.const $push3=, 25 +; CHECK-NEXT: i64.lt_u $push4=, $pop2, $pop3 +; CHECK-NEXT: i64.load $push8=, 8($0) +; CHECK-NEXT: local.tee $push7=, $1=, $pop8 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.lt_s $push1=, $pop7, $pop0 +; CHECK-NEXT: i64.eqz $push5=, $1 +; CHECK-NEXT: i32.select $push6=, $pop4, $pop1, $pop5 +; CHECK-NEXT: return $pop6 + %l = load i128, ptr %ptr + %res = icmp slt i128 %l, 25 + ret i1 %res +} From 09f5b9ab0a40b7905701f05094b19964d16cc183 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 15 Aug 2025 12:06:54 -0700 Subject: [PATCH 003/214] Revert "[SLP]Do not include copyable data to the same user twice" This reverts commit 758c6852c3ffe6b5e259cafadd811e60d8c276fb to fix buildbot https://lab.llvm.org/buildbot/#/builders/195/builds/13298 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 5 -- .../X86/schedule-same-user-with-copyable.ll | 64 ------------------- 2 files changed, 69 deletions(-) delete mode 100644 llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll diff --git 
a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 49331702d145..45c24bee6516 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5336,7 +5336,6 @@ private: ArrayRef Op = EI.UserTE->getOperand(EI.EdgeIdx); const auto *It = find(Op, I); assert(It != Op.end() && "Lane not set"); - SmallPtrSet Visited; do { int Lane = std::distance(Op.begin(), It); assert(Lane >= 0 && "Lane not set"); @@ -5346,10 +5345,6 @@ private: assert(Lane < static_cast(EI.UserTE->Scalars.size()) && "Couldn't find extract lane"); auto *In = cast(EI.UserTE->Scalars[Lane]); - if (!Visited.insert(In).second) { - It = find(make_range(std::next(It), Op.end()), I); - continue; - } ScheduleCopyableDataMapByInstUser .try_emplace(std::make_pair(std::make_pair(In, EI.EdgeIdx), I)) .first->getSecond() diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll deleted file mode 100644 index c53ccf425a31..000000000000 --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll +++ /dev/null @@ -1,64 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s - -define i32 @test(ptr %o, i32 %b.021.i) { -; CHECK-LABEL: define i32 @test( -; CHECK-SAME: ptr [[O:%.*]], i32 [[B_021_I:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[O1:%.*]] = alloca [3 x i32], align 4 -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[SUB623:%.*]] = phi i32 [ [[SUB6:%.*]], %[[N_EXIT:.*]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[ADD21:%.*]] = phi i32 [ [[ADD:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[ADD419:%.*]] = phi i32 [ [[ADD4:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[ADD18:%.*]] = 
phi i32 [ [[ADD]], %[[N_EXIT]] ], [ 1, %[[ENTRY]] ] -; CHECK-NEXT: store i32 [[ADD419]], ptr [[O1]], align 4 -; CHECK-NEXT: store i32 [[ADD18]], ptr [[O]], align 4 -; CHECK-NEXT: br label %[[FOR_BODY4_I:.*]] -; CHECK: [[FOR_COND1_I:.*]]: -; CHECK-NEXT: ret i32 0 -; CHECK: [[FOR_BODY4_I]]: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[O1]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[FOR_COND1_I]], label %[[N_EXIT]] -; CHECK: [[N_EXIT]]: -; CHECK-NEXT: [[SUB:%.*]] = or i32 [[B_021_I]], [[ADD21]] -; CHECK-NEXT: [[ADD]] = or i32 [[SUB]], 1 -; CHECK-NEXT: [[ADD2:%.*]] = or i32 [[B_021_I]], 1 -; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[SUB623]] -; CHECK-NEXT: [[ADD4]] = or i32 [[ADD3]], 1 -; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[B_021_I]], 1 -; CHECK-NEXT: [[SUB6]] = or i32 [[MUL]], 1 -; CHECK-NEXT: br label %[[WHILE_BODY]] -; -entry: - %o1 = alloca [3 x i32], align 4 - br label %while.body - -while.body: ; preds = %n.exit, %entry - %sub623 = phi i32 [ %sub6, %n.exit ], [ 0, %entry ] - %add21 = phi i32 [ %add, %n.exit ], [ 0, %entry ] - %add419 = phi i32 [ %add4, %n.exit ], [ 0, %entry ] - %add18 = phi i32 [ %add, %n.exit ], [ 1, %entry ] - store i32 %add419, ptr %o1, align 4 - store i32 %add18, ptr %o, align 4 - br label %for.body4.i - -for.cond1.i: ; preds = %for.body4.i - ret i32 0 - -for.body4.i: ; preds = %while.body - %0 = load i32, ptr %o1, align 4 - %tobool.not.i = icmp eq i32 %0, 0 - br i1 %tobool.not.i, label %for.cond1.i, label %n.exit - -n.exit: ; preds = %for.body4.i - %sub = or i32 %b.021.i, %add21 - %add = or i32 %sub, 1 - %add2 = or i32 %b.021.i, 1 - %add3 = add i32 %add2, %sub623 - %add4 = or i32 %add3, 1 - %mul = shl i32 %b.021.i, 1 - %sub6 = or i32 %mul, 1 - br label %while.body -} From 139bde203535a89aa975047d496392931bc972b4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 15 Aug 2025 12:27:54 -0700 Subject: [PATCH 004/214] [llvm] Ignore coding assistant artifacts 
(#153853) Now that "vibe coding" is a thing, ignore the documentation artifacts that coding assistants, like Claude and Gemini, use to retain coding workflows and other metadata. --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index a84268a7f686..860b8ea12abd 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,11 @@ autoconf/autom4te.cache # CLion project configuration /.idea /cmake-build* +# Coding assistants' stuff +/CLAUDE.md +/.claude/ +/GEMINI.md +/.gemini/ #==============================================================================# # Directories to ignore (do not add trailing '/'s, they skip symlinks). From c6ea7d72d12073c63681bca998a87b4a436a9dff Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Fri, 15 Aug 2025 12:30:21 -0700 Subject: [PATCH 005/214] [lldb] Fix CXX's SymbolNameFitsToLanguage matching other languages (#153685) The current implementation of CPlusPlusLanguage::SymbolNameFitsToLanguage will return true if the symbol is mangled for any language that lldb knows about. 
--- .../Language/CPlusPlus/CPlusPlusLanguage.cpp | 4 ++- .../CPlusPlus/CPlusPlusLanguageTest.cpp | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 3118ff151d1c..b4207439f528 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -105,7 +105,9 @@ CPlusPlusLanguage::GetFunctionNameInfo(ConstString name) const { bool CPlusPlusLanguage::SymbolNameFitsToLanguage(Mangled mangled) const { const char *mangled_name = mangled.GetMangledName().GetCString(); - return mangled_name && Mangled::IsMangledName(mangled_name); + auto mangling_scheme = Mangled::GetManglingScheme(mangled_name); + return mangled_name && (mangling_scheme == Mangled::eManglingSchemeItanium || + mangling_scheme == Mangled::eManglingSchemeMSVC); } ConstString CPlusPlusLanguage::GetDemangledFunctionNameWithoutArguments( diff --git a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp index 6eeb4f54952b..cc0275163b34 100644 --- a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp +++ b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp @@ -397,3 +397,33 @@ TEST(CPlusPlusLanguage, CPlusPlusNameParser) { // Don't crash. CPlusPlusNameParser((const char *)nullptr); } + +TEST(CPlusPlusLanguage, DoesNotMatchCxx) { + // Test that a symbol name that is NOT C++ does not match C++. + + SubsystemRAII lang; + Language *CPlusPlusLang = + Language::FindPlugin(lldb::eLanguageTypeC_plus_plus); + + EXPECT_TRUE(CPlusPlusLang != nullptr); + + Mangled swiftSymbol("$sS"); + EXPECT_FALSE(CPlusPlusLang->SymbolNameFitsToLanguage(swiftSymbol)); +} + +TEST(CPlusPlusLanguage, MatchesCxx) { + // Test that a symbol name that is C++ does match C++ (both Itanium and MSVC). 
+ + SubsystemRAII lang; + Language *CPlusPlusLang = + Language::FindPlugin(lldb::eLanguageTypeC_plus_plus); + + EXPECT_TRUE(CPlusPlusLang != nullptr); + + Mangled itaniumSymbol("_ZFoo"); + EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(itaniumSymbol)); + Mangled itaniumExtensionSymbol("___ZBar"); + EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(itaniumExtensionSymbol)); + Mangled msvcSymbol("?Baz"); + EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(msvcSymbol)); +} From 49e28d77b8df2ee2a7f97d0f685a3ccbf3360050 Mon Sep 17 00:00:00 2001 From: CatherineMoore Date: Fri, 15 Aug 2025 15:30:47 -0400 Subject: [PATCH 006/214] [OpenMP] Update ompdModule.c printf to match argument type (#152785) Update printf format string to match argument list --------- Co-authored-by: Joachim Co-authored-by: Joachim Jenke --- openmp/libompd/gdb-plugin/ompdModule.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openmp/libompd/gdb-plugin/ompdModule.c b/openmp/libompd/gdb-plugin/ompdModule.c index 6edc1cdc9c60..c6020de30671 100644 --- a/openmp/libompd/gdb-plugin/ompdModule.c +++ b/openmp/libompd/gdb-plugin/ompdModule.c @@ -1181,9 +1181,10 @@ static PyObject *call_ompd_get_icv_from_scope(PyObject *self, PyObject *args) { if (retVal != ompd_rc_ok) { if (retVal != ompd_rc_incomplete) { - _printf("An error occurred when calling ompd_get_icv_from_scope(%i, %i): " - "Error code: %d", - scope, icvId, retVal); + _printf( + "An error occurred when calling ompd_get_icv_from_scope(%i, %" PRIu64 + "): Error code: %d", + scope, icvId, retVal); } return Py_None; } From b3e3a2090b7307c7efbfbc7cee9d9573f2226d3b Mon Sep 17 00:00:00 2001 From: Chenguang Wang Date: Fri, 15 Aug 2025 12:32:15 -0700 Subject: [PATCH 007/214] [bazel] Add missing test inputs inclusion on mlir/test/Target. (#153854) https://github.com/llvm/llvm-project/pull/152131 added a few tests that depend on `mlir/test/Target/Wasm/inputs/*`, e.g. 
`mlir/test/Target/Wasm/import.mlir` reads `inputs/import.yaml.wasm`. These inputs should be included as data dependency. --- utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel index 7fab1ea1dd75..9a98f640d272 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel @@ -13,7 +13,7 @@ package(default_visibility = ["//visibility:public"]) "//mlir:mlir-opt", "//mlir:mlir-translate", "//mlir/test:lit_data", - ], + ] + glob(["Wasm/inputs/*"]), ) for src in glob([ "**/*.mlir", From 2ed727f3f6eedaff061cb38a2404beff970a0243 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 15 Aug 2025 20:32:40 +0100 Subject: [PATCH 008/214] [VPlan] Move SCEV invalidation to ::executePlan. (NFCI) Move SCEV invalidation from legacy ILV code-path directly to ::executePlan. --- .../Transforms/Vectorize/LoopVectorize.cpp | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09c9e63ff6a2..a179775e1a04 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2633,19 +2633,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Fix widened non-induction PHIs by setting up the PHI operands. fixNonInductionPHIs(State); - // After vectorization, the exit blocks of the original loop will have - // additional predecessors. Invalidate SCEVs for the exit phis in case SE - // looked through single-entry phis. 
- SmallVector ExitBlocks; - OrigLoop->getExitBlocks(ExitBlocks); - for (BasicBlock *Exit : ExitBlocks) - for (PHINode &PN : Exit->phis()) - PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); - - // Forget the original basic block. - PSE.getSE()->forgetLoop(OrigLoop); - PSE.getSE()->forgetBlockAndLoopDispositions(); - // Don't apply optimizations below when no (vector) loop remains, as they all // require one at the moment. VPBasicBlock *HeaderVPBB = @@ -7351,6 +7338,21 @@ DenseMap LoopVectorizationPlanner::executePlan( assert(verifyVPlanIsValid(BestVPlan, true /*VerifyLate*/) && "final VPlan is invalid"); + // After vectorization, the exit blocks of the original loop will have + // additional predecessors. Invalidate SCEVs for the exit phis in case SE + // looked through single-entry phis. + ScalarEvolution &SE = *PSE.getSE(); + for (VPIRBasicBlock *Exit : BestVPlan.getExitBlocks()) { + if (Exit->getNumPredecessors() == 0) + continue; + for (VPRecipeBase &PhiR : Exit->phis()) + SE.forgetLcssaPhiWithNewPredecessor( + OrigLoop, cast(&cast(PhiR).getInstruction())); + } + // Forget the original loop and block dispositions. + SE.forgetLoop(OrigLoop); + SE.forgetBlockAndLoopDispositions(); + ILV.printDebugTracesAtStart(); //===------------------------------------------------===// From 732eb5427cfcb103710b21ca6f2de8dbacaec215 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 15 Aug 2025 20:35:27 +0100 Subject: [PATCH 009/214] [AArch64] Replace SIMDLongThreeVectorBHSabd with SIMDLongThreeVectorBHS. (#152987) We just need to use a BinOpFrag to share the patterns. This also moves UABDL to where it belongs in with similar instructions, and removes some patterns that are now handled by abd nodes. This is mostly NFC except for GISel, which will catch back up when it handles abd nodes in the same way. 
--- .../lib/Target/AArch64/AArch64InstrFormats.td | 38 ------------------- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 26 ++----------- 2 files changed, 4 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 456b21a70e90..98c01ee2f4ad 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7405,44 +7405,6 @@ multiclass SIMDLongThreeVectorHS opc, string asm, (extract_high_v4i32 (v4i32 V128:$Rm))))]>; } -let isCommutable = 1 in -multiclass SIMDLongThreeVectorBHSabdl opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - multiclass SIMDLongThreeVectorTiedBHSabal opc, string asm, SDPatternOperator OpNode> { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 8cfbff938a39..2f60a8b6fe34 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5707,27 +5707,6 @@ let Predicates = [HasFullFP16] in { // Advanced SIMD two vector instructions. //===----------------------------------------------------------------------===// -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", abdu>; -// Match UABDL in log2-shuffle patterns. 
-def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), - (zext (v8i8 V64:$opB))))), - (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; -def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), - (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), - (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; -def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), - (zext (v4i16 V64:$opB))))), - (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; -def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), - (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), - (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; -def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), - (zext (v2i32 V64:$opB))))), - (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; -def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), - (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), - (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; - defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; @@ -6810,7 +6789,8 @@ defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn let isCommutable = 1 in defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>; -defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>; +defm SABDL : SIMDLongThreeVectorBHS<0, 0b0111, "sabdl", + BinOpFrag<(zext (abds node:$LHS, node:$RHS))>>; defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", @@ -6830,6 +6810,8 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", abdu>; +defm UABDL : SIMDLongThreeVectorBHS<1, 0b0111, "uabdl", + 
BinOpFrag<(zext (abdu node:$LHS, node:$RHS))>>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", From b157599156942de04d1174a5dbf5d07ca81256d7 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 15 Aug 2025 10:46:56 -0700 Subject: [PATCH 010/214] [SLP]Do not include copyable data to the same user twice If the copyable schedule data is created and the user is used several times in the user node, no need to count same data for the same user several times, need to include it only once. Fixes #153754 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 11 ++++ .../X86/schedule-same-user-with-copyable.ll | 64 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 45c24bee6516..b88de09a3e44 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5336,6 +5336,7 @@ private: ArrayRef Op = EI.UserTE->getOperand(EI.EdgeIdx); const auto *It = find(Op, I); assert(It != Op.end() && "Lane not set"); + SmallPtrSet Visited; do { int Lane = std::distance(Op.begin(), It); assert(Lane >= 0 && "Lane not set"); @@ -5345,6 +5346,10 @@ private: assert(Lane < static_cast(EI.UserTE->Scalars.size()) && "Couldn't find extract lane"); auto *In = cast(EI.UserTE->Scalars[Lane]); + if (!Visited.insert(In).second) { + It = find(make_range(std::next(It), Op.end()), I); + continue; + } ScheduleCopyableDataMapByInstUser .try_emplace(std::make_pair(std::make_pair(In, EI.EdgeIdx), I)) .first->getSecond() @@ -20927,6 +20932,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, } ScheduledBundlesList.pop_back(); SmallVector ControlDependentMembers; + SmallPtrSet Visited; for (Value *V : VL) { 
if (S.isNonSchedulable(V)) continue; @@ -20944,6 +20950,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, ArrayRef Op = EI.UserTE->getOperand(EI.EdgeIdx); const auto *It = find(Op, I); assert(It != Op.end() && "Lane not set"); + SmallPtrSet Visited; do { int Lane = std::distance(Op.begin(), It); assert(Lane >= 0 && "Lane not set"); @@ -20953,6 +20960,10 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, assert(Lane < static_cast(EI.UserTE->Scalars.size()) && "Couldn't find extract lane"); auto *In = cast(EI.UserTE->Scalars[Lane]); + if (!Visited.insert(In).second) { + It = find(make_range(std::next(It), Op.end()), I); + break; + } ScheduleCopyableDataMapByInstUser [std::make_pair(std::make_pair(In, EI.EdgeIdx), I)] .pop_back(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll new file mode 100644 index 000000000000..c53ccf425a31 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-same-user-with-copyable.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(ptr %o, i32 %b.021.i) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[O:%.*]], i32 [[B_021_I:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[O1:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[SUB623:%.*]] = phi i32 [ [[SUB6:%.*]], %[[N_EXIT:.*]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ADD21:%.*]] = phi i32 [ [[ADD:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ADD419:%.*]] = phi i32 [ [[ADD4:%.*]], %[[N_EXIT]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[ADD18:%.*]] = phi i32 [ [[ADD]], %[[N_EXIT]] ], [ 1, %[[ENTRY]] ] +; CHECK-NEXT: store i32 [[ADD419]], ptr [[O1]], align 4 
+; CHECK-NEXT: store i32 [[ADD18]], ptr [[O]], align 4 +; CHECK-NEXT: br label %[[FOR_BODY4_I:.*]] +; CHECK: [[FOR_COND1_I:.*]]: +; CHECK-NEXT: ret i32 0 +; CHECK: [[FOR_BODY4_I]]: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[O1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[FOR_COND1_I]], label %[[N_EXIT]] +; CHECK: [[N_EXIT]]: +; CHECK-NEXT: [[SUB:%.*]] = or i32 [[B_021_I]], [[ADD21]] +; CHECK-NEXT: [[ADD]] = or i32 [[SUB]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = or i32 [[B_021_I]], 1 +; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[SUB623]] +; CHECK-NEXT: [[ADD4]] = or i32 [[ADD3]], 1 +; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[B_021_I]], 1 +; CHECK-NEXT: [[SUB6]] = or i32 [[MUL]], 1 +; CHECK-NEXT: br label %[[WHILE_BODY]] +; +entry: + %o1 = alloca [3 x i32], align 4 + br label %while.body + +while.body: ; preds = %n.exit, %entry + %sub623 = phi i32 [ %sub6, %n.exit ], [ 0, %entry ] + %add21 = phi i32 [ %add, %n.exit ], [ 0, %entry ] + %add419 = phi i32 [ %add4, %n.exit ], [ 0, %entry ] + %add18 = phi i32 [ %add, %n.exit ], [ 1, %entry ] + store i32 %add419, ptr %o1, align 4 + store i32 %add18, ptr %o, align 4 + br label %for.body4.i + +for.cond1.i: ; preds = %for.body4.i + ret i32 0 + +for.body4.i: ; preds = %while.body + %0 = load i32, ptr %o1, align 4 + %tobool.not.i = icmp eq i32 %0, 0 + br i1 %tobool.not.i, label %for.cond1.i, label %n.exit + +n.exit: ; preds = %for.body4.i + %sub = or i32 %b.021.i, %add21 + %add = or i32 %sub, 1 + %add2 = or i32 %b.021.i, 1 + %add3 = add i32 %add2, %sub623 + %add4 = or i32 %add3, 1 + %mul = shl i32 %b.021.i, 1 + %sub6 = or i32 %mul, 1 + br label %while.body +} From c61fb5ca69a7b678eba2c96a399b2597ddcf091c Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Fri, 15 Aug 2025 12:40:24 -0700 Subject: [PATCH 011/214] [NFC][lldb] Make C++ symbols in CPlusPlusLanguageTest.cpp valid (#153857) --- lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp | 6 +++--- 
1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp index cc0275163b34..957fb3f60049 100644 --- a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp +++ b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp @@ -420,10 +420,10 @@ TEST(CPlusPlusLanguage, MatchesCxx) { EXPECT_TRUE(CPlusPlusLang != nullptr); - Mangled itaniumSymbol("_ZFoo"); + Mangled itaniumSymbol("_Z3Foo"); EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(itaniumSymbol)); - Mangled itaniumExtensionSymbol("___ZBar"); + Mangled itaniumExtensionSymbol("___Z3Bar_block_invoke"); EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(itaniumExtensionSymbol)); - Mangled msvcSymbol("?Baz"); + Mangled msvcSymbol("??x@@3AH"); EXPECT_TRUE(CPlusPlusLang->SymbolNameFitsToLanguage(msvcSymbol)); } From ca8ee49c1fa34d12f5c068e9b36f4a72629a8ecf Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 12:40:32 -0700 Subject: [PATCH 012/214] [MLIR] Set LLVM_LIT_ARGS in Standalone Example CMake (#152423) Setting LLVM_LIT_ARGS to include --quiet and then running check-mlir in a standard checkout will otherwise cause test failures here because LLVM_LIT_ARGS gets propagated into this project. --- mlir/examples/standalone/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt index 038242ba1437..42b487fe2d40 100644 --- a/mlir/examples/standalone/CMakeLists.txt +++ b/mlir/examples/standalone/CMakeLists.txt @@ -8,6 +8,10 @@ set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to") if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) find_package(MLIR REQUIRED CONFIG) + + # Define the default argument to use by `lit` when testing. 
+ set(LLVM_LIT_ARGS "-sv" CACHE STRING "Default options for lit") + message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") From c84a43ff3bc9dd41f66ceefabe6e7abdf51c1268 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 15 Aug 2025 12:55:18 -0700 Subject: [PATCH 013/214] [RISCV] Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1). (#153855) This improves all 3 vendor extensions that make sext_inreg i1 legal Fixes #153781. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 ++++ llvm/test/CodeGen/RISCV/rv32xandesperf.ll | 13 +++++++ llvm/test/CodeGen/RISCV/rv32xtheadbb.ll | 20 +++++++++++ llvm/test/CodeGen/RISCV/rv64xandesperf.ll | 26 ++++++++++++++ llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 40 +++++++++++++++++++++ llvm/test/CodeGen/RISCV/xqcibm-extract.ll | 27 ++++++++++++++ 6 files changed, 133 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c5a706ae2b76..fbf02cad514b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16660,6 +16660,13 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0), Src.getOperand(1)); + // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1) + if (Opc == ISD::XOR && SrcVT == MVT::i1 && + isAllOnesConstant(Src.getOperand(1)) && + Src.getOperand(0).getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, Src.getOperand(0), + DAG.getAllOnesConstant(SDLoc(N), VT)); + return SDValue(); } diff --git a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll index 5cabb8c53e26..6f1d168358e2 100644 --- a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll +++ b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll @@ -364,6 +364,19 @@ define i32 @sexti1_i32_2(i1 %a) { ret i32 %1 } +; Make sure we don't use not+nds.bfos +define zeroext i8 
@sexti1_i32_setcc(i32 signext %a) { +; CHECK-LABEL: sexti1_i32_setcc: +; CHECK: # %bb.0: +; CHECK-NEXT: srli a0, a0, 31 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: zext.b a0, a0 +; CHECK-NEXT: ret + %icmp = icmp sgt i32 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define i32 @sexti8_i32(i32 %a) { ; CHECK-LABEL: sexti8_i32: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index 723437a610ff..784f08ca616c 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -314,6 +314,26 @@ define i32 @sexti1_i32_2(i1 %a) nounwind { ret i32 %sext } +; Make sure we don't use not+th.ext +define zeroext i8 @sexti1_i32_setcc(i32 signext %a) { +; RV32I-LABEL: sexti1_i32_setcc: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 31 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: ret +; +; RV32XTHEADBB-LABEL: sexti1_i32_setcc: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: srli a0, a0, 31 +; RV32XTHEADBB-NEXT: addi a0, a0, -1 +; RV32XTHEADBB-NEXT: zext.b a0, a0 +; RV32XTHEADBB-NEXT: ret + %icmp = icmp sgt i32 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define i32 @sextb_i32(i32 %a) nounwind { ; RV32I-LABEL: sextb_i32: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll index 98cda4266516..406e5247ae0d 100644 --- a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll +++ b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll @@ -277,6 +277,19 @@ define signext i32 @sexti1_i32_2(i1 %a) { ret i32 %1 } +; Make sure we don't use not+nds.bfos +define zeroext i8 @sexti1_i32_setcc(i32 signext %a) { +; CHECK-LABEL: sexti1_i32_setcc: +; CHECK: # %bb.0: +; CHECK-NEXT: srli a0, a0, 63 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: zext.b a0, a0 +; CHECK-NEXT: ret + %icmp = icmp sgt i32 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define signext i32 @sexti8_i32(i32 signext %a) { ; 
CHECK-LABEL: sexti8_i32: ; CHECK: # %bb.0: @@ -334,6 +347,19 @@ define i64 @sexti1_i64_2(i1 %a) { ret i64 %1 } +; Make sure we don't use not+nds.bfos +define zeroext i8 @sexti1_i64_setcc(i64 %a) { +; CHECK-LABEL: sexti1_i64_setcc: +; CHECK: # %bb.0: +; CHECK-NEXT: srli a0, a0, 63 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: zext.b a0, a0 +; CHECK-NEXT: ret + %icmp = icmp sgt i64 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define i64 @sexti8_i64(i64 %a) { ; CHECK-LABEL: sexti8_i64: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index 81acb4f72413..c7902342f7f0 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -635,6 +635,26 @@ define signext i32 @sexti1_i32_2(i1 %a) nounwind { ret i32 %sext } +; Make sure we don't use not+th.ext +define zeroext i8 @sexti1_i32_setcc(i32 signext %a) { +; RV64I-LABEL: sexti1_i32_setcc: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 63 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: sexti1_i32_setcc: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srli a0, a0, 63 +; RV64XTHEADBB-NEXT: addi a0, a0, -1 +; RV64XTHEADBB-NEXT: zext.b a0, a0 +; RV64XTHEADBB-NEXT: ret + %icmp = icmp sgt i32 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define i64 @sexti1_i64(i64 %a) nounwind { ; RV64I-LABEL: sexti1_i64: ; RV64I: # %bb.0: @@ -666,6 +686,26 @@ define i64 @sexti1_i64_2(i1 %a) nounwind { ret i64 %sext } +; Make sure we don't use not+th.ext +define zeroext i8 @sexti1_i64_setcc(i64 %a) { +; RV64I-LABEL: sexti1_i64_setcc: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a0, a0, 63 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: sexti1_i64_setcc: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srli a0, a0, 63 +; RV64XTHEADBB-NEXT: addi a0, a0, -1 +; RV64XTHEADBB-NEXT: zext.b a0, a0 +; RV64XTHEADBB-NEXT: ret + 
%icmp = icmp sgt i64 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define signext i32 @sextb_i32(i32 signext %a) nounwind { ; RV64I-LABEL: sextb_i32: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/xqcibm-extract.ll b/llvm/test/CodeGen/RISCV/xqcibm-extract.ll index 481bfdd66643..fc3d8fe54602 100644 --- a/llvm/test/CodeGen/RISCV/xqcibm-extract.ll +++ b/llvm/test/CodeGen/RISCV/xqcibm-extract.ll @@ -47,6 +47,33 @@ define i32 @sexti1_i32_2(i32 %a) { ret i32 %shr } +; Make sure we don't use not+qc.ext +define zeroext i8 @sexti1_i32_setcc(i32 signext %a) { +; RV32I-LABEL: sexti1_i32_setcc: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 31 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: ret +; +; RV32XQCIBM-LABEL: sexti1_i32_setcc: +; RV32XQCIBM: # %bb.0: +; RV32XQCIBM-NEXT: srli a0, a0, 31 +; RV32XQCIBM-NEXT: addi a0, a0, -1 +; RV32XQCIBM-NEXT: qc.extu a0, a0, 8, 0 +; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti1_i32_setcc: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: srli a0, a0, 31 +; RV32XQCIBMZBB-NEXT: addi a0, a0, -1 +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 8, 0 +; RV32XQCIBMZBB-NEXT: ret + %icmp = icmp sgt i32 %a, -1 + %sext = sext i1 %icmp to i8 + ret i8 %sext +} + define i32 @sexti8_i32(i8 %a) nounwind { ; RV32I-LABEL: sexti8_i32: From 95d43625214ae5b2c56c0f548ed3c28bdec1c9a9 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 15 Aug 2025 13:15:35 -0700 Subject: [PATCH 014/214] [flang] Added hlfir.eoshift operation definition. (#153105) This is a basic definition of the operation corresponding to the Fortran's EOSHIFT transformational intrinsic. 
--- .../flang/Optimizer/HLFIR/HLFIROpBase.td | 6 +- .../include/flang/Optimizer/HLFIR/HLFIROps.td | 22 +++ flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 139 ++++++++++++------ flang/test/HLFIR/invalid.fir | 93 ++++++++++++ 4 files changed, 214 insertions(+), 46 deletions(-) diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td b/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td index ee0b5aa9760b..0bddfd85d436 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROpBase.td @@ -95,9 +95,9 @@ def IsFortranValuePred : CPred<"::hlfir::isFortranValueType($_self)">; def AnyFortranValue : TypeConstraint; - -def AnyFortranEntity : TypeConstraint, "any Fortran value or variable type">; +def AnyFortranEntity + : Type, + "any Fortran value or variable type">; def IsFortranScalarCharacterPred : CPred<"::hlfir::isFortranScalarCharacterType($_self)">; diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 2f5da720fbe1..db3fb0b90464 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -721,6 +721,28 @@ def hlfir_CShiftOp let hasVerifier = 1; } +def hlfir_EOShiftOp + : hlfir_Op< + "eoshift", [AttrSizedOperandSegments, + DeclareOpInterfaceMethods]> { + let summary = "EOSHIFT transformational intrinsic"; + let description = [{ + End-off shift of an array + }]; + + let arguments = (ins AnyFortranArrayObject:$array, + AnyFortranIntegerScalarOrArrayObject:$shift, + Optional:$boundary, Optional:$dim); + + let results = (outs hlfir_ExprType); + + let assemblyFormat = [{ + $array $shift (`boundary` $boundary^)? (`dim` $dim^)? 
attr-dict `:` functional-type(operands, results) + }]; + + let hasVerifier = 1; +} + def hlfir_ReshapeOp : hlfir_Op< "reshape", [AttrSizedOperandSegments, diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index ed102db69dae..93ee94a120aa 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -1440,44 +1440,46 @@ void hlfir::MatmulTransposeOp::getEffects( } //===----------------------------------------------------------------------===// -// CShiftOp +// Array shifts: CShiftOp/EOShiftOp //===----------------------------------------------------------------------===// -llvm::LogicalResult hlfir::CShiftOp::verify() { - mlir::Value array = getArray(); +template +static llvm::LogicalResult verifyArrayShift(Op op) { + mlir::Value array = op.getArray(); fir::SequenceType arrayTy = mlir::cast( hlfir::getFortranElementOrSequenceType(array.getType())); llvm::ArrayRef inShape = arrayTy.getShape(); std::size_t arrayRank = inShape.size(); mlir::Type eleTy = arrayTy.getEleTy(); - hlfir::ExprType resultTy = mlir::cast(getResult().getType()); + hlfir::ExprType resultTy = + mlir::cast(op.getResult().getType()); llvm::ArrayRef resultShape = resultTy.getShape(); std::size_t resultRank = resultShape.size(); mlir::Type resultEleTy = resultTy.getEleTy(); - mlir::Value shift = getShift(); + mlir::Value shift = op.getShift(); mlir::Type shiftTy = hlfir::getFortranElementOrSequenceType(shift.getType()); - // TODO: turn allowCharacterLenMismatch into true. 
- if (auto match = areMatchingTypes(*this, eleTy, resultEleTy, - /*allowCharacterLenMismatch=*/false); + if (auto match = areMatchingTypes( + op, eleTy, resultEleTy, + /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier); match.failed()) - return emitOpError( + return op.emitOpError( "input and output arrays should have the same element type"); if (arrayRank != resultRank) - return emitOpError("input and output arrays should have the same rank"); + return op.emitOpError("input and output arrays should have the same rank"); constexpr int64_t unknownExtent = fir::SequenceType::getUnknownExtent(); for (auto [inDim, resultDim] : llvm::zip(inShape, resultShape)) if (inDim != unknownExtent && resultDim != unknownExtent && inDim != resultDim) - return emitOpError( + return op.emitOpError( "output array's shape conflicts with the input array's shape"); int64_t dimVal = -1; - if (!getDim()) + if (!op.getDim()) dimVal = 1; - else if (auto dim = fir::getIntIfConstant(getDim())) + else if (auto dim = fir::getIntIfConstant(op.getDim())) dimVal = *dim; // The DIM argument may be statically invalid (e.g. exceed the @@ -1485,44 +1487,79 @@ llvm::LogicalResult hlfir::CShiftOp::verify() { // so avoid some checks unless useStrictIntrinsicVerifier is true. if (useStrictIntrinsicVerifier && dimVal != -1) { if (dimVal < 1) - return emitOpError("DIM must be >= 1"); + return op.emitOpError("DIM must be >= 1"); if (dimVal > static_cast(arrayRank)) - return emitOpError("DIM must be <= input array's rank"); + return op.emitOpError("DIM must be <= input array's rank"); } - if (auto shiftSeqTy = mlir::dyn_cast(shiftTy)) { - // SHIFT is an array. Verify the rank and the shape (if DIM is constant). 
- llvm::ArrayRef shiftShape = shiftSeqTy.getShape(); - std::size_t shiftRank = shiftShape.size(); - if (shiftRank != arrayRank - 1) - return emitOpError( - "SHIFT's rank must be 1 less than the input array's rank"); + // A helper lambda to verify the shape of the array types of + // certain operands of the array shift (e.g. the SHIFT and BOUNDARY operands). + auto verifyOperandTypeShape = [&](mlir::Type type, + llvm::Twine name) -> llvm::LogicalResult { + if (auto opndSeqTy = mlir::dyn_cast(type)) { + // The operand is an array. Verify the rank and the shape (if DIM is + // constant). + llvm::ArrayRef opndShape = opndSeqTy.getShape(); + std::size_t opndRank = opndShape.size(); + if (opndRank != arrayRank - 1) + return op.emitOpError( + name + "'s rank must be 1 less than the input array's rank"); - if (useStrictIntrinsicVerifier && dimVal != -1) { - // SHIFT's shape must be [d(1), d(2), ..., d(DIM-1), d(DIM+1), ..., d(n)], - // where [d(1), d(2), ..., d(n)] is the shape of the ARRAY. - int64_t arrayDimIdx = 0; - int64_t shiftDimIdx = 0; - for (auto shiftDim : shiftShape) { - if (arrayDimIdx == dimVal - 1) + if (useStrictIntrinsicVerifier && dimVal != -1) { + // The operand's shape must be + // [d(1), d(2), ..., d(DIM-1), d(DIM+1), ..., d(n)], + // where [d(1), d(2), ..., d(n)] is the shape of the ARRAY. 
+ int64_t arrayDimIdx = 0; + int64_t opndDimIdx = 0; + for (auto opndDim : opndShape) { + if (arrayDimIdx == dimVal - 1) + ++arrayDimIdx; + + if (inShape[arrayDimIdx] != unknownExtent && + opndDim != unknownExtent && inShape[arrayDimIdx] != opndDim) + return op.emitOpError("SHAPE(ARRAY)(" + + llvm::Twine(arrayDimIdx + 1) + + ") must be equal to SHAPE(" + name + ")(" + + llvm::Twine(opndDimIdx + 1) + + "): " + llvm::Twine(inShape[arrayDimIdx]) + + " != " + llvm::Twine(opndDim)); ++arrayDimIdx; - - if (inShape[arrayDimIdx] != unknownExtent && - shiftDim != unknownExtent && inShape[arrayDimIdx] != shiftDim) - return emitOpError("SHAPE(ARRAY)(" + llvm::Twine(arrayDimIdx + 1) + - ") must be equal to SHAPE(SHIFT)(" + - llvm::Twine(shiftDimIdx + 1) + - "): " + llvm::Twine(inShape[arrayDimIdx]) + - " != " + llvm::Twine(shiftDim)); - ++arrayDimIdx; - ++shiftDimIdx; + ++opndDimIdx; + } } } + return mlir::success(); + }; + + if (failed(verifyOperandTypeShape(shiftTy, "SHIFT"))) + return mlir::failure(); + + if constexpr (std::is_same_v) { + if (mlir::Value boundary = op.getBoundary()) { + mlir::Type boundaryTy = + hlfir::getFortranElementOrSequenceType(boundary.getType()); + if (auto match = areMatchingTypes( + op, eleTy, hlfir::getFortranElementType(boundaryTy), + /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier); + match.failed()) + return op.emitOpError( + "ARRAY and BOUNDARY operands must have the same element type"); + if (failed(verifyOperandTypeShape(boundaryTy, "BOUNDARY"))) + return mlir::failure(); + } } return mlir::success(); } +//===----------------------------------------------------------------------===// +// CShiftOp +//===----------------------------------------------------------------------===// + +llvm::LogicalResult hlfir::CShiftOp::verify() { + return verifyArrayShift(*this); +} + void hlfir::CShiftOp::getEffects( llvm::SmallVectorImpl< mlir::SideEffects::EffectInstance> @@ -1530,6 +1567,21 @@ void hlfir::CShiftOp::getEffects( 
getIntrinsicEffects(getOperation(), effects); } +//===----------------------------------------------------------------------===// +// EOShiftOp +//===----------------------------------------------------------------------===// + +llvm::LogicalResult hlfir::EOShiftOp::verify() { + return verifyArrayShift(*this); +} + +void hlfir::EOShiftOp::getEffects( + llvm::SmallVectorImpl< + mlir::SideEffects::EffectInstance> + &effects) { + getIntrinsicEffects(getOperation(), effects); +} + //===----------------------------------------------------------------------===// // ReshapeOp //===----------------------------------------------------------------------===// @@ -1543,7 +1595,8 @@ llvm::LogicalResult hlfir::ReshapeOp::verify() { hlfir::getFortranElementOrSequenceType(array.getType())); if (auto match = areMatchingTypes( *this, hlfir::getFortranElementType(resultType), - arrayType.getElementType(), /*allowCharacterLenMismatch=*/true); + arrayType.getElementType(), + /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier); match.failed()) return emitOpError("ARRAY and the result must have the same element type"); if (hlfir::isPolymorphicType(resultType) != @@ -1565,9 +1618,9 @@ llvm::LogicalResult hlfir::ReshapeOp::verify() { if (mlir::Value pad = getPad()) { auto padArrayType = mlir::cast( hlfir::getFortranElementOrSequenceType(pad.getType())); - if (auto match = areMatchingTypes(*this, arrayType.getElementType(), - padArrayType.getElementType(), - /*allowCharacterLenMismatch=*/true); + if (auto match = areMatchingTypes( + *this, arrayType.getElementType(), padArrayType.getElementType(), + /*allowCharacterLenMismatch=*/!useStrictIntrinsicVerifier); match.failed()) return emitOpError("ARRAY and PAD must be of the same type"); } diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir index d61efe0062e6..0f54a0250294 100644 --- a/flang/test/HLFIR/invalid.fir +++ b/flang/test/HLFIR/invalid.fir @@ -1555,3 +1555,96 @@ func.func @bad_reshape(%arg0: 
!hlfir.expr<1x!fir.char<1,2>>, %arg1: !hlfir.expr< %0 = hlfir.reshape %arg0 %arg1 pad %arg2 : (!hlfir.expr<1x!fir.char<1,2>>, !hlfir.expr<1xi32>, !hlfir.expr<1x!fir.char<2,?>>) -> !hlfir.expr> return } + +// ----- + +func.func @bad_eoshift1(%arg0: !hlfir.expr, %arg1: i32) { + // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same element type}} + %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr, i32) -> !hlfir.expr + return +} + +// ----- + +func.func @bad_eoshift2(%arg0: !hlfir.expr, %arg1: i32) { + // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same rank}} + %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr, i32) -> !hlfir.expr + return +} + +// ----- + +func.func @bad_eoshift3(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32) { + // expected-error@+1 {{'hlfir.eoshift' op output array's shape conflicts with the input array's shape}} + %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<2x2xi32>, i32) -> !hlfir.expr<2x3xi32> + return +} + +// ----- + +func.func @bad_eoshift4(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32) { + %c0 = arith.constant 0 : index + // expected-error@+1 {{'hlfir.eoshift' op DIM must be >= 1}} + %0 = hlfir.eoshift %arg0 %arg1 dim %c0 : (!hlfir.expr<2x2xi32>, i32, index) -> !hlfir.expr<2x2xi32> + return +} + +// ----- + +func.func @bad_eoshift5(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32) { + %c10 = arith.constant 10 : index + // expected-error@+1 {{'hlfir.eoshift' op DIM must be <= input array's rank}} + %0 = hlfir.eoshift %arg0 %arg1 dim %c10 : (!hlfir.expr<2x2xi32>, i32, index) -> !hlfir.expr<2x2xi32> + return +} + +// ----- + +func.func @bad_eoshift6(%arg0: !hlfir.expr<2x2xi32>, %arg1: !hlfir.expr<2x2xi32>) { + // expected-error@+1 {{'hlfir.eoshift' op SHIFT's rank must be 1 less than the input array's rank}} + %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr<2x2xi32>, !hlfir.expr<2x2xi32>) -> !hlfir.expr<2x2xi32> + return +} + +// ----- + +func.func @bad_eoshift7(%arg0: !hlfir.expr, %arg1: 
!hlfir.expr<3xi32>) { + %c1 = arith.constant 1 : index + // expected-error@+1 {{'hlfir.eoshift' op SHAPE(ARRAY)(2) must be equal to SHAPE(SHIFT)(1): 2 != 3}} + %0 = hlfir.eoshift %arg0 %arg1 dim %c1 : (!hlfir.expr, !hlfir.expr<3xi32>, index) -> !hlfir.expr<2x2xi32> + return +} + +// ----- + +func.func @bad_eoshift8(%arg0: !hlfir.expr>, %arg1: i32) { + // expected-error@+2 {{'hlfir.eoshift' op character KIND mismatch}} + // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same element type}} + %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr>, i32) -> !hlfir.expr> + return +} + +// ----- + +func.func @bad_eoshift9(%arg0: !hlfir.expr>, %arg1: i32) { + // expected-error@+2 {{'hlfir.eoshift' op character LEN mismatch}} + // expected-error@+1 {{'hlfir.eoshift' op input and output arrays should have the same element type}} + %0 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr>, i32) -> !hlfir.expr> + return +} + +// ----- + +func.func @bad_eoshift10(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32, %arg2: f32) { + // expected-error@+1 {{'hlfir.eoshift' op ARRAY and BOUNDARY operands must have the same element type}} + %0 = hlfir.eoshift %arg0 %arg1 boundary %arg2 : (!hlfir.expr<2x2xi32>, i32, f32) -> !hlfir.expr<2x2xi32> + return +} + +// ----- + +func.func @bad_eoshift11(%arg0: !hlfir.expr<2x2xi32>, %arg1: i32, %arg2: !hlfir.expr<2x2xi32>) { + // expected-error@+1 {{'hlfir.eoshift' op BOUNDARY's rank must be 1 less than the input array's rank}} + %0 = hlfir.eoshift %arg0 %arg1 boundary %arg2 : (!hlfir.expr<2x2xi32>, i32, !hlfir.expr<2x2xi32>) -> !hlfir.expr<2x2xi32> + return +} From 5b0619e79b65cbd3c5ad0fc0916d4ba59881b090 Mon Sep 17 00:00:00 2001 From: Sterling-Augustine Date: Fri, 15 Aug 2025 13:16:34 -0700 Subject: [PATCH 015/214] Move function info word into its own data structure (#153627) The sframe generator needs to construct this word separately from FDEs themselves, so split them into a separate data structure. 
--- llvm/include/llvm/BinaryFormat/SFrame.h | 18 ++++++---- llvm/lib/Object/SFrameParser.cpp | 4 +-- llvm/tools/llvm-readobj/ELFDumper.cpp | 21 +++++++----- llvm/unittests/BinaryFormat/SFrameTest.cpp | 38 +++++++++++----------- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h index 74e47ea8acca..095db18b9c25 100644 --- a/llvm/include/llvm/BinaryFormat/SFrame.h +++ b/llvm/include/llvm/BinaryFormat/SFrame.h @@ -104,14 +104,8 @@ template struct Header { detail::packed FREOff; }; -template struct FuncDescEntry { - detail::packed StartAddress; - detail::packed Size; - detail::packed StartFREOff; - detail::packed NumFREs; +template struct FDEInfo { detail::packed Info; - detail::packed RepSize; - detail::packed Padding2; uint8_t getPAuthKey() const { return (Info >> 5) & 1; } FDEType getFDEType() const { return static_cast((Info >> 4) & 1); } @@ -125,6 +119,16 @@ template struct FuncDescEntry { } }; +template struct FuncDescEntry { + detail::packed StartAddress; + detail::packed Size; + detail::packed StartFREOff; + detail::packed NumFREs; + FDEInfo Info; + detail::packed RepSize; + detail::packed Padding2; +}; + template struct FREInfo { detail::packed Info; diff --git a/llvm/lib/Object/SFrameParser.cpp b/llvm/lib/Object/SFrameParser.cpp index 0c5638d776ef..759b579230d9 100644 --- a/llvm/lib/Object/SFrameParser.cpp +++ b/llvm/lib/Object/SFrameParser.cpp @@ -176,10 +176,10 @@ iterator_range::fre_iterator> SFrameParser::fres(const sframe::FuncDescEntry &FDE, Error &Err) const { uint64_t Offset = getFREBase() + FDE.StartFREOff; fre_iterator BeforeBegin = make_fallible_itr( - FallibleFREIterator(Data, FDE.getFREType(), -1, FDE.NumFREs, Offset), + FallibleFREIterator(Data, FDE.Info.getFREType(), -1, FDE.NumFREs, Offset), Err); fre_iterator End = make_fallible_end( - FallibleFREIterator(Data, FDE.getFREType(), FDE.NumFREs, FDE.NumFREs, + FallibleFREIterator(Data, 
FDE.Info.getFREType(), FDE.NumFREs, FDE.NumFREs, /*Offset=*/0)); return {++BeforeBegin, End}; } diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index c1a2c38ea9b7..ade025fd3d89 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -6512,12 +6512,13 @@ void ELFDumper::printSFrameFDEs( { DictScope InfoScope(W, "Info"); - W.printEnum("FRE Type", It->getFREType(), sframe::getFRETypes()); - W.printEnum("FDE Type", It->getFDEType(), sframe::getFDETypes()); + W.printEnum("FRE Type", It->Info.getFREType(), sframe::getFRETypes()); + W.printEnum("FDE Type", It->Info.getFDEType(), sframe::getFDETypes()); switch (Parser.getHeader().ABIArch) { case sframe::ABI::AArch64EndianBig: case sframe::ABI::AArch64EndianLittle: - W.printEnum("PAuth Key", sframe::AArch64PAuthKey(It->getPAuthKey()), + W.printEnum("PAuth Key", + sframe::AArch64PAuthKey(It->Info.getPAuthKey()), sframe::getAArch64PAuthKeys()); break; case sframe::ABI::AMD64EndianLittle: @@ -6525,12 +6526,13 @@ void ELFDumper::printSFrameFDEs( break; } - W.printHex("Raw", It->Info); + W.printHex("Raw", It->Info.Info); } W.printHex( ("Repetitive block size" + - Twine(It->getFDEType() == sframe::FDEType::PCMask ? "" : " (unused)")) + Twine(It->Info.getFDEType() == sframe::FDEType::PCMask ? "" + : " (unused)")) .str(), It->RepSize); @@ -6541,10 +6543,11 @@ void ELFDumper::printSFrameFDEs( for (const typename SFrameParser::FrameRowEntry &FRE : Parser.fres(*It, Err)) { DictScope FREScope(W, "Frame Row Entry"); - W.printHex( - "Start Address", - (It->getFDEType() == sframe::FDEType::PCInc ? FDEStartAddress : 0) + - FRE.StartAddress); + W.printHex("Start Address", + (It->Info.getFDEType() == sframe::FDEType::PCInc + ? 
FDEStartAddress + : 0) + + FRE.StartAddress); W.printBoolean("Return Address Signed", FRE.Info.isReturnAddressSigned()); W.printEnum("Offset Size", FRE.Info.getOffsetSize(), sframe::getFREOffsets()); diff --git a/llvm/unittests/BinaryFormat/SFrameTest.cpp b/llvm/unittests/BinaryFormat/SFrameTest.cpp index 394e382e041e..ab7b0fe20b75 100644 --- a/llvm/unittests/BinaryFormat/SFrameTest.cpp +++ b/llvm/unittests/BinaryFormat/SFrameTest.cpp @@ -54,28 +54,28 @@ TYPED_TEST_SUITE(SFrameTest, Types, NameGenerator); TYPED_TEST(SFrameTest, FDEFlags) { FuncDescEntry FDE = {}; - EXPECT_EQ(FDE.Info, 0u); - EXPECT_EQ(FDE.getPAuthKey(), 0); - EXPECT_EQ(FDE.getFDEType(), FDEType::PCInc); - EXPECT_EQ(FDE.getFREType(), FREType::Addr1); + EXPECT_EQ(FDE.Info.Info, 0u); + EXPECT_EQ(FDE.Info.getPAuthKey(), 0); + EXPECT_EQ(FDE.Info.getFDEType(), FDEType::PCInc); + EXPECT_EQ(FDE.Info.getFREType(), FREType::Addr1); - FDE.setPAuthKey(1); - EXPECT_EQ(FDE.Info, 0x20u); - EXPECT_EQ(FDE.getPAuthKey(), 1); - EXPECT_EQ(FDE.getFDEType(), FDEType::PCInc); - EXPECT_EQ(FDE.getFREType(), FREType::Addr1); + FDE.Info.setPAuthKey(1); + EXPECT_EQ(FDE.Info.Info, 0x20u); + EXPECT_EQ(FDE.Info.getPAuthKey(), 1); + EXPECT_EQ(FDE.Info.getFDEType(), FDEType::PCInc); + EXPECT_EQ(FDE.Info.getFREType(), FREType::Addr1); - FDE.setFDEType(FDEType::PCMask); - EXPECT_EQ(FDE.Info, 0x30u); - EXPECT_EQ(FDE.getPAuthKey(), 1); - EXPECT_EQ(FDE.getFDEType(), FDEType::PCMask); - EXPECT_EQ(FDE.getFREType(), FREType::Addr1); + FDE.Info.setFDEType(FDEType::PCMask); + EXPECT_EQ(FDE.Info.Info, 0x30u); + EXPECT_EQ(FDE.Info.getPAuthKey(), 1); + EXPECT_EQ(FDE.Info.getFDEType(), FDEType::PCMask); + EXPECT_EQ(FDE.Info.getFREType(), FREType::Addr1); - FDE.setFREType(FREType::Addr4); - EXPECT_EQ(FDE.Info, 0x32u); - EXPECT_EQ(FDE.getPAuthKey(), 1); - EXPECT_EQ(FDE.getFDEType(), FDEType::PCMask); - EXPECT_EQ(FDE.getFREType(), FREType::Addr4); + FDE.Info.setFREType(FREType::Addr4); + EXPECT_EQ(FDE.Info.Info, 0x32u); + 
EXPECT_EQ(FDE.Info.getPAuthKey(), 1); + EXPECT_EQ(FDE.Info.getFDEType(), FDEType::PCMask); + EXPECT_EQ(FDE.Info.getFREType(), FREType::Addr4); } TYPED_TEST(SFrameTest, FREFlags) { From 5c51a88f193a4753818b31ca186b3a1ef1a07ecf Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Fri, 15 Aug 2025 17:21:55 -0300 Subject: [PATCH 016/214] [clang] fix DependentNameType -> UnresolvedUsingType transforms (#153862) --- clang/lib/Sema/TreeTransform.h | 5 ++++- clang/test/SemaCXX/using-decl-templates.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1863e7f97e3f..055d3cd1a860 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -7668,8 +7668,11 @@ QualType TreeTransform::TransformDependentNameType( } else if (isa(Result)) { TLB.push(Result).set(TL.getElaboratedKeywordLoc(), QualifierLoc, TL.getNameLoc()); + } else if (isa(Result)) { + auto NewTL = TLB.push(Result); + NewTL.set(TL.getElaboratedKeywordLoc(), QualifierLoc, TL.getNameLoc()); } else { - DependentNameTypeLoc NewTL = TLB.push(Result); + auto NewTL = TLB.push(Result); NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc()); NewTL.setQualifierLoc(QualifierLoc); NewTL.setNameLoc(TL.getNameLoc()); diff --git a/clang/test/SemaCXX/using-decl-templates.cpp b/clang/test/SemaCXX/using-decl-templates.cpp index c96c4879cb68..58b30595b148 100644 --- a/clang/test/SemaCXX/using-decl-templates.cpp +++ b/clang/test/SemaCXX/using-decl-templates.cpp @@ -153,3 +153,11 @@ T foo(T t) { // OK } } // namespace sss } // namespace func_templ + +namespace DependentName { + template struct S { + using typename T::Ty; + static Ty Val; + }; + template typename S::Ty S::Val; +} // DependentName From 334e9bf2dd01fbbfe785624c0de477b725cde6f2 Mon Sep 17 00:00:00 2001 From: gulfemsavrun Date: Fri, 15 Aug 2025 13:32:27 -0700 Subject: [PATCH 017/214] =?UTF-8?q?Revert=20"RuntimeLibcalls:=20Generate?= 
=?UTF-8?q?=20table=20of=20libcall=20name=20lengths=20(#153=E2=80=A6=20(#1?= =?UTF-8?q?53864)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …210)" This reverts commit 9a14b1d254a43dc0d4445c3ffa3d393bca007ba3. Revert "RuntimeLibcalls: Return StringRef for libcall names (#153209)" This reverts commit cb1228fbd535b8f9fe78505a15292b0ba23b17de. Revert "TableGen: Emit statically generated hash table for runtime libcalls (#150192)" This reverts commit 769a9058c8d04fc920994f6a5bbb03c8a4fbcd05. Reverted three changes because of a CMake error while building llvm-nm as reported in the following PR: https://github.com/llvm/llvm-project/pull/150192#issuecomment-3192223073 --- llvm/benchmarks/CMakeLists.txt | 17 -- llvm/benchmarks/RuntimeLibcalls.cpp | 114 ---------- llvm/include/llvm/CodeGen/TargetLowering.h | 12 +- llvm/include/llvm/IR/RuntimeLibcalls.h | 64 ++---- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +- llvm/lib/IR/RuntimeLibcalls.cpp | 59 +++-- llvm/lib/LTO/LTO.cpp | 2 +- llvm/lib/Object/IRSymtab.cpp | 47 ++-- .../WebAssemblyRuntimeLibcallSignatures.cpp | 4 +- .../Utils/DeclareRuntimeLibcalls.cpp | 2 +- llvm/test/TableGen/RuntimeLibcallEmitter.td | 39 ---- llvm/unittests/IR/CMakeLists.txt | 1 - llvm/unittests/IR/RuntimeLibcallsTest.cpp | 63 ------ .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 208 +----------------- 14 files changed, 100 insertions(+), 534 deletions(-) delete mode 100644 llvm/benchmarks/RuntimeLibcalls.cpp delete mode 100644 llvm/unittests/IR/RuntimeLibcallsTest.cpp diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt index 9613678d2e0a..1078efa55f49 100644 --- a/llvm/benchmarks/CMakeLists.txt +++ b/llvm/benchmarks/CMakeLists.txt @@ -11,20 +11,3 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(SandboxIRBench SandboxIRBench.cpp 
PARTIAL_SOURCES_INTENDED) -# Extract the list of symbols in a random utility as sample data. -set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt") -set(SYMBOL_TEST_DATA_SOURCE_BINARY $) - -add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE} - COMMAND $ --no-demangle --no-sort - --format=just-symbols - ${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE} - DEPENDS "$" "$") - -add_custom_target(generate-runtime-libcalls-sample-symbol-list - DEPENDS ${SYMBOL_TEST_DATA_FILE}) -add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED) - -add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list) -target_compile_definitions(RuntimeLibcallsBench PRIVATE - -DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}") diff --git a/llvm/benchmarks/RuntimeLibcalls.cpp b/llvm/benchmarks/RuntimeLibcalls.cpp deleted file mode 100644 index 81a5a24ec8f9..000000000000 --- a/llvm/benchmarks/RuntimeLibcalls.cpp +++ /dev/null @@ -1,114 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/RuntimeLibcalls.h" -#include "benchmark/benchmark.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/TargetParser/Triple.h" -#include -#include -using namespace llvm; - -static constexpr unsigned MaxFuncNameSize = 53; - -static std::vector getLibcallNameStringRefs() { - std::vector Names(RTLIB::NumLibcallImpls); - // Keep the strlens on the StringRef construction out of the benchmark loop. 
- for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) - Names[LC] = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC); - - return Names; -} - -static std::vector getRandomFuncNames() { - std::mt19937_64 Rng; - std::uniform_int_distribution<> StringLengthDistribution(1, MaxFuncNameSize); - std::uniform_int_distribution<> CharDistribution(1, 255); - int NumTestFuncs = 1 << 10; - std::vector TestFuncNames(NumTestFuncs); - - for (std::string &TestFuncName : TestFuncNames) { - for (int I = 0, E = StringLengthDistribution(Rng); I != E; ++I) - TestFuncName += static_cast(CharDistribution(Rng)); - } - - return TestFuncNames; -} - -static std::vector readSymbolsFromFile(StringRef InputFile) { - auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile, /*IsText=*/true); - if (!BufOrError) { - reportFatalUsageError("failed to open \'" + Twine(InputFile) + - "\': " + BufOrError.getError().message()); - } - - // Hackily figure out if there's a prefix on the symbol names - llvm-nm - // appears to not have a flag to skip this. 
- llvm::Triple HostTriple(LLVM_HOST_TRIPLE); - std::string DummyDatalayout = "e"; - DummyDatalayout += DataLayout::getManglingComponent(HostTriple); - - DataLayout DL(DummyDatalayout); - char GlobalPrefix = DL.getGlobalPrefix(); - - std::vector Lines; - for (line_iterator LineIt(**BufOrError, /*SkipBlanks=*/true); - !LineIt.is_at_eof(); ++LineIt) { - StringRef SymbolName = *LineIt; - SymbolName.consume_front(StringRef(&GlobalPrefix, 1)); - - Lines.push_back(SymbolName.str()); - } - return Lines; -} - -static void BM_LookupRuntimeLibcallByNameKnownCalls(benchmark::State &State) { - std::vector Names = getLibcallNameStringRefs(); - - for (auto _ : State) { - for (StringRef Name : Names) { - benchmark::DoNotOptimize( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name).empty()); - } - } -} - -static void BM_LookupRuntimeLibcallByNameRandomCalls(benchmark::State &State) { - std::vector TestFuncNames = getRandomFuncNames(); - - for (auto _ : State) { - for (const std::string &Name : TestFuncNames) { - benchmark::DoNotOptimize( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name)) - .empty()); - } - } -} - -// This isn't fully representative, it doesn't include any anonymous functions. 
-// nm -n --no-demangle --format=just-symbols sample-binary > sample.txt -static void BM_LookupRuntimeLibcallByNameSampleData(benchmark::State &State) { - std::vector TestFuncNames = - readSymbolsFromFile(SYMBOL_TEST_DATA_FILE); - for (auto _ : State) { - for (const std::string &Name : TestFuncNames) { - benchmark::DoNotOptimize( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name)) - .empty()); - } - } -} - -BENCHMARK(BM_LookupRuntimeLibcallByNameKnownCalls); -BENCHMARK(BM_LookupRuntimeLibcallByNameRandomCalls); -BENCHMARK(BM_LookupRuntimeLibcallByNameSampleData); - -BENCHMARK_MAIN(); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 272d7dd5f45e..ec3104799bfb 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3557,19 +3557,15 @@ public: /// Get the libcall routine name for the specified libcall. const char *getLibcallName(RTLIB::Libcall Call) const { - // FIXME: Return StringRef - return Libcalls.getLibcallName(Call).data(); + return Libcalls.getLibcallName(Call); } /// Get the libcall routine name for the specified libcall implementation - static StringRef getLibcallImplName(RTLIB::LibcallImpl Call) { - return RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Call); + const char *getLibcallImplName(RTLIB::LibcallImpl Call) const { + return Libcalls.getLibcallImplName(Call); } - const char *getMemcpyName() const { - // FIXME: Return StringRef - return Libcalls.getMemcpyName().data(); - } + const char *getMemcpyName() const { return Libcalls.getMemcpyName(); } /// Get the comparison predicate that's to be used to test the result of the /// comparison libcall against zero. 
This should only be used with diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 308be543de2b..2d1d07c5fd81 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -77,17 +77,17 @@ struct RuntimeLibcallsInfo { /// Get the libcall routine name for the specified libcall. // FIXME: This should be removed. Only LibcallImpl should have a name. - StringRef getLibcallName(RTLIB::Libcall Call) const { + const char *getLibcallName(RTLIB::Libcall Call) const { return getLibcallImplName(LibcallImpls[Call]); } /// Get the libcall routine name for the specified libcall implementation. - static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl) { + // FIXME: Change to return StringRef + static const char *getLibcallImplName(RTLIB::LibcallImpl CallImpl) { if (CallImpl == RTLIB::Unsupported) - return StringRef(); - return StringRef(RuntimeLibcallImplNameTable.getCString( - RuntimeLibcallNameOffsetTable[CallImpl]), - RuntimeLibcallNameSizeTable[CallImpl]); + return nullptr; + return RuntimeLibcallImplNameTable[RuntimeLibcallNameOffsetTable[CallImpl]] + .data(); } /// Return the lowering's selection of implementation call for \p Call @@ -119,10 +119,9 @@ struct RuntimeLibcallsInfo { /// Return a function name compatible with RTLIB::MEMCPY, or nullptr if fully /// unsupported. - StringRef getMemcpyName() const { - RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); - if (Memcpy != RTLIB::Unsupported) - return getLibcallImplName(Memcpy); + const char *getMemcpyName() const { + if (const char *Memcpy = getLibcallName(RTLIB::MEMCPY)) + return Memcpy; // Fallback to memmove if memcpy isn't available. return getLibcallName(RTLIB::MEMMOVE); @@ -133,41 +132,11 @@ struct RuntimeLibcallsInfo { return ImplToLibcall[Impl]; } - /// Check if a function name is a recognized runtime call of any kind. 
This - /// does not consider if this call is available for any current compilation, - /// just that it is a known call somewhere. This returns the set of all - /// LibcallImpls which match the name; multiple implementations with the same - /// name may exist but differ in interpretation based on the target context. - /// - /// Generated by tablegen. - LLVM_ABI static inline iota_range - lookupLibcallImplName(StringRef Name){ - // Inlining the early exit on the string name appears to be worthwhile when - // querying a real set of symbols -#define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY -#include "llvm/IR/RuntimeLibcalls.inc" -#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY - } - /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. - LLVM_ABI RTLIB::LibcallImpl - getSupportedLibcallImpl(StringRef FuncName) const { - for (RTLIB::LibcallImpl Impl : lookupLibcallImplName(FuncName)) { - // FIXME: This should not depend on looking up ImplToLibcall, only the - // list of libcalls for the module. - RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; - if (Recognized != RTLIB::Unsupported) - return Recognized; - } - - return RTLIB::Unsupported; - } + LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const; private: - LLVM_ABI static iota_range - lookupLibcallImplNameImpl(StringRef Name); - /// Stores the implementation choice for each each libcall. RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { RTLIB::Unsupported}; @@ -184,16 +153,17 @@ private: LLVM_ABI static const char RuntimeLibcallImplNameTableStorage[]; LLVM_ABI static const StringTable RuntimeLibcallImplNameTable; LLVM_ABI static const uint16_t RuntimeLibcallNameOffsetTable[]; - LLVM_ABI static const uint8_t RuntimeLibcallNameSizeTable[]; /// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind. 
LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls]; - /// Utility function for tablegenerated lookup function. Return a range of - /// enum values that apply for the function name at \p NameOffsetEntry with - /// the value \p StrOffset. - static inline iota_range - libcallImplNameHit(uint16_t NameOffsetEntry, uint16_t StrOffset); + /// Check if a function name is a recognized runtime call of any kind. This + /// does not consider if this call is available for any current compilation, + /// just that it is a known call somewhere. This returns the set of all + /// LibcallImpls which match the name; multiple implementations with the same + /// name may exist but differ in interpretation based on the target context. + LLVM_ABI static iterator_range::const_iterator> + getRecognizedLibcallImpls(StringRef FuncName); static bool darwinHasSinCosStret(const Triple &TT) { if (!TT.isOSDarwin()) diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 96c9cde622b4..9fa96e737296 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -145,7 +145,7 @@ static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, // FIXME: When RuntimeLibcalls is an analysis, check if the function is really // supported, and go through RTLIB::Libcall. - StringRef NewFnName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(NewFn); + const char *NewFnName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(NewFn); // If we haven't already looked up this function, check to see if the // program already contains a function with this name. 
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 88cb192c0878..ac845c499878 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -9,7 +9,6 @@ #include "llvm/IR/RuntimeLibcalls.h" #include "llvm/ADT/StringTable.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" #define DEBUG_TYPE "runtime-libcalls-info" @@ -19,11 +18,9 @@ using namespace RTLIB; #define GET_INIT_RUNTIME_LIBCALL_NAMES #define GET_SET_TARGET_RUNTIME_LIBCALL_SETS -#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME #include "llvm/IR/RuntimeLibcalls.inc" #undef GET_INIT_RUNTIME_LIBCALL_NAMES #undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS -#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. @@ -61,23 +58,49 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, } } -LLVM_ATTRIBUTE_ALWAYS_INLINE -iota_range -RuntimeLibcallsInfo::libcallImplNameHit(uint16_t NameOffsetEntry, - uint16_t StrOffset) { - int NumAliases = 1; - for (uint16_t Entry : ArrayRef(RuntimeLibcallNameOffsetTable) - .drop_front(NameOffsetEntry + 1)) { - if (Entry != StrOffset) - break; - ++NumAliases; +RTLIB::LibcallImpl +RuntimeLibcallsInfo::getSupportedLibcallImpl(StringRef FuncName) const { + const ArrayRef RuntimeLibcallNameOffsets( + RuntimeLibcallNameOffsetTable); + + iterator_range::const_iterator> Range = + getRecognizedLibcallImpls(FuncName); + + for (auto I = Range.begin(); I != Range.end(); ++I) { + RTLIB::LibcallImpl Impl = + static_cast(I - RuntimeLibcallNameOffsets.begin()); + + // FIXME: This should not depend on looking up ImplToLibcall, only the list + // of libcalls for the module. 
+ RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; + if (Recognized != RTLIB::Unsupported) + return Recognized; } - RTLIB::LibcallImpl ImplStart = static_cast( - &RuntimeLibcallNameOffsetTable[NameOffsetEntry] - - &RuntimeLibcallNameOffsetTable[0]); - return enum_seq(ImplStart, - static_cast(ImplStart + NumAliases)); + return RTLIB::Unsupported; +} + +iterator_range::const_iterator> +RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) { + StringTable::Iterator It = lower_bound(RuntimeLibcallImplNameTable, FuncName); + if (It == RuntimeLibcallImplNameTable.end() || *It != FuncName) + return iterator_range(ArrayRef()); + + uint16_t IndexVal = It.offset().value(); + const ArrayRef TableRef(RuntimeLibcallNameOffsetTable); + + ArrayRef::const_iterator E = TableRef.end(); + ArrayRef::const_iterator EntriesBegin = + std::lower_bound(TableRef.begin(), E, IndexVal); + ArrayRef::const_iterator EntriesEnd = EntriesBegin; + + while (EntriesEnd != E && *EntriesEnd == IndexVal) + ++EntriesEnd; + + assert(EntriesBegin != E && + "libcall found in name table but not offset table"); + + return make_range(EntriesBegin, EntriesEnd); } bool RuntimeLibcallsInfo::isAAPCS_ABI(const Triple &TT, StringRef ABIName) { diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 35d24c17bbd9..0323b4d433b8 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1422,7 +1422,7 @@ SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { for (RTLIB::LibcallImpl Impl : LibcallImpls) { if (Impl != RTLIB::Unsupported) - LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl).data()); + LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl)); } return LibcallSymbols; diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp index 0043f02107fb..0f194953787e 100644 --- a/llvm/lib/Object/IRSymtab.cpp +++ b/llvm/lib/Object/IRSymtab.cpp @@ -46,7 +46,7 @@ static cl::opt DisableBitcodeVersionUpgrade( "disable-bitcode-version-upgrade", 
cl::Hidden, cl::desc("Disable automatic bitcode upgrade for version mismatch")); -static constexpr StringLiteral PreservedSymbols[] = { +static const char *PreservedSymbols[] = { // There are global variables, so put it here instead of in // RuntimeLibcalls.td. // TODO: Are there similar such variables? @@ -54,10 +54,6 @@ static constexpr StringLiteral PreservedSymbols[] = { "__stack_chk_guard", }; -static bool isPreservedGlobalVarName(StringRef Name) { - return PreservedSymbols[0] == Name || PreservedSymbols[1] == Name; -} - namespace { const char *getExpectedProducerName() { @@ -85,16 +81,12 @@ struct Builder { // The StringTableBuilder does not create a copy of any strings added to it, // so this provides somewhere to store any strings that we create. Builder(SmallVector &Symtab, StringTableBuilder &StrtabBuilder, - BumpPtrAllocator &Alloc, const Triple &TT) - : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT), - Libcalls(TT) {} + BumpPtrAllocator &Alloc) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} DenseMap ComdatMap; Mangler Mang; - const Triple &TT; - - // FIXME: This shouldn't be here. - RTLIB::RuntimeLibcallsInfo Libcalls; + Triple TT; std::vector Comdats; std::vector Mods; @@ -106,10 +98,6 @@ struct Builder { std::vector DependentLibraries; - bool isPreservedLibFuncName(StringRef Name) { - return Libcalls.getSupportedLibcallImpl(Name) != RTLIB::Unsupported; - } - void setStr(storage::Str &S, StringRef Value) { S.Offset = StrtabBuilder.add(Value); S.Size = Value.size(); @@ -225,6 +213,19 @@ Expected Builder::getComdatIndex(const Comdat *C, const Module *M) { return P.first->second; } +static StringSet<> buildPreservedSymbolsSet(const Triple &TT) { + StringSet<> PreservedSymbolSet; + PreservedSymbolSet.insert(std::begin(PreservedSymbols), + std::end(PreservedSymbols)); + // FIXME: Do we need to pass in ABI fields from TargetOptions? 
+ RTLIB::RuntimeLibcallsInfo Libcalls(TT); + for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) { + if (Impl != RTLIB::Unsupported) + PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl)); + } + return PreservedSymbolSet; +} + Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Msym) { @@ -278,11 +279,13 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, return Error::success(); } - StringRef GVName = GV->getName(); - setStr(Sym.IRName, GVName); + setStr(Sym.IRName, GV->getName()); - if (Used.count(GV) || isPreservedLibFuncName(GVName) || - isPreservedGlobalVarName(GVName)) + static const StringSet<> PreservedSymbolsSet = + buildPreservedSymbolsSet(GV->getParent()->getTargetTriple()); + bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName()); + + if (Used.count(GV) || IsPreservedSymbol) Sym.Flags |= 1 << storage::Symbol::FB_used; if (GV->isThreadLocal()) Sym.Flags |= 1 << storage::Symbol::FB_tls; @@ -349,6 +352,7 @@ Error Builder::build(ArrayRef IRMods) { setStr(Hdr.Producer, kExpectedProducerName); setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str()); setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); + TT = IRMods[0]->getTargetTriple(); for (auto *M : IRMods) if (Error Err = addModule(M)) @@ -374,8 +378,7 @@ Error Builder::build(ArrayRef IRMods) { Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc) { - const Triple &TT = Mods[0]->getTargetTriple(); - return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods); + return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); } // Upgrade a vector of bitcode modules created by an old version of LLVM by diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 45b0e7dc1226..4548a7520b3b 100644 --- 
a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -533,8 +533,8 @@ struct StaticLibcallNameMap { // different libcalls. RTLIB::RuntimeLibcallsInfo RTCI(TT); for (RTLIB::Libcall LC : RTLIB::libcalls()) { - StringRef NameLibcall = RTCI.getLibcallName(LC); - if (!NameLibcall.empty() && + const char *NameLibcall = RTCI.getLibcallName(LC); + if (NameLibcall != nullptr && getRuntimeLibcallSignatures().Table[LC] != unsupported) { assert(!Map.contains(NameLibcall) && "duplicate libcall names in name map"); diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp index 0642d51cd2c2..540039b7d2cb 100644 --- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp +++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp @@ -30,7 +30,7 @@ PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M, FunctionType *FuncTy = FunctionType::get(Type::getVoidTy(Ctx), {}, /*IsVarArgs=*/true); - StringRef FuncName = RTLCI.getLibcallImplName(Impl); + const char *FuncName = RTLCI.getLibcallImplName(Impl); M.getOrInsertFunction(FuncName, FuncTy); } diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td b/llvm/test/TableGen/RuntimeLibcallEmitter.td index 7c62402227f7..a2d946f3aa84 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td @@ -137,19 +137,6 @@ def BlahLibrary : SystemRuntimeLibrary 9) -// CHECK-NEXT: return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); -// CHECK-NEXT: return lookupLibcallImplNameImpl(Name); -// CHECK-NEXT: #endif - -// CHECK: #ifdef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME -// CHECK-NEXT: static inline uint64_t hash(StringRef Str) { -// CHECK-NEXT: return static_cast(xxh3_64bits(Str)); -// CHECK-NEXT: } - -// CHECK: iota_range RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) { -// CHECK: static constexpr uint16_t 
HashTableNameToEnum[16] = { -// CHECK: 2, // 0x000000705301b8, ___memset -// CHECK: 0, -// CHECK: 6, // 0x0000001417a2af, calloc -// CHECK: 0, -// CHECK: }; - -// CHECK: unsigned Idx = (hash(Name) % 8) * 2; -// CHECK: for (int I = 0; I != 2; ++I) { -// CHECK: return libcallImplNameHit(Entry, StrOffset); - -// CHECK: return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); -// CHECK-NEXT: } // CHECK: void llvm::RTLIB::RuntimeLibcallsInfo::setTargetRuntimeLibcallSets(const llvm::Triple &TT, FloatABI::ABIType FloatABI, EABI EABIVersion, StringRef ABIName) { // CHECK-NEXT: struct LibcallImplPair { diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index 8b7bd3997ea2..b66eae93f933 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,7 +43,6 @@ add_llvm_unittest(IRTests PatternMatch.cpp ShuffleVectorInstTest.cpp StructuralHashTest.cpp - RuntimeLibcallsTest.cpp TimePassesTest.cpp TypesTest.cpp UseTest.cpp diff --git a/llvm/unittests/IR/RuntimeLibcallsTest.cpp b/llvm/unittests/IR/RuntimeLibcallsTest.cpp deleted file mode 100644 index 012316801859..000000000000 --- a/llvm/unittests/IR/RuntimeLibcallsTest.cpp +++ /dev/null @@ -1,63 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/RuntimeLibcalls.h" -#include "llvm/ADT/STLExtras.h" -#include "gtest/gtest.h" -using namespace llvm; - -namespace { - -TEST(RuntimeLibcallsTest, LibcallImplByName) { - EXPECT_TRUE(RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("").empty()); - EXPECT_TRUE( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("unknown").empty()); - EXPECT_TRUE( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("Unsupported").empty()); - EXPECT_TRUE( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("unsupported").empty()); - - for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) { - StringRef Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC); - EXPECT_TRUE(is_contained( - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name), LC)); - } - - // Test first libcall name - EXPECT_EQ( - RTLIB::arm64ec__Unwind_Resume, - *RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("#_Unwind_Resume") - .begin()); - // Test longest libcall names - EXPECT_EQ(RTLIB::__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, - *RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName( - "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes") - .begin()); - - { - auto SquirtleSquad = - RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("sqrtl"); - ASSERT_EQ(size(SquirtleSquad), 3); - auto I = SquirtleSquad.begin(); - EXPECT_EQ(*I++, RTLIB::sqrt_f128); - EXPECT_EQ(*I++, RTLIB::sqrt_f80); - EXPECT_EQ(*I++, RTLIB::sqrt_ppcf128); - } - - // Last libcall - { - auto Truncs = RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("truncl"); - ASSERT_EQ(size(Truncs), 3); - auto I = Truncs.begin(); - EXPECT_EQ(*I++, RTLIB::trunc_f128); - EXPECT_EQ(*I++, RTLIB::trunc_f80); - EXPECT_EQ(*I++, RTLIB::trunc_ppcf128); - } -} - -} // namespace diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp 
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index c305e6323ca9..0fc230c4714f 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -6,15 +6,10 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "runtime-libcall-emitter" - -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/xxhash.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" @@ -220,9 +215,6 @@ private: private: void emitGetRuntimeLibcallEnum(raw_ostream &OS) const; - void emitNameMatchHashTable(raw_ostream &OS, - StringToOffsetTable &OffsetTable) const; - void emitGetInitRuntimeLibcallNames(raw_ostream &OS) const; void emitSystemRuntimeLibrarySetCalls(raw_ostream &OS) const; @@ -263,9 +255,12 @@ public: RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, LibCallImplEnumVal++); - const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); + RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); + Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; + // const RuntimeLibcallImpl &LibCallImpl = + // RuntimeLibcallImplDefList.back(); if (LibCallImpl.isDefault()) { const RuntimeLibcall *Provides = LibCallImpl.getProvides(); if (!Provides) @@ -287,13 +282,6 @@ public: void run(raw_ostream &OS); }; -/// Helper struct for the name hash table. -struct LookupEntry { - StringRef FuncName; - uint64_t Hash = 0; - unsigned TableValue = 0; -}; - } // End anonymous namespace. 
void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { @@ -307,6 +295,8 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { OS << " " << Name << " = " << LibCall.getEnumVal() << ",\n"; } + // TODO: Emit libcall names as string offset table. + OS << " UNKNOWN_LIBCALL = " << RuntimeLibcallDefList.size() << "\n};\n\n" "enum LibcallImpl : unsigned short {\n" @@ -325,179 +315,8 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { "#endif\n\n"; } -// StringMap uses xxh3_64bits, truncated to uint32_t. -static uint64_t hash(StringRef Str) { - return static_cast(xxh3_64bits(Str)); -} - -static void emitHashFunction(raw_ostream &OS) { - OS << "static inline uint64_t hash(StringRef Str) {\n" - " return static_cast(xxh3_64bits(Str));\n" - "}\n\n"; -} - -/// Return the table size, maximum number of collisions for the set of hashes -static std::pair -computePerfectHashParameters(ArrayRef Hashes) { - const int SizeOverhead = 10; - const int NumHashes = Hashes.size(); - - // Index derived from hash -> number of collisions. - DenseMap Table; - - for (int MaxCollisions = 1;; ++MaxCollisions) { - for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) { - Table.clear(); - - bool NeedResize = false; - for (uint64_t H : Hashes) { - uint64_t Idx = H % static_cast(N); - if (++Table[Idx] > MaxCollisions) { - // Need to resize the final table if we increased the collision count. 
- NeedResize = true; - break; - } - } - - if (!NeedResize) - return {N, MaxCollisions}; - } - } -} - -static std::vector -constructPerfectHashTable(ArrayRef Keywords, - ArrayRef Hashes, int Size, int Collisions, - StringToOffsetTable &OffsetTable) { - DenseSet Seen; - std::vector Lookup(Size * Collisions); - - for (const RuntimeLibcallImpl &LibCallImpl : Keywords) { - StringRef ImplName = LibCallImpl.getLibcallFuncName(); - - // We do not want to add repeated entries for cases with the same name, only - // an entry for the first, with the name collision enum values immediately - // following. - if (!Seen.insert(ImplName).second) - continue; - - uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1]; - - uint64_t Idx = (HashValue % static_cast(Size)) * - static_cast(Collisions); - - bool Found = false; - for (int J = 0; J < Collisions; ++J) { - LookupEntry &Entry = Lookup[Idx + J]; - if (Entry.TableValue == 0) { - Entry.FuncName = ImplName; - Entry.TableValue = LibCallImpl.getEnumVal(); - Entry.Hash = HashValue; - Found = true; - break; - } - } - - if (!Found) - reportFatalInternalError("failure to hash " + ImplName); - } - - return Lookup; -} - -/// Generate hash table based lookup by name. 
-void RuntimeLibcallEmitter::emitNameMatchHashTable( - raw_ostream &OS, StringToOffsetTable &OffsetTable) const { - std::vector Hashes(RuntimeLibcallImplDefList.size()); - - size_t MaxFuncNameSize = 0; - size_t Index = 0; - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { - StringRef ImplName = LibCallImpl.getLibcallFuncName(); - MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size()); - Hashes[Index++] = hash(ImplName); - } - - LLVM_DEBUG({ - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { - StringRef ImplName = LibCallImpl.getLibcallFuncName(); - if (ImplName.size() == MaxFuncNameSize) { - dbgs() << "Maximum runtime libcall name size: " << ImplName << '(' - << MaxFuncNameSize << ")\n"; - } - } - }); - - // Early exiting on the symbol name provides a significant speedup in the miss - // case on the set of symbols in a clang binary. Emit this as an inlinable - // precondition in the header. - // - // The empty check is also used to get sensible behavior on anonymous - // functions. - // - // TODO: It may make more sense to split the search by string size more. There - // are a few outliers, most call names are small. 
- OS << "#ifdef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY\n" - " size_t Size = Name.size();\n" - " if (Size == 0 || Size > " - << MaxFuncNameSize - << ")\n" - " return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported);\n" - " return lookupLibcallImplNameImpl(Name);\n" - "#endif\n"; - - auto [Size, Collisions] = computePerfectHashParameters(Hashes); - std::vector Lookup = constructPerfectHashTable( - RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable); - - LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = " - << Size << ", maximum collisions = " << Collisions << '\n'); - - OS << "#ifdef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME\n"; - emitHashFunction(OS); - - OS << "iota_range RTLIB::RuntimeLibcallsInfo::" - "lookupLibcallImplNameImpl(StringRef Name) {\n"; - - // Emit RTLIB::LibcallImpl values - OS << " static constexpr uint16_t HashTableNameToEnum[" << Lookup.size() - << "] = {\n"; - - for (auto [FuncName, Hash, TableVal] : Lookup) { - OS << " " << TableVal << ','; - if (TableVal != 0) - OS << " // " << format_hex(Hash, 16) << ", " << FuncName; - - OS << '\n'; - } - - OS << " };\n\n"; - - OS << " unsigned Idx = (hash(Name) % " << Size << ") * " << Collisions - << ";\n\n" - " for (int I = 0; I != " - << Collisions << R"(; ++I) { - const uint16_t Entry = HashTableNameToEnum[Idx + I]; - const uint16_t StrOffset = RuntimeLibcallNameOffsetTable[Entry]; - const uint8_t StrSize = RuntimeLibcallNameSizeTable[Entry]; - StringRef Str( - &RTLIB::RuntimeLibcallsInfo::RuntimeLibcallImplNameTableStorage[StrOffset], - StrSize); - if (Str == Name) - return libcallImplNameHit(Entry, StrOffset); - } - - return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); -} -)"; - - OS << "#endif\n\n"; -} - void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallNames( raw_ostream &OS) const { - OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_NAMES\n"; - // Emit the implementation names StringToOffsetTable Table(/*AppendZero=*/true, "RTLIB::RuntimeLibcallsInfo::"); @@ -518,15 +337,6 
@@ const uint16_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameOffsetTable[] = { } OS << "};\n"; - OS << R"( -const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { -)"; - - OS << " 0,\n"; - for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) - OS << " " << LibCallImpl.getLibcallFuncName().size() << ",\n"; - OS << "};\n\n"; - // Emit the reverse mapping from implementation libraries to RTLIB::Libcall OS << "const RTLIB::Libcall llvm::RTLIB::RuntimeLibcallsInfo::" "ImplToLibcall[RTLIB::NumLibcallImpls] = {\n" @@ -541,10 +351,6 @@ const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { OS << '\n'; } OS << "};\n\n"; - - OS << "#endif\n\n"; - - emitNameMatchHashTable(OS, Table); } void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( @@ -725,7 +531,9 @@ void RuntimeLibcallEmitter::run(raw_ostream &OS) { emitSourceFileHeader("Runtime LibCalls Source Fragment", OS, Records); emitGetRuntimeLibcallEnum(OS); + OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_NAMES\n"; emitGetInitRuntimeLibcallNames(OS); + OS << "#endif\n\n"; OS << "#ifdef GET_SET_TARGET_RUNTIME_LIBCALL_SETS\n"; emitSystemRuntimeLibrarySetCalls(OS); From e3154559ef7d884505727c4ddc9eaba13c496c09 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 15 Aug 2025 13:53:08 -0700 Subject: [PATCH 018/214] [AMDGPU] Select mul_lohi to V_MAD_NC_{I|U}64_I32 on gfx1250 (#153851) --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 21 +- llvm/test/CodeGen/AMDGPU/mad_64_32.ll | 311 ++++++++++++++++++ 2 files changed, 325 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6a02995fc9cb..04c4d000547a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1196,18 +1196,25 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) { SDLoc SL(N); bool Signed = 
N->getOpcode() == ISD::SMUL_LOHI; + SDVTList VTList; unsigned Opc; - if (Subtarget->hasMADIntraFwdBug()) - Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64 - : AMDGPU::V_MAD_U64_U32_gfx11_e64; - else - Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; + if (Subtarget->hasMadU64U32NoCarry()) { + VTList = CurDAG->getVTList(MVT::i64); + Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64; + } else { + VTList = CurDAG->getVTList(MVT::i64, MVT::i1); + if (Subtarget->hasMADIntraFwdBug()) { + Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64 + : AMDGPU::V_MAD_U64_U32_gfx11_e64; + } else { + Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; + } + } SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64); SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp}; - SDNode *Mad = CurDAG->getMachineNode( - Opc, SL, CurDAG->getVTList(MVT::i64, MVT::i1), Ops); + SDNode *Mad = CurDAG->getMachineNode(Opc, SL, VTList, Ops); if (!SDValue(N, 0).use_empty()) { SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32); SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL, diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll index cf9a700cd64f..b8f9571ccc2e 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s ; On GFX11, ensure vdst and src2 do not partially overlap. 
Full overlap is ok. @@ -54,6 +55,13 @@ define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_sextops: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -106,6 +114,13 @@ define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_sextops_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -158,6 +173,13 @@ define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_u64_u32_zextops: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = zext i32 %arg0 to i64 %sext1 = zext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -210,6 +232,13 @@ define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_u64_u32_zextops_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 
0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = zext i32 %arg0 to i64 %sext1 = zext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -393,6 +422,38 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 { ; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo ; GFX12-NEXT: s_wait_alu 0xfffd ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_sextops_i32_i128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v9, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v1, v9 +; GFX1250-NEXT: v_mov_b32_e32 v21, v9 +; GFX1250-NEXT: v_mul_u64_e32 v[10:11], v[0:1], v[8:9] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_dual_ashrrev_i32 v12, 31, v0 :: v_dual_mov_b32 v8, v11 +; GFX1250-NEXT: v_dual_ashrrev_i32 v7, 31, v6 :: v_dual_mov_b32 v13, v12 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[14:15], v12, v6, v[8:9] +; GFX1250-NEXT: v_mul_u64_e32 v[16:17], v[6:7], v[12:13] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mov_b32_e32 v8, v14 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[18:19], v0, v7, v[8:9] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_dual_mov_b32 v8, v15 :: v_dual_mov_b32 v20, v19 +; GFX1250-NEXT: v_add_nc_u64_e32 v[8:9], v[8:9], v[20:21] +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v7, v0, v[16:17] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[8:9], v12, v7, v[8:9] +; GFX1250-NEXT: v_add_nc_u64_e32 
v[6:7], v[8:9], v[0:1] +; GFX1250-NEXT: v_add_co_u32 v0, vcc_lo, v10, v2 +; GFX1250-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v18, v3, vcc_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v6, v4, vcc_lo +; GFX1250-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v7, v5, vcc_lo +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 to i128 %sext1 = sext i32 %arg1 to i128 %mul = mul i128 %sext0, %sext1 @@ -445,6 +506,13 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_sextops_i32_i63: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 to i63 %sext1 = sext i32 %arg1 to i63 %mul = mul i63 %sext0, %sext1 @@ -510,6 +578,16 @@ define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v1, v[2:3] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_sextops_i31_i63: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 31 +; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 31 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i31 %arg0 to i63 %sext1 = sext i31 %arg1 to i63 %mul = mul i63 %sext0, %sext1 @@ -585,6 +663,17 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: v_ashrrev_i32_e32 v2, 31, v5 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v2, v4, v[1:2] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX1250-LABEL: mad_i64_i32_extops_i32_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v5, v4, v[2:3] +; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v5 +; GFX1250-NEXT: v_mad_u32 v1, v2, v4, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %ext0 = sext i32 %arg0 to i64 %ext1 = zext i32 %arg1 to i64 %mul = mul i64 %ext0, %ext1 @@ -637,6 +726,13 @@ define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v2, v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_u64_u32_bitops: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, v2, v[4:5] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %trunc.lhs = and i64 %arg0, 4294967295 %trunc.rhs = and i64 %arg1, 4294967295 %mul = mul i64 %trunc.lhs, %trunc.rhs @@ -711,6 +807,17 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) # ; GFX12-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v3, v2, v[1:2] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_u64_u32_bitops_lhs_mask_small: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v3, v1 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, v2, v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX1250-NEXT: v_mad_u32 v1, v3, v2, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %trunc.lhs = and i64 %arg0, 8589934591 %trunc.rhs = and i64 %arg1, 4294967295 %mul = mul i64 %trunc.lhs, %trunc.rhs @@ -786,6 +893,17 @@ define i64 
@mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) # ; GFX12-NEXT: v_and_b32_e32 v2, 1, v3 ; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v6, v2, v[1:2] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_u64_u32_bitops_rhs_mask_small: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v6, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v6, v2, v[4:5] +; GFX1250-NEXT: v_and_b32_e32 v2, 1, v3 +; GFX1250-NEXT: v_mad_u32 v1, v6, v2, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %trunc.lhs = and i64 %arg0, 4294967295 %trunc.rhs = and i64 %arg1, 8589934591 %mul = mul i64 %trunc.lhs, %trunc.rhs @@ -838,6 +956,13 @@ define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, v0, v2, v[4:5] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_bitops: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v0, v2, v[4:5] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %shl.lhs = shl i64 %arg0, 32 %trunc.lhs = ashr i64 %shl.lhs, 32 %shl.rhs = shl i64 %arg1, 32 @@ -893,6 +1018,13 @@ define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v1, v0, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_unpack_i64ops: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v1, v0, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %tmp4 = lshr i64 %arg0, 32 %tmp5 = and i64 %arg0, 4294967295 %mul = mul nuw i64 %tmp4, %tmp5 @@ -982,6 +1114,25 @@ define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0, ; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: 
v_dual_mov_b32 v1, s3 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX12-NEXT: s_endpgm +; +; GFX1250-LABEL: mad_i64_i32_uniform: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: s_mov_b32 s7, 0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s6, s2 +; GFX1250-NEXT: s_mov_b32 s2, s3 +; GFX1250-NEXT: s_mov_b32 s3, s7 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: s_mul_u64 s[2:3], s[6:7], s[2:3] +; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5] +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm %ext0 = zext i32 %arg0 to i64 %ext1 = zext i32 %arg1 to i64 %mul = mul i64 %ext0, %ext1 @@ -1055,6 +1206,17 @@ define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 { ; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0 ; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_twice: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_i64_i32 v[2:3], v0, v1, v[2:3] +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], v0, v1, v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_xor_b32_e32 v0, v2, v0 +; GFX1250-NEXT: v_xor_b32_e32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -1174,6 +1336,26 @@ define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 % ; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0 ; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_thrice: +; GFX1250: ; 
%bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_ashrrev_i32 v1, 31, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_ashrrev_i32_e32 v9, 31, v8 +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[8:9] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[2:3] +; GFX1250-NEXT: v_add_nc_u64_e32 v[4:5], v[0:1], v[4:5] +; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[6:7] +; GFX1250-NEXT: v_xor_b32_e32 v2, v2, v4 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_xor_b32_e32 v3, v3, v5 +; GFX1250-NEXT: v_xor_b32_e32 v0, v2, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-NEXT: v_xor_b32_e32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -1256,6 +1438,21 @@ define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX12-NEXT: v_xor_b32_e32 v0, v2, v0 ; GFX12-NEXT: v_xor_b32_e32 v1, v3, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i64_i32_secondary_use: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_ashrrev_i32 v1, 31, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_ashrrev_i32_e32 v5, 31, v4 +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[2:3] +; GFX1250-NEXT: v_xor_b32_e32 v0, v2, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-NEXT: v_xor_b32_e32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sext0 = sext i32 %arg0 
to i64 %sext1 = sext i32 %arg1 to i64 %mul = mul i64 %sext0, %sext1 @@ -1328,6 +1525,18 @@ define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_add3_u32 v1, v2, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mad_i48_i48: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v6, v0 :: v_dual_mov_b32 v7, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v6, v2, v[4:5] +; GFX1250-NEXT: v_mad_u32 v1, v7, v2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, v6, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %m = mul i48 %arg0, %arg1 %a = add i48 %m, %arg2 ret i48 %a @@ -1391,6 +1600,15 @@ define i64 @lshr_mad_i64_1(i64 %arg0, i64 %arg1) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc19, v2, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], 0xfffffc19, v2, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr i64 %arg0, 32 %mul = mul i64 %lsh, s0xfffffffffffffc19 %mad = add i64 %mul, %arg0 @@ -1456,6 +1674,15 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xd1, v2, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_2: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: 
v_mad_nc_u64_u32 v[0:1], 0xd1, v2, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr i64 %arg0, 32 %mul = mul i64 %lsh, s0xffffffff000000d1 %mad = add i64 %mul, %arg0 @@ -1521,6 +1748,15 @@ define i64 @lshr_mad_i64_3(i64 %arg0) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v2, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_3: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v1, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], 0xfffffc88, v2, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr i64 %arg0, 32 %mul = mul i64 s0xfffffffffffffc88, %lsh %mad = add i64 %mul, %arg0 @@ -1602,6 +1838,19 @@ define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xfffffc88, v0, v[3:4] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_4: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[2:3], v[2:3], v[0:1] +; GFX1250-NEXT: v_mov_b32_e32 v0, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], 0xfffffc88, v3, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %ext = zext i32 %arg0 to i64 %mul1 = mul i64 %arg1, %ext %lsh = lshr i64 %mul1, 32 @@ -1666,6 +1915,15 @@ define i64 @lshr_mad_i64_negative_1(i64 %arg0) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mad_co_i64_i32 v[0:1], null, 0xfffffc19, v2, v[0:1] ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_negative_1: +; GFX1250: ; 
%bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 4, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_nc_i64_i32 v[0:1], 0xfffffc19, v2, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr i64 %arg0, 36 %mul = mul i64 %lsh, s0xfffffffffffffc19 %mad = add i64 %mul, %arg0 @@ -1729,6 +1987,16 @@ define i64 @lshr_mad_i64_negative_2(i64 %arg0) #0 { ; GFX12-NEXT: v_sub_nc_u32_e32 v1, v3, v0 ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_negative_2: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[2:3], 0xd1, v1, v[0:1] +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 8, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_sub_nc_u32 v1, v3, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr i64 %arg0, 32 %mul = mul i64 %lsh, s0xffffff00000000d1 %mad = add i64 %mul, %arg0 @@ -1803,6 +2071,18 @@ define i64 @lshr_mad_i64_negative_3(i64 %arg0) #0 { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_negative_3: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_lshrrev_b64 v[2:3], 22, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_and_b32_e32 v2, 0xfffffc00, v2 +; GFX1250-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %op = add i64 %arg0, 1 %lsh = lshr i64 %arg0, 32 %mul = mul i64 %lsh, s0xfffffffffffffc00 @@ -1878,6 +2158,16 @@ define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 { ; GFX12-NEXT: s_delay_alu 
instid0(VALU_DEP_3) ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_negative_4: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[2:3], v1, v0, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mad_u32 v1, v1, v1, v3 +; GFX1250-NEXT: v_mov_b32_e32 v0, v2 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr i64 %arg0, 32 %mul = mul i64 %lsh, %arg0 %mad = add i64 %mul, %arg0 @@ -1938,6 +2228,16 @@ define amdgpu_ps i64 @lshr_mad_i64_sgpr(i64 inreg %arg0) #0 { ; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5] ; GFX12-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] ; GFX12-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: lshr_mad_i64_sgpr: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_mov_b32 s3, 0 +; GFX1250-NEXT: s_mov_b32 s2, s1 +; GFX1250-NEXT: s_mov_b64 s[4:5], lit64(0xffffffffffff1c18) +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX1250-NEXT: ; return to shader part epilog %lsh = lshr i64 %arg0, 32 %mul = mul i64 %lsh, s0xffffffffffff1c18 %mad = add i64 %mul, %arg0 @@ -2018,6 +2318,17 @@ define <2 x i64> @lshr_mad_i64_vec(<2 x i64> %arg0) #0 { ; GFX12-NEXT: v_sub_nc_u32_e32 v3, v7, v3 ; GFX12-NEXT: v_mov_b32_e32 v2, v6 ; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: lshr_mad_i64_vec: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[4:5], 0xffff1c18, v1, v[0:1] +; GFX1250-NEXT: v_mad_nc_u64_u32 v[6:7], 0xffff1118, v3, v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_dual_sub_nc_u32 v1, v5, v1 :: v_dual_sub_nc_u32 v3, v7, v3 +; GFX1250-NEXT: v_dual_mov_b32 v0, v4 :: 
v_dual_mov_b32 v2, v6 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %lsh = lshr <2 x i64> %arg0, %mul = mul <2 x i64> %lsh, %mad = add <2 x i64> %mul, %arg0 From 4c6afc79936f374368171324d82b31f3a7cd7102 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 15 Aug 2025 13:54:49 -0700 Subject: [PATCH 019/214] [flang] Lower hlfir.eoshift to the runtime call. (#153107) Straightforward lowering of hlfir.eoshift to the runtime call in LowerHLFIRIntrinsics pass. --- .../HLFIR/Transforms/LowerHLFIRIntrinsics.cpp | 53 ++-- flang/test/HLFIR/eoshift-lowering.fir | 294 ++++++++++++++++++ 2 files changed, 329 insertions(+), 18 deletions(-) create mode 100644 flang/test/HLFIR/eoshift-lowering.fir diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp index 3c29d6877e8d..e0167cc12b8a 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp @@ -469,33 +469,49 @@ struct MatmulTransposeOpConversion } }; -class CShiftOpConversion : public HlfirIntrinsicConversion { - using HlfirIntrinsicConversion::HlfirIntrinsicConversion; +// A converter for hlfir.cshift and hlfir.eoshift. 
+template +class ArrayShiftOpConversion : public HlfirIntrinsicConversion { + using HlfirIntrinsicConversion::HlfirIntrinsicConversion; + using HlfirIntrinsicConversion::lowerArguments; + using HlfirIntrinsicConversion::processReturnValue; + using typename HlfirIntrinsicConversion::IntrinsicArgument; llvm::LogicalResult - matchAndRewrite(hlfir::CShiftOp cshift, - mlir::PatternRewriter &rewriter) const override { - fir::FirOpBuilder builder{rewriter, cshift.getOperation()}; - const mlir::Location &loc = cshift->getLoc(); + matchAndRewrite(T op, mlir::PatternRewriter &rewriter) const override { + fir::FirOpBuilder builder{rewriter, op.getOperation()}; + const mlir::Location &loc = op->getLoc(); - llvm::SmallVector inArgs; - mlir::Value array = cshift.getArray(); + llvm::SmallVector inArgs; + llvm::StringRef intrinsicName{[]() { + if constexpr (std::is_same_v) + return "eoshift"; + else if constexpr (std::is_same_v) + return "cshift"; + else + llvm_unreachable("unsupported array shift"); + }()}; + + mlir::Value array = op.getArray(); inArgs.push_back({array, array.getType()}); - mlir::Value shift = cshift.getShift(); + mlir::Value shift = op.getShift(); inArgs.push_back({shift, shift.getType()}); - inArgs.push_back({cshift.getDim(), builder.getI32Type()}); + if constexpr (std::is_same_v) { + mlir::Value boundary = op.getBoundary(); + inArgs.push_back({boundary, boundary ? 
boundary.getType() : nullptr}); + } + inArgs.push_back({op.getDim(), builder.getI32Type()}); - auto *argLowering = fir::getIntrinsicArgumentLowering("cshift"); + auto *argLowering = fir::getIntrinsicArgumentLowering(intrinsicName); llvm::SmallVector args = - lowerArguments(cshift, inArgs, rewriter, argLowering); + lowerArguments(op, inArgs, rewriter, argLowering); - mlir::Type scalarResultType = - hlfir::getFortranElementType(cshift.getType()); + mlir::Type scalarResultType = hlfir::getFortranElementType(op.getType()); - auto [resultExv, mustBeFreed] = - fir::genIntrinsicCall(builder, loc, "cshift", scalarResultType, args); + auto [resultExv, mustBeFreed] = fir::genIntrinsicCall( + builder, loc, intrinsicName, scalarResultType, args); - processReturnValue(cshift, resultExv, mustBeFreed, builder, rewriter); + processReturnValue(op, resultExv, mustBeFreed, builder, rewriter); return mlir::success(); } }; @@ -547,7 +563,8 @@ public: AnyOpConversion, SumOpConversion, ProductOpConversion, TransposeOpConversion, CountOpConversion, DotProductOpConversion, MaxvalOpConversion, MinvalOpConversion, MinlocOpConversion, - MaxlocOpConversion, CShiftOpConversion, ReshapeOpConversion>(context); + MaxlocOpConversion, ArrayShiftOpConversion, + ArrayShiftOpConversion, ReshapeOpConversion>(context); // While conceptually this pass is performing dialect conversion, we use // pattern rewrites here instead of dialect conversion because this pass diff --git a/flang/test/HLFIR/eoshift-lowering.fir b/flang/test/HLFIR/eoshift-lowering.fir new file mode 100644 index 000000000000..7bfc3e21f052 --- /dev/null +++ b/flang/test/HLFIR/eoshift-lowering.fir @@ -0,0 +1,294 @@ +// Test hlfir.eoshift operation lowering to fir runtime call +// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s + +// 1d boxed vector shift by scalar +func.func @eoshift1(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "sh"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> 
(!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg1 {uniq_name = "sh"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2 = hlfir.eoshift %0#0 %1#0 : (!fir.box>, !fir.ref) -> !hlfir.expr + hlfir.assign %2 to %0#0 : !hlfir.expr, !fir.box> + return +} +// CHECK-LABEL: func.func @eoshift1( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "sh"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "sh"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_8:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_10:.*]] = fir.embox %[[VAL_8]](%[[VAL_9]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_5]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_6]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 +// CHECK: fir.call @_FortranAEoshiftVector(%[[VAL_13]], %[[VAL_14]], %[[VAL_15]], %[[BOUNDARY]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i64, !fir.box, !fir.ref, i32) -> () + +// 2d boxed array shift by scalar +func.func @eoshift2(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: i32 {fir.bindc_name = "sh"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2 = hlfir.eoshift %0#0 %arg1 : (!fir.box>, i32) -> !hlfir.expr + hlfir.assign %2 to %0#0 : !hlfir.expr, !fir.box> + return +} +// CHECK-LABEL: func.func @eoshift2( +// CHECK-SAME: 
%[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[VAL_1:.*]]: i32 {fir.bindc_name = "sh"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]] = fir.alloca i32 +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: fir.store %[[VAL_1]] to %[[VAL_7]] : !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_11]] to %[[VAL_6]] : !fir.ref>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_7]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_12]] : (!fir.box) -> !fir.box +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_4]] : (index) -> i32 +// CHECK: fir.call @_FortranAEoshift(%[[VAL_14]], %[[VAL_15]], %[[VAL_16]], %[[BOUNDARY]], %[[VAL_17]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () + +// 2d boxed array shift by boxed array +func.func @eoshift3(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "sh"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %1:2 = hlfir.declare %arg1 {uniq_name = "sh"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2 = hlfir.eoshift %0#0 %1#0 : (!fir.box>, !fir.box>) -> !hlfir.expr + hlfir.assign %2 to %0#0 : !hlfir.expr, !fir.box> + return +} +// CHECK-LABEL: func.func @eoshift3( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> 
{fir.bindc_name = "a"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "sh"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "sh"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_9:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_9]](%[[VAL_10]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_11]] to %[[VAL_6]] : !fir.ref>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_4]] : (index) -> i32 +// CHECK: fir.call @_FortranAEoshift(%[[VAL_13]], %[[VAL_14]], %[[VAL_15]], %[[BOUNDARY]], %[[VAL_16]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () + +// 2d boxed array shift by array expr +func.func @eoshift4(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !hlfir.expr {fir.bindc_name = "sh"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2 = hlfir.eoshift %0#0 %arg1 : (!fir.box>, !hlfir.expr) -> !hlfir.expr + hlfir.assign %2 to %0#0 : !hlfir.expr, !fir.box> + return +} +// CHECK-LABEL: func.func @eoshift4( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr {fir.bindc_name = "sh"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = 
arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "a"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_1]] : (!hlfir.expr) -> !fir.shape<1> +// CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_1]](%[[VAL_8]]) {adapt.valuebyref} : (!hlfir.expr, !fir.shape<1>) -> (!fir.box>, !fir.ref>, i1) +// CHECK: %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_11:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_12:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_11]](%[[VAL_12]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_13]] to %[[VAL_6]] : !fir.ref>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_15:.*]] = fir.embox %[[VAL_9]]#1(%[[VAL_14]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_4]] : (index) -> i32 +// CHECK: fir.call @_FortranAEoshift(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[BOUNDARY]], %[[VAL_20]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () + +// 2d array expr shift by array expr +func.func @eoshift5(%arg0: !hlfir.expr {fir.bindc_name = "a"}, %arg1: !hlfir.expr {fir.bindc_name = "sh"}) { + %2 = hlfir.eoshift %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> !hlfir.expr + hlfir.destroy %2 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @eoshift5( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr {fir.bindc_name = "a"}, 
+// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr {fir.bindc_name = "sh"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> +// CHECK: %[[VAL_8:.*]]:3 = hlfir.associate %[[VAL_0]](%[[VAL_7]]) {adapt.valuebyref} : (!hlfir.expr, !fir.shape<2>) -> (!fir.box>, !fir.ref>, i1) +// CHECK: %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 0 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 1 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_9]], %[[VAL_10]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_15:.*]] = fir.embox %[[VAL_8]]#1(%[[VAL_14]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> +// CHECK: %[[VAL_11:.*]] = hlfir.shape_of %[[VAL_1]] : (!hlfir.expr) -> !fir.shape<1> +// CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_1]](%[[VAL_11]]) {adapt.valuebyref} : (!hlfir.expr, !fir.shape<1>) -> (!fir.box>, !fir.ref>, i1) +// CHECK: %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_11]] {dim = 0 : index} : (!fir.shape<1>) -> index +// CHECK: %[[VAL_16:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_5]], %[[VAL_5]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_16]](%[[VAL_17]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_18]] to %[[VAL_6]] : !fir.ref>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_20:.*]] = fir.embox %[[VAL_12]]#1(%[[VAL_19]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_15]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_24:.*]] = fir.convert 
%[[VAL_20]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_4]] : (index) -> i32 +// CHECK: fir.call @_FortranAEoshift(%[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[BOUNDARY]], %[[VAL_25]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () + +// 2d array expr shift by array expr with explicit dim +func.func @eoshift6(%arg0: !hlfir.expr {fir.bindc_name = "a"}, %arg1: !hlfir.expr {fir.bindc_name = "sh"}, %dim : i16) { + %2 = hlfir.eoshift %arg0 %arg1 dim %dim : (!hlfir.expr, !hlfir.expr, i16) -> !hlfir.expr + hlfir.destroy %2 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @eoshift6( +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr {fir.bindc_name = "a"}, +// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr {fir.bindc_name = "sh"}, +// CHECK-SAME: %[[VAL_2:.*]]: i16) { +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> +// CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_0]](%[[VAL_8]]) {adapt.valuebyref} : (!hlfir.expr, !fir.shape<2>) -> (!fir.box>, !fir.ref>, i1) +// CHECK: %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<2>) -> index +// CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_10]], %[[VAL_11]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_9]]#1(%[[VAL_16]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> +// CHECK: %[[VAL_12:.*]] = hlfir.shape_of %[[VAL_1]] : (!hlfir.expr) -> !fir.shape<1> +// CHECK: %[[VAL_13:.*]]:3 = hlfir.associate %[[VAL_1]](%[[VAL_12]]) {adapt.valuebyref} : (!hlfir.expr, !fir.shape<1>) -> (!fir.box>, !fir.ref>, i1) +// CHECK: %[[VAL_14:.*]] = hlfir.get_extent %[[VAL_12]] {dim = 0 : index} : (!fir.shape<1>) -> 
index +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_2]] : (i16) -> i32 +// CHECK: %[[VAL_18:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_6]], %[[VAL_6]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_20:.*]] = fir.embox %[[VAL_18]](%[[VAL_19]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_20]] to %[[VAL_7]] : !fir.ref>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_21:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_22:.*]] = fir.embox %[[VAL_13]]#1(%[[VAL_21]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_17]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_22]] : (!fir.box>) -> !fir.box +// CHECK: fir.call @_FortranAEoshift(%[[VAL_24]], %[[VAL_25]], %[[VAL_26]], %[[BOUNDARY]], %[[VAL_15]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () + +// shift of polymorphic array +func.func @eoshift7(%arg0: !fir.ref>>>>, %arg1: !fir.ref) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "a"} : (!fir.ref>>>>, !fir.dscope) -> (!fir.ref>>>>, !fir.ref>>>>) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "sh"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %c2_i32 = arith.constant 2 : i32 + %3 = fir.load %1#0 : !fir.ref>>>> + %4 = hlfir.eoshift %3 %c2_i32 : (!fir.class>>>, i32) -> !hlfir.expr?> + hlfir.assign %4 to %1#0 realloc : !hlfir.expr?>, !fir.ref>>>> + hlfir.destroy %4 : !hlfir.expr?> + return +} +// CHECK-LABEL: func.func @eoshift7( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>>>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref) { +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +// CHECK: 
%[[VAL_6:.*]] = fir.alloca !fir.class>>> +// CHECK: %[[VAL_7:.*]] = fir.alloca i32 +// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_8]] {fortran_attrs = #fir.var_attrs, uniq_name = "a"} : (!fir.ref>>>>, !fir.dscope) -> (!fir.ref>>>>, !fir.ref>>>>) +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_8]] {uniq_name = "sh"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref>>>> +// CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.zero_bits !fir.heap>> +// CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) source_box %[[VAL_11]] : (!fir.heap>>, !fir.shape<1>, !fir.class>>>) -> !fir.class>>> +// CHECK: fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref>>>> +// CHECK: %[[BOUNDARY:.*]] = fir.absent !fir.box +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_7]] : !fir.ref +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>>>) -> !fir.ref> +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.class>>>) -> !fir.box +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64 +// CHECK: fir.call @_FortranAEoshiftVector(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[BOUNDARY]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i64, !fir.box, !fir.ref, i32) -> () + +// shift with the present scalar boundary and dim +func.func @_QPeoshift8(%arg0: !fir.box> {fir.bindc_name = "array"}) { + %cst = arith.constant 3.000000e+00 : f32 + %c2_i32 = arith.constant 2 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift8Earray"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2 = hlfir.eoshift %1#0 %c2_i32 boundary %cst dim %c2_i32 : (!fir.box>, i32, f32, i32) -> !hlfir.expr + hlfir.assign %2 to %1#0 : !hlfir.expr, !fir.box> + hlfir.destroy %2 : 
!hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPeoshift8( +// CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 3.000000e+00 : f32 +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]] = fir.alloca f32 +// CHECK: %[[VAL_8:.*]] = fir.alloca i32 +// CHECK: %[[VAL_9:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_9]] {uniq_name = "_QFeoshift8Earray"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: fir.store %[[VAL_4]] to %[[VAL_8]] : !fir.ref +// CHECK: fir.store %[[VAL_5]] to %[[VAL_7]] : !fir.ref +// CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_7]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_12:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_3]], %[[VAL_3]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref>>> +// CHECK: %[[VAL_15:.*]] = fir.embox %[[VAL_8]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_10]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_15]] : (!fir.box) -> !fir.box +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_11]] : (!fir.box) -> !fir.box +// CHECK: fir.call @_FortranAEoshift(%[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %[[VAL_20]], %[[VAL_4]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () + +// shift with the present array boundary +func.func @_QPeoshift9(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.box> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 
dummy_scope %0 {uniq_name = "_QFeoshift9Earray"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFeoshift9Eboundary"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3 = hlfir.eoshift %1#0 %c2_i32 boundary %2#0 : (!fir.box>, i32, !fir.box>) -> !hlfir.expr + hlfir.assign %3 to %1#0 : !hlfir.expr, !fir.box> + hlfir.destroy %3 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPeoshift9( +// CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.box> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_6:.*]] = fir.alloca i32 +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift9Earray"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift9Eboundary"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: fir.store %[[VAL_4]] to %[[VAL_6]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_3]], %[[VAL_3]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_10]](%[[VAL_11]]) : (!fir.heap>, !fir.shape<2>) -> !fir.box>> +// CHECK: fir.store %[[VAL_12]] to %[[VAL_5]] : !fir.ref>>> +// CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_6]] : (!fir.ref) -> !fir.box +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_5]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_8]]#1 : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_13]] : (!fir.box) -> !fir.box +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_9]]#1 : (!fir.box>) -> !fir.box 
+// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_2]] : (index) -> i32 +// CHECK: fir.call @_FortranAEoshift(%[[VAL_15]], %[[VAL_16]], %[[VAL_17]], %[[VAL_18]], %[[VAL_19]], %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.box, !fir.box, i32, !fir.ref, i32) -> () From 25285b3476292fea239fdab945ca39d156c782d5 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 15 Aug 2025 13:55:05 -0700 Subject: [PATCH 020/214] [flang] Lower EOSHIFT into hlfir.eoshift. (#153106) Straightforward lowering of EOSHIFT intrinsic into the new hlfir.eoshift operation. --- flang/lib/Lower/HlfirIntrinsics.cpp | 41 +++++ flang/test/Lower/HLFIR/eoshift.f90 | 259 ++++++++++++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 flang/test/Lower/HLFIR/eoshift.f90 diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp index 6e1d06a25924..3b0f2e35cd5b 100644 --- a/flang/lib/Lower/HlfirIntrinsics.cpp +++ b/flang/lib/Lower/HlfirIntrinsics.cpp @@ -170,6 +170,17 @@ protected: mlir::Type stmtResultType) override; }; +class HlfirEOShiftLowering : public HlfirTransformationalIntrinsic { +public: + using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; + +protected: + mlir::Value + lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) override; +}; + class HlfirReshapeLowering : public HlfirTransformationalIntrinsic { public: using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; @@ -430,6 +441,33 @@ mlir::Value HlfirCShiftLowering::lowerImpl( return createOp(resultType, operands); } +mlir::Value HlfirEOShiftLowering::lowerImpl( + const Fortran::lower::PreparedActualArguments &loweredActuals, + const fir::IntrinsicArgumentLoweringRules *argLowering, + mlir::Type stmtResultType) { + auto operands = getOperandVector(loweredActuals, argLowering); + assert(operands.size() == 4); + mlir::Value array = operands[0]; + mlir::Value 
shift = operands[1]; + mlir::Value boundary = operands[2]; + mlir::Value dim = operands[3]; + // If DIM is present, then dereference it if it is a ref. + if (dim) + dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim}); + + mlir::Type resultType = computeResultType(array, stmtResultType); + + // Scalar logical constant boundary might be represented using i1, i2, ... + // type. We need to cast it to fir.logical type of the ARRAY/result. + if (auto logicalTy = mlir::dyn_cast( + hlfir::getFortranElementType(resultType))) + if (boundary && fir::isa_trivial(boundary.getType()) && + boundary.getType() != logicalTy) + boundary = builder.createConvert(loc, logicalTy, boundary); + + return createOp(resultType, array, shift, boundary, dim); +} + mlir::Value HlfirReshapeLowering::lowerImpl( const Fortran::lower::PreparedActualArguments &loweredActuals, const fir::IntrinsicArgumentLoweringRules *argLowering, @@ -489,6 +527,9 @@ std::optional Fortran::lower::lowerHlfirIntrinsic( if (name == "cshift") return HlfirCShiftLowering{builder, loc}.lower(loweredActuals, argLowering, stmtResultType); + if (name == "eoshift") + return HlfirEOShiftLowering{builder, loc}.lower(loweredActuals, argLowering, + stmtResultType); if (name == "reshape") return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, argLowering, stmtResultType); diff --git a/flang/test/Lower/HLFIR/eoshift.f90 b/flang/test/Lower/HLFIR/eoshift.f90 new file mode 100644 index 000000000000..3b2570ab5936 --- /dev/null +++ b/flang/test/Lower/HLFIR/eoshift.f90 @@ -0,0 +1,259 @@ +! Test lowering of EOSHIFT intrinsic to HLFIR +! RUN: bbc -emit-hlfir -o - -I nowhere %s 2>&1 | FileCheck %s + +module eoshift_types + type t + end type t +end module eoshift_types + +! 1d shift by scalar +subroutine eoshift1(a, s) + integer :: a(:), s + a = EOSHIFT(a, 2) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift1( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! 
CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box>, i32) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! 1d shift by scalar with dim +subroutine eoshift2(a, s) + integer :: a(:), s + a = EOSHIFT(a, 2, dim=1) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift2( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] dim %[[VAL_6]] : (!fir.box>, i32, i32) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! 2d shift by scalar +subroutine eoshift3(a, s) + integer :: a(:,:), s + a = EOSHIFT(a, 2) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift3( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box>, i32) -> !hlfir.expr +! 
CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! 2d shift by scalar with dim +subroutine eoshift4(a, s) + integer :: a(:,:), s + a = EOSHIFT(a, 2, dim=2) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift4( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] dim %[[VAL_6]] : (!fir.box>, i32, i32) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! 2d shift by array +subroutine eoshift5(a, s) + integer :: a(:,:), s(:) + a = EOSHIFT(a, s) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift5( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_4]]#0 : (!fir.box>, !fir.box>) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_5]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_5]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! 2d shift by array expr +subroutine eoshift6(a, s) + integer :: a(:,:), s(:) + a = EOSHIFT(a, s + 1) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift6( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "s"}) { +! 
CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr +! CHECK: %[[VAL_14:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_9]] : (!fir.box>, !hlfir.expr) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_14]] : !hlfir.expr +! CHECK: hlfir.destroy %[[VAL_9]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! 1d character(10,2) shift by scalar +subroutine eoshift7(a, s) + character(10,2) :: a(:) + a = EOSHIFT(a, 2) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift7( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_6:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_4]]#0 %[[VAL_6]] : (!fir.box>>, i32) -> !hlfir.expr> +! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_4]]#0 : !hlfir.expr>, !fir.box>> +! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr> +! CHECK: return +! CHECK: } + +! 1d character(*) shift by scalar +subroutine eoshift8(a, s) + character(*) :: a(:) + a = EOSHIFT(a, 2) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift8( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! 
CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] +! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box>>, i32) -> !hlfir.expr> +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr>, !fir.box>> +! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr> +! CHECK: return +! CHECK: } + +! 1d type(t) shift by scalar +subroutine eoshift9(a, s) + use eoshift_types + type(t) :: a(:) + a = EOSHIFT(a, 2, boundary=t()) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift9( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box>> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift9Ea"} : (!fir.box>>, !fir.dscope) -> (!fir.box>>, !fir.box>>) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift9Es"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_4:.*]] = fir.address_of(@_QQro._QMeoshift_typesTt.0) : !fir.ref> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QMeoshift_typesTt.0"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_1]]#0 %[[VAL_3]] boundary %[[VAL_5]]#0 : (!fir.box>>, i32, !fir.ref>) -> !hlfir.expr> +! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_1]]#0 : !hlfir.expr>, !fir.box>> +! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr> +! CHECK: return +! CHECK: } + +! 1d class(t) shift by scalar +subroutine eoshift10(a, s) + use eoshift_types + class(t), allocatable :: a(:) + a = EOSHIFT(a, 2, boundary=t()) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift10( +! 
CHECK-SAME: %[[ARG0:.*]]: !fir.ref>>>> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "s"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift10Ea"} : (!fir.ref>>>>, !fir.dscope) -> (!fir.ref>>>>, !fir.ref>>>>) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift10Es"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_4:.*]] = fir.address_of(@_QQro._QMeoshift_typesTt.1) : !fir.ref> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QMeoshift_typesTt.1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref>>>> +! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_6]] %[[VAL_3]] boundary %[[VAL_5]]#0 : (!fir.class>>>, i32, !fir.ref>) -> !hlfir.expr?> +! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_1]]#0 realloc : !hlfir.expr?>, !fir.ref>>>> +! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr?> +! CHECK: return +! CHECK: } + +! 1d shift by scalar with variable dim +subroutine eoshift11(a, s, d) + integer :: a(:), s, d + a = EOSHIFT(a, 2, dim=d) +end subroutine +! CHECK-LABEL: func.func @_QPeoshift11( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}, +! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "d"}) { +! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Ea"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Ed"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! 
CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Es"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +! CHECK: %[[VAL_9:.*]] = hlfir.eoshift %[[VAL_4]]#0 %[[VAL_7]] dim %[[VAL_8]] : (!fir.box>, i32, i32) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_9]] : !hlfir.expr +! CHECK: return +! CHECK: } + +subroutine eoshift12(array, shift, boundary, dim) + real :: array(:,:) + real, optional :: boundary + integer :: shift(:), dim + array = EOSHIFT(array, shift, boundary, dim) +end subroutine eoshift12 +! CHECK-LABEL: func.func @_QPeoshift12( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "array"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.box> {fir.bindc_name = "shift"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "boundary", fir.optional}, +! CHECK-SAME: %[[ARG3:.*]]: !fir.ref {fir.bindc_name = "dim"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Earray"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift12Eboundary"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Edim"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Eshift"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +! CHECK: %[[VAL_5:.*]] = fir.is_present %[[VAL_2]]#0 : (!fir.ref) -> i1 +! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_2]]#0 : (!fir.ref) -> !fir.box +! CHECK: %[[VAL_7:.*]] = fir.absent !fir.box +! 
CHECK: %[[VAL_8:.*]] = arith.select %[[VAL_5]], %[[VAL_6]], %[[VAL_7]] : !fir.box +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_10:.*]] = hlfir.eoshift %[[VAL_1]]#0 %[[VAL_4]]#0 boundary %[[VAL_8]] dim %[[VAL_9]] : (!fir.box>, !fir.box>, !fir.box, i32) -> !hlfir.expr +! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_1]]#0 : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr +! CHECK: return +! CHECK: } + +! Test scalar logical boundary. +! CHECK-LABEL: func.func @_QPeoshift13( +subroutine eoshift13(array) + logical(1) :: array(:) + array = EOSHIFT(array, -1, .true._1) +! CHECK: %[[VAL_5:.*]] = hlfir.eoshift %{{.*}} %{{.*}} boundary %{{.*}} : (!fir.box>>, i32, !fir.logical<1>) -> !hlfir.expr> + array = EOSHIFT(array.EQV..false., -1, .true.) +! CHECK: %[[VAL_24:.*]] = hlfir.eoshift %{{.*}} %{{.*}} boundary %{{.*}} : (!hlfir.expr>, i32, !fir.logical<4>) -> !hlfir.expr> +end subroutine eoshift13 From eecbaac5c6699175b4c4f22e2d4fbad8c9cfa6ab Mon Sep 17 00:00:00 2001 From: Chenguang Wang Date: Fri, 15 Aug 2025 14:16:03 -0700 Subject: [PATCH 021/214] [bazel] Add yaml2obj to mlir/Test/Target/BUILD.bazel (#153875) https://github.com/llvm/llvm-project/pull/152131 uses yaml2obj, which is not listed as a dependency of the lit tests in bazel. This is causing LLVM CI failures, e.g [1]. 
[1]: https://buildkite.com/llvm-project/upstream-bazel/builds/146788/steps/canvas?sid=0198af37-f624-470f-aac1-d9e0b42fab56 --- utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel index 9a98f640d272..b5a8dbd2885b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Target/BUILD.bazel @@ -10,6 +10,7 @@ package(default_visibility = ["//visibility:public"]) srcs = [src], data = [ "//llvm:split-file", + "//llvm:yaml2obj", "//mlir:mlir-opt", "//mlir:mlir-translate", "//mlir/test:lit_data", From 1f25c4883e08113ecd122879d82dbe77eac4eca7 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 15 Aug 2025 14:17:54 -0700 Subject: [PATCH 022/214] [AMDGPU] Mitigate DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 bug (#153872) DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused (we already do not clause DS instructions) and needs waits before and after. 
--- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +++++++++++++++++ llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 ++++++ llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 17 +++++++++++++++++ 4 files changed, 41 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1f291ce5c534..5e297c7540c4 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1202,6 +1202,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixRequiredExportPriority(MI); if (ST.requiresWaitIdleBeforeGetReg()) fixGetRegWaitIdle(MI); + if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug()) + fixDsAtomicAsyncBarrierArriveB64(MI); } static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI, @@ -3451,3 +3453,18 @@ bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) { .addImm(0); return true; } + +bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) { + if (MI->getOpcode() != AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64) + return false; + + const SIInstrInfo *TII = ST.getInstrInfo(); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0xFFE3); + BuildMI(*MI->getParent(), std::next(MI->getIterator()), MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(0xFFE3); + + return true; +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index a078f50219c3..890d5cbd154d 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -111,6 +111,7 @@ private: bool fixVALUMaskWriteHazard(MachineInstr *MI); bool fixRequiredExportPriority(MachineInstr *MI); bool fixGetRegWaitIdle(MachineInstr *MI); + bool 
fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI); int checkMAIHazards(MachineInstr *MI); int checkMAIHazards908(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 92de024cc6fc..436f5c0801fa 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1815,6 +1815,12 @@ public: // to the same register. return false; } + + // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything + // and must be surrounded by S_WAIT_ALU(0xFFE3). + bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const { + return getGeneration() == GFX12; + } }; class GCNUserSGPRUsageInfo { diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir new file mode 100644 index 000000000000..f1dbabf1e1a8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir @@ -0,0 +1,17 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: ds_atomic_async_barrier_arrive_b64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GCN-LABEL: name: ds_atomic_async_barrier_arrive_b64 + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + ; GCN-NEXT: DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec + ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec +... From d0b19cf792eb673a5b8ad5f77793d270d0f44981 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 21:31:17 +0000 Subject: [PATCH 023/214] [Github][CI] Set CC and CXX in CI Container We set these explicitly in a bunch of places. 
That is annoying and it is nice to get them picked up by default rather than needing to remember. --- .github/workflows/containers/github-action-ci/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/containers/github-action-ci/Dockerfile b/.github/workflows/containers/github-action-ci/Dockerfile index 227496051af5..8a888f3a411c 100644 --- a/.github/workflows/containers/github-action-ci/Dockerfile +++ b/.github/workflows/containers/github-action-ci/Dockerfile @@ -81,6 +81,8 @@ RUN curl -L 'https://github.com/mozilla/sccache/releases/download/v0.10.0/sccach ENV LLVM_SYSROOT=$LLVM_SYSROOT ENV PATH=${LLVM_SYSROOT}/bin:${PATH} +ENV CC=clang +ENV CXX=clang++ # Create a new user to avoid test failures related to a lack of expected # permissions issues in some tests. Set the user id to 1001 as that is the From 21a5729b87a6c2e271334049fa28af10fa12f0c7 Mon Sep 17 00:00:00 2001 From: Haibo Jiang <33863061+Jianghibo@users.noreply.github.com> Date: Sat, 16 Aug 2025 05:35:13 +0800 Subject: [PATCH 024/214] [BOLT] Do not use HLT as split point when build the CFG (#150963) For x86, the halt instruction is defined as a terminator instruction. When building the CFG, the instruction sequence following the hlt instruction is treated as an independent MBB. Since there is no jump information, the predecessor of this MBB cannot be identified, and it is considered an unreachable MBB that will be removed. With this fix, the instruction sequences before and after hlt are no longer placed in different blocks. 
--- bolt/include/bolt/Core/MCPlusBuilder.h | 4 ++++ bolt/lib/Core/MCPlusBuilder.cpp | 6 ++++-- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 4 ++++ bolt/test/X86/cfg_build_hlt.s | 17 +++++++++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 bolt/test/X86/cfg_build_hlt.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index f902a8c43cd1..e773250ce873 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -740,6 +740,10 @@ public: return false; } + /// Return true if the hlt instruction under the x86, otherwise, default to + /// false. + virtual bool isX86HLT(const MCInst &Inst) const { return false; } + /// Return the width, in bytes, of the memory access performed by \p Inst, if /// this is a pop instruction. Return zero otherwise. virtual int getPopSize(const MCInst &Inst) const { diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index fa8f4d1df308..d8a2ac6f6837 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -132,8 +132,10 @@ bool MCPlusBuilder::equals(const MCSpecifierExpr &A, const MCSpecifierExpr &B, } bool MCPlusBuilder::isTerminator(const MCInst &Inst) const { - return Analysis->isTerminator(Inst) || - (opts::TerminalTrap && Info->get(Inst.getOpcode()).isTrap()); + return (opts::TerminalTrap && Info->get(Inst.getOpcode()).isTrap()) || + Analysis->isTerminator(Inst) + ? 
!isX86HLT(Inst) + : false; } void MCPlusBuilder::setTailCall(MCInst &Inst) const { diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index a60c1a6bf156..1842509dcc5e 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -223,6 +223,10 @@ public: return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64; } + bool isX86HLT(const MCInst &Inst) const override { + return Inst.getOpcode() == X86::HLT; + } + int getPopSize(const MCInst &Inst) const override { switch (Inst.getOpcode()) { case X86::POP16r: diff --git a/bolt/test/X86/cfg_build_hlt.s b/bolt/test/X86/cfg_build_hlt.s new file mode 100644 index 000000000000..a78134df3401 --- /dev/null +++ b/bolt/test/X86/cfg_build_hlt.s @@ -0,0 +1,17 @@ +## Check CFG for halt instruction + +# RUN: %clang %cflags %s -static -o %t.exe -nostdlib +# RUN: llvm-bolt %t.exe --print-cfg --print-only=main -o %t 2>&1 | FileCheck %s --check-prefix=CHECK-CFG +# RUN: llvm-objdump -d %t --print-imm-hex | FileCheck %s --check-prefix=CHECK-BIN + +# CHECK-CFG: BB Count : 1 +# CHECK-BIN:
: +# CHECK-BIN-NEXT: f4 hlt +# CHECK-BIN-NEXT: c3 retq + +.global main + .type main, %function +main: + hlt + retq +.size main, .-main From 19cfc30b33df5bf121bac96d4c1aac6028df0670 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 14 Aug 2025 12:24:12 -0700 Subject: [PATCH 025/214] compiler-rt: Make the tests pass on AArch64 and with page size != 4096. This makes the tests pass on my AArch64 machine with 16K pages. Not sure why some of the AArch64-specific test failures don't seem to occur on sanitizer-aarch64-linux. I could also reproduce them by running buildbot_cmake.sh on my machine. Pull Request: https://github.com/llvm/llvm-project/pull/153860 --- compiler-rt/lib/gwp_asan/tests/basic.cpp | 11 ++++++----- .../lib/gwp_asan/tests/never_allocated.cpp | 10 ++++++---- .../asan/TestCases/Linux/release_to_os_test.cpp | 1 + compiler-rt/test/cfi/cross-dso/lit.local.cfg.py | 4 ++++ compiler-rt/test/lit.common.cfg.py | 17 +++++++++++++++++ compiler-rt/test/msan/dtls_test.c | 1 + .../TestCases/Linux/odd_stack_size.cpp | 1 + .../TestCases/Linux/release_to_os_test.cpp | 3 +++ .../TestCases/Linux/resize_tls_dynamic.cpp | 3 +++ .../TestCases/Linux/tls_get_addr.c | 3 +++ 10 files changed, 45 insertions(+), 9 deletions(-) diff --git a/compiler-rt/lib/gwp_asan/tests/basic.cpp b/compiler-rt/lib/gwp_asan/tests/basic.cpp index 88e7ed14a5c2..7d36a2ee1f94 100644 --- a/compiler-rt/lib/gwp_asan/tests/basic.cpp +++ b/compiler-rt/lib/gwp_asan/tests/basic.cpp @@ -65,11 +65,12 @@ TEST_F(DefaultGuardedPoolAllocator, NonPowerOfTwoAlignment) { // Added multi-page slots? You'll need to expand this test. 
TEST_F(DefaultGuardedPoolAllocator, TooBigForSinglePageSlots) { - EXPECT_EQ(nullptr, GPA.allocate(0x1001, 0)); - EXPECT_EQ(nullptr, GPA.allocate(0x1001, 1)); - EXPECT_EQ(nullptr, GPA.allocate(0x1001, 0x1000)); - EXPECT_EQ(nullptr, GPA.allocate(1, 0x2000)); - EXPECT_EQ(nullptr, GPA.allocate(0, 0x2000)); + size_t PageSize = sysconf(_SC_PAGESIZE); + EXPECT_EQ(nullptr, GPA.allocate(PageSize + 1, 0)); + EXPECT_EQ(nullptr, GPA.allocate(PageSize + 1, 1)); + EXPECT_EQ(nullptr, GPA.allocate(PageSize + 1, PageSize)); + EXPECT_EQ(nullptr, GPA.allocate(1, 2 * PageSize)); + EXPECT_EQ(nullptr, GPA.allocate(0, 2 * PageSize)); } TEST_F(CustomGuardedPoolAllocator, AllocAllSlots) { diff --git a/compiler-rt/lib/gwp_asan/tests/never_allocated.cpp b/compiler-rt/lib/gwp_asan/tests/never_allocated.cpp index 2f695b437986..37a4b384e4ac 100644 --- a/compiler-rt/lib/gwp_asan/tests/never_allocated.cpp +++ b/compiler-rt/lib/gwp_asan/tests/never_allocated.cpp @@ -13,8 +13,10 @@ #include "gwp_asan/tests/harness.h" TEST_P(BacktraceGuardedPoolAllocatorDeathTest, NeverAllocated) { + size_t PageSize = sysconf(_SC_PAGESIZE); + SCOPED_TRACE(""); - void *Ptr = GPA.allocate(0x1000); + void *Ptr = GPA.allocate(PageSize); GPA.deallocate(Ptr); std::string DeathNeedle = @@ -23,7 +25,7 @@ TEST_P(BacktraceGuardedPoolAllocatorDeathTest, NeverAllocated) { // Trigger a guard page in a completely different slot that's never allocated. // Previously, there was a bug that this would result in nullptr-dereference // in the posix crash handler. 
- char *volatile NeverAllocatedPtr = static_cast(Ptr) + 0x3000; + char *volatile NeverAllocatedPtr = static_cast(Ptr) + 3 * PageSize; if (!Recoverable) { EXPECT_DEATH(*NeverAllocatedPtr = 0, DeathNeedle); return; @@ -37,8 +39,8 @@ TEST_P(BacktraceGuardedPoolAllocatorDeathTest, NeverAllocated) { GetOutputBuffer().clear(); for (size_t i = 0; i < 100; ++i) { *NeverAllocatedPtr = 0; - *(NeverAllocatedPtr + 0x2000) = 0; - *(NeverAllocatedPtr + 0x3000) = 0; + *(NeverAllocatedPtr + 2 * PageSize) = 0; + *(NeverAllocatedPtr + 3 * PageSize) = 0; ASSERT_TRUE(GetOutputBuffer().empty()); } diff --git a/compiler-rt/test/asan/TestCases/Linux/release_to_os_test.cpp b/compiler-rt/test/asan/TestCases/Linux/release_to_os_test.cpp index 3e28ffde46ab..dc3ead9e8436 100644 --- a/compiler-rt/test/asan/TestCases/Linux/release_to_os_test.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/release_to_os_test.cpp @@ -6,6 +6,7 @@ // RUN: %env_asan_opts=allocator_release_to_os_interval_ms=-1 %run %t force 2>&1 | FileCheck %s --check-prefix=FORCE_RELEASE // REQUIRES: x86_64-target-arch +// REQUIRES: page-size-4096 #include #include diff --git a/compiler-rt/test/cfi/cross-dso/lit.local.cfg.py b/compiler-rt/test/cfi/cross-dso/lit.local.cfg.py index 2778d8c995fd..bd0fabd1f26d 100644 --- a/compiler-rt/test/cfi/cross-dso/lit.local.cfg.py +++ b/compiler-rt/test/cfi/cross-dso/lit.local.cfg.py @@ -12,3 +12,7 @@ if root.target_os not in ["Linux", "FreeBSD", "NetBSD"]: # Android O (API level 26) has support for cross-dso cfi in libdl.so. if config.android and "android-26" not in config.available_features: config.unsupported = True + +# The runtime library only supports 4K pages. 
+if "page-size-4096" not in config.available_features: + config.unsupported = True diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 8328b407dcc3..e2e815444dcf 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -965,6 +965,23 @@ if config.memprof_shadow_scale: else: config.available_features.add("memprof-shadow-scale-3") + +def target_page_size(): + try: + proc = subprocess.Popen( + f"{emulator or ''} python3", + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + out, err = proc.communicate(b'import os; print(os.sysconf("SC_PAGESIZE"))') + return int(out) + except: + return 4096 + + +config.available_features.add(f"page-size-{target_page_size()}") + if config.expensive_checks: config.available_features.add("expensive_checks") diff --git a/compiler-rt/test/msan/dtls_test.c b/compiler-rt/test/msan/dtls_test.c index 3c384256147a..0e49ac9feb9f 100644 --- a/compiler-rt/test/msan/dtls_test.c +++ b/compiler-rt/test/msan/dtls_test.c @@ -11,6 +11,7 @@ // Reports use-of-uninitialized-value, not analyzed XFAIL: target={{.*netbsd.*}} + XFAIL: aarch64-target-arch */ diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp index 9d7d46b462a8..cc76804aed21 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/odd_stack_size.cpp @@ -1,4 +1,5 @@ // RUN: %clangxx -O1 %s -o %t && %run %t +// REQUIRES: page-size-4096 // UNSUPPORTED: android // Fail on powerpc64 bots with: diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp index 0fa77200bf1c..c7a553469636 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp +++ 
b/compiler-rt/test/sanitizer_common/TestCases/Linux/release_to_os_test.cpp @@ -11,6 +11,9 @@ // FIXME: This mode uses 32bit allocator without purge. // UNSUPPORTED: hwasan-aliasing +// Page size is hardcoded below, but test still fails even if not hardcoded. +// REQUIRES: page-size-4096 + #include #include #include diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/resize_tls_dynamic.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/resize_tls_dynamic.cpp index c288e1d69baf..3e9ff924a3c4 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/resize_tls_dynamic.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/resize_tls_dynamic.cpp @@ -11,6 +11,9 @@ // FIXME: Investigate // UNSUPPORTED: target=powerpc64{{.*}} +// Fails because AArch64 uses TLSDESC instead of __tls_get_addr. +// UNSUPPORTED: aarch64-target-arch + #include #ifndef BUILD_DSO diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c index 0aff6039ac4e..a4a4f64ed370 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c @@ -13,6 +13,9 @@ // FIXME: Fails for unknown reasons. // UNSUPPORTED: powerpc64le-target-arch +// Fails because AArch64 uses TLSDESC instead of __tls_get_addr. +// UNSUPPORTED: aarch64-target-arch + #ifndef BUILD_SO # include # include From 6beb6f34bc8e9ae34e6a4dcc36e24b765e7f2b47 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 15 Aug 2025 13:43:16 -0700 Subject: [PATCH 026/214] dfsan: Fix test with gcc 15. With gcc 15 we end up emitting a reference to the std::__glibcxx_assert_fail function because of this change: https://github.com/gcc-mirror/gcc/commit/361d230fd7800a7e749aba8ed020f54f5c26d504 combined with assertion checks in the std::atomic implementation. This reference is undefined with dfsan causing the test to fail. 
Fix it by defining the macro that disables assertions. Pull Request: https://github.com/llvm/llvm-project/pull/153873 --- compiler-rt/test/dfsan/atomic.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/dfsan/atomic.cpp b/compiler-rt/test/dfsan/atomic.cpp index 22ee323c752f..73e1cbd17a7c 100644 --- a/compiler-rt/test/dfsan/atomic.cpp +++ b/compiler-rt/test/dfsan/atomic.cpp @@ -1,9 +1,12 @@ -// RUN: %clangxx_dfsan %s -fno-exceptions -o %t && %run %t -// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 %s -fno-exceptions -o %t && %run %t +// RUN: %clangxx_dfsan %s -fno-exceptions -D_GLIBCXX_NO_ASSERTIONS -o %t && %run %t +// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 %s -fno-exceptions -D_GLIBCXX_NO_ASSERTIONS -o %t && %run %t // // Use -fno-exceptions to turn off exceptions to avoid instrumenting // __cxa_begin_catch, std::terminate and __gxx_personality_v0. // +// Use -D_GLIBCXX_NO_ASSERTIONS to avoid depending on +// std::__glibcxx_assert_fail with gcc >= 15. +// // TODO: Support builtin atomics. For example, https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html // DFSan instrumentation pass cannot identify builtin callsites yet. From b7d6f484c87704fdbe64b081bd77058d9b3cfc03 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Aug 2025 00:46:19 +0300 Subject: [PATCH 027/214] [RISCV] Remove non-existent operand of nds.vfwcvt/nds.vfncvt instructions (#153865) Mask operand is likely a copy-paste error, they don't have one. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td index c75addd95b14..1fb30a0b73d9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td @@ -420,7 +420,7 @@ class NDSRVInstVD4DOT funct6, string opcodestr> } class NDSRVInstVBFHCvt vs1, string opcodestr> - : RVInst<(outs VR:$vd), (ins VR:$vs2, VMaskOp:$vm), + : RVInst<(outs VR:$vd), (ins VR:$vs2), opcodestr, "$vd, $vs2", [], InstFormatR> { bits<5> vs2; bits<5> vd; From 0cd35e7afd91ba64bdb2fc11caf13d0826780865 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 15 Aug 2025 15:14:51 -0700 Subject: [PATCH 028/214] [CIR] Add cir.vtable.get_vptr operation (#153630) This adds support for the cir.vtable.get_vptr operation and uses it to initialize the vptr member during constructors of dynamic classes. --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 33 +++++++++++++++++++ .../CIR/Dialect/IR/CIRTypeConstraints.td | 8 +++++ .../include/clang/CIR/Dialect/IR/CIRTypes.td | 8 ++--- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 4 +++ clang/lib/CIR/CodeGen/CIRGenClass.cpp | 25 +++++++++++--- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 15 ++++++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 10 ++++++ .../CIR/CodeGen/virtual-function-calls.cpp | 4 +-- 8 files changed, 96 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index a77e9199cdc9..a181c95494ef 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1749,6 +1749,39 @@ def CIR_VTableAddrPointOp : CIR_Op<"vtable.address_point", [ }]; } +//===----------------------------------------------------------------------===// +// VTableGetVPtr +//===----------------------------------------------------------------------===// + 
+def CIR_VTableGetVPtrOp : CIR_Op<"vtable.get_vptr", [Pure]> { + let summary = "Get a the address of the vtable pointer for an object"; + let description = [{ + The `vtable.get_vptr` operation retrieves the address of the vptr for a + C++ object. This operation requires that the object pointer points to + the start of a complete object. (TODO: Describe how we get that). + The vptr will always be at offset zero in the object, but this operation + is more explicit about what is being retrieved than a direct bitcast. + + The return type is always `!cir.ptr`. + + Example: + ```mlir + %2 = cir.load %0 : !cir.ptr>, !cir.ptr + %3 = cir.vtable.get_vptr %2 : !cir.ptr -> !cir.ptr + ``` + }]; + + let arguments = (ins + Arg:$src + ); + + let results = (outs CIR_PtrToVPtr:$result); + + let assemblyFormat = [{ + $src `:` qualified(type($src)) `->` qualified(type($result)) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // SetBitfieldOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td index d7d55dfbc065..82f6e1d33043 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td @@ -289,6 +289,14 @@ def CIR_AnyFloatOrVecOfFloatType let cppFunctionName = "isFPOrVectorOfFPType"; } +//===----------------------------------------------------------------------===// +// VPtr type predicates +//===----------------------------------------------------------------------===// + +def CIR_AnyVPtrType : CIR_TypeBase<"::cir::VPtrType", "vptr type">; + +def CIR_PtrToVPtr : CIR_PtrToType; + //===----------------------------------------------------------------------===// // Scalar Type predicates //===----------------------------------------------------------------------===// diff --git 
a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index a258df79a618..312d0a942267 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -296,10 +296,10 @@ def CIR_VPtrType : CIR_Type<"VPtr", "vptr", [ access to the vptr. This type will be the element type of the 'vptr' member of structures that - require a vtable pointer. A pointer to this type is returned by the - `cir.vtable.address_point` and `cir.vtable.get_vptr` operations, and this - pointer may be passed to the `cir.vtable.get_virtual_fn_addr` operation to - get the address of a virtual function pointer. + require a vtable pointer. The `cir.vtable.address_point` operation returns + this type. The `cir.vtable.get_vptr` operations returns a pointer to this + type. This pointer may be passed to the `cir.vtable.get_virtual_fn_addr` + operation to get the address of a virtual function pointer. The pointer may also be cast to other pointer types in order to perform pointer arithmetic based on information encoded in the AST layout to get diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 59d2adc15a01..a7537a0480a2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -84,6 +84,10 @@ public: llvm_unreachable("Unsupported format for long double"); } + mlir::Type getPtrToVPtrType() { + return getPointerTo(cir::VPtrType::get(getContext())); + } + /// Get a CIR record kind from a AST declaration tag. cir::RecordType::RecordKind getRecordKind(const clang::TagTypeKind kind) { switch (kind) { diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index 31c93cd00d08..a3947047de07 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -289,7 +289,7 @@ void CIRGenFunction::initializeVTablePointer(mlir::Location loc, } // Apply the offsets. 
- Address vtableField = loadCXXThisAddress(); + Address classAddr = loadCXXThisAddress(); if (!nonVirtualOffset.isZero() || virtualOffset) { cgm.errorNYI(loc, "initializeVTablePointer: non-virtual and virtual offset"); @@ -300,9 +300,9 @@ void CIRGenFunction::initializeVTablePointer(mlir::Location loc, // vtable field is derived from `this` pointer, therefore they should be in // the same addr space. assert(!cir::MissingFeatures::addressSpace()); - // TODO(cir): This should be cir.vtable.get_vptr. - vtableField = builder.createElementBitCast(loc, vtableField, - vtableAddressPoint.getType()); + auto vtablePtr = cir::VTableGetVPtrOp::create( + builder, loc, builder.getPtrToVPtrType(), classAddr.getPointer()); + Address vtableField = Address(vtablePtr, classAddr.getAlignment()); builder.createStore(loc, vtableAddressPoint, vtableField); assert(!cir::MissingFeatures::opTBAA()); assert(!cir::MissingFeatures::createInvariantGroup()); @@ -657,6 +657,23 @@ Address CIRGenFunction::getAddressOfBaseClass( return value; } +mlir::Value CIRGenFunction::getVTablePtr(mlir::Location loc, Address thisAddr, + const CXXRecordDecl *rd) { + auto vtablePtr = cir::VTableGetVPtrOp::create( + builder, loc, builder.getPtrToVPtrType(), thisAddr.getPointer()); + Address vtablePtrAddr = Address(vtablePtr, thisAddr.getAlignment()); + + auto vtable = builder.createLoad(loc, vtablePtrAddr); + assert(!cir::MissingFeatures::opTBAA()); + + if (cgm.getCodeGenOpts().OptimizationLevel > 0 && + cgm.getCodeGenOpts().StrictVTablePointers) { + assert(!cir::MissingFeatures::createInvariantGroup()); + } + + return vtable; +} + void CIRGenFunction::emitCXXConstructorCall(const clang::CXXConstructorDecl *d, clang::CXXCtorType type, bool forVirtualBase, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 1ea296a6887e..9f7521db78be 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2344,7 +2344,8 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMVecShuffleOpLowering, CIRToLLVMVecSplatOpLowering, CIRToLLVMVecTernaryOpLowering, - CIRToLLVMVTableAddrPointOpLowering + CIRToLLVMVTableAddrPointOpLowering, + CIRToLLVMVTableGetVPtrOpLowering // clang-format on >(converter, patterns.getContext()); @@ -2468,6 +2469,18 @@ mlir::LogicalResult CIRToLLVMVTableAddrPointOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMVTableGetVPtrOpLowering::matchAndRewrite( + cir::VTableGetVPtrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + // cir.vtable.get_vptr is equivalent to a bitcast from the source object + // pointer to the vptr type. Since the LLVM dialect uses opaque pointers + // we can just replace uses of this operation with the original pointer. + mlir::Value srcVal = adaptor.getSrc(); + rewriter.replaceAllUsesWith(op, srcVal); + rewriter.eraseOp(op); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMStackSaveOpLowering::matchAndRewrite( cir::StackSaveOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index e32bf2d1bae0..91e850523337 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -467,6 +467,16 @@ public: mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMVTableGetVPtrOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::VTableGetVPtrOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMStackSaveOpLowering : public mlir::OpConversionPattern { public: diff --git a/clang/test/CIR/CodeGen/virtual-function-calls.cpp 
b/clang/test/CIR/CodeGen/virtual-function-calls.cpp index 004b6dab3056..4787d78aa0e3 100644 --- a/clang/test/CIR/CodeGen/virtual-function-calls.cpp +++ b/clang/test/CIR/CodeGen/virtual-function-calls.cpp @@ -27,8 +27,8 @@ A::A() {} // CIR: cir.store %arg0, %[[THIS_ADDR]] : !cir.ptr, !cir.ptr> // CIR: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] : !cir.ptr>, !cir.ptr // CIR: %[[VPTR:.*]] = cir.vtable.address_point(@_ZTV1A, address_point = ) : !cir.vptr -// CIR: %[[THIS_VPTR_PTR:.*]] = cir.cast(bitcast, %[[THIS]] : !cir.ptr), !cir.ptr -// CIR: cir.store align(8) %[[VPTR]], %[[THIS_VPTR_PTR]] : !cir.vptr, !cir.ptr +// CIR: %[[THIS_VPTR_PTR:.*]] = cir.vtable.get_vptr %[[THIS]] : !cir.ptr -> !cir.ptr +// CIR: cir.store{{.*}} align(8) %[[VPTR]], %[[THIS_VPTR_PTR]] : !cir.vptr, !cir.ptr // CIR: cir.return // LLVM: define{{.*}} void @_ZN1AC2Ev(ptr %[[ARG0:.*]]) From 9f302ed0cf433114faf4ab07655166634630e1e2 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 15 Aug 2025 15:22:06 -0700 Subject: [PATCH 029/214] [flang] Inline hlfir.eoshift during HLFIR intrinsics simplification. (#153108) This patch generalizes the code for hlfir.cshift to be applicable for hlfir.eoshift. The major difference is the selection of the boundary value that might be statically/dynamically absent, in which case the default scalar value has to be used. The scalar value of the boundary is always computed before the hlfir.elemental or the assignment loop. Contrary to hlfir.cshift simplification, the SHIFT value is not normalized, because the original value (and its sign) participate in the EOSHIFT index computation for addressing the input array and selecting which elements of the results are assigned from the boundary operand. 
--- flang/lib/Optimizer/Builder/HLFIRTools.cpp | 5 +- .../Transforms/SimplifyHLFIRIntrinsics.cpp | 666 ++++- .../simplify-hlfir-intrinsics-cshift.fir | 4 +- .../simplify-hlfir-intrinsics-eoshift.fir | 2210 +++++++++++++++++ 4 files changed, 2753 insertions(+), 132 deletions(-) create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index b6d692a0226c..086dd6671160 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -416,7 +416,10 @@ hlfir::Entity hlfir::loadTrivialScalar(mlir::Location loc, entity = derefPointersAndAllocatables(loc, builder, entity); if (entity.isVariable() && entity.isScalar() && fir::isa_trivial(entity.getFortranElementType())) { - return Entity{fir::LoadOp::create(builder, loc, entity)}; + // Optional entities may be represented with !fir.box. + // We need to take the data pointer before loading the scalar. + mlir::Value base = genVariableRawAddress(loc, builder, entity); + return Entity{fir::LoadOp::create(builder, loc, base)}; } return entity; } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index b27c3a852694..fe12f49c655b 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -10,6 +10,7 @@ // into the calling function. 
//===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/Character.h" #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" @@ -1269,64 +1270,91 @@ public: } }; -class CShiftConversion : public mlir::OpRewritePattern { +template +class ArrayShiftConversion : public mlir::OpRewritePattern { public: - using mlir::OpRewritePattern::OpRewritePattern; + // The implementation below only supports CShiftOp and EOShiftOp. + static_assert(std::is_same_v || + std::is_same_v); + + using mlir::OpRewritePattern::OpRewritePattern; llvm::LogicalResult - matchAndRewrite(hlfir::CShiftOp cshift, - mlir::PatternRewriter &rewriter) const override { + matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override { - hlfir::ExprType expr = mlir::dyn_cast(cshift.getType()); + hlfir::ExprType expr = mlir::dyn_cast(op.getType()); assert(expr && - "expected an expression type for the result of hlfir.cshift"); + "expected an expression type for the result of the array shift"); unsigned arrayRank = expr.getRank(); - // When it is a 1D CSHIFT, we may assume that the DIM argument + // When it is a 1D CSHIFT/EOSHIFT, we may assume that the DIM argument // (whether it is present or absent) is equal to 1, otherwise, // the program is illegal. 
int64_t dimVal = 1; if (arrayRank != 1) - if (mlir::Value dim = cshift.getDim()) { + if (mlir::Value dim = op.getDim()) { auto constDim = fir::getIntIfConstant(dim); if (!constDim) - return rewriter.notifyMatchFailure(cshift, - "Nonconstant DIM for CSHIFT"); + return rewriter.notifyMatchFailure( + op, "Nonconstant DIM for CSHIFT/EOSHIFT"); dimVal = *constDim; } if (dimVal <= 0 || dimVal > arrayRank) - return rewriter.notifyMatchFailure(cshift, "Invalid DIM for CSHIFT"); + return rewriter.notifyMatchFailure(op, "Invalid DIM for CSHIFT/EOSHIFT"); + + if constexpr (std::is_same_v) { + // TODO: the EOSHIFT inlining code is not ready to produce + // fir.if selecting between ARRAY and BOUNDARY (or the default + // boundary value), when they are expressions of type CHARACTER. + // This needs more work. + if (mlir::isa(expr.getEleTy())) { + if (!hlfir::Entity{op.getArray()}.isVariable()) + return rewriter.notifyMatchFailure( + op, "EOSHIFT with ARRAY being CHARACTER expression"); + if (op.getBoundary() && !hlfir::Entity{op.getBoundary()}.isVariable()) + return rewriter.notifyMatchFailure( + op, "EOSHIFT with BOUNDARY being CHARACTER expression"); + } + // TODO: selecting between ARRAY and BOUNDARY values with derived types + // need more work. + if (fir::isa_derived(expr.getEleTy())) + return rewriter.notifyMatchFailure(op, "EOSHIFT of derived type"); + } // When DIM==1 and the contiguity of the input array is not statically // known, try to exploit the fact that the leading dimension might be // contiguous. We can do this now using hlfir.eval_in_mem with // a dynamic check for the leading dimension contiguity. - // Otherwise, convert hlfir.cshift to hlfir.elemental. + // Otherwise, convert hlfir.cshift/eoshift to hlfir.elemental. // // Note that the hlfir.elemental can be inlined into other hlfir.elemental, // while hlfir.eval_in_mem prevents this, and we will end up creating // a temporary array for the result. 
We may need to come up with // a more sophisticated logic for picking the most efficient // representation. - hlfir::Entity array = hlfir::Entity{cshift.getArray()}; + hlfir::Entity array = hlfir::Entity{op.getArray()}; mlir::Type elementType = array.getFortranElementType(); if (dimVal == 1 && fir::isa_trivial(elementType) && - // genInMemCShift() only works for variables currently. + // genInMemArrayShift() only works for variables currently. array.isVariable()) - rewriter.replaceOp(cshift, genInMemCShift(rewriter, cshift, dimVal)); + rewriter.replaceOp(op, genInMemArrayShift(rewriter, op, dimVal)); else - rewriter.replaceOp(cshift, genElementalCShift(rewriter, cshift, dimVal)); + rewriter.replaceOp(op, genElementalArrayShift(rewriter, op, dimVal)); return mlir::success(); } private: - /// Generate MODULO(\p shiftVal, \p extent). + /// For CSHIFT, generate MODULO(\p shiftVal, \p extent). + /// For EOSHIFT, return \p shiftVal casted to \p calcType. static mlir::Value normalizeShiftValue(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shiftVal, mlir::Value extent, mlir::Type calcType) { shiftVal = builder.createConvert(loc, calcType, shiftVal); + if constexpr (std::is_same_v) + return shiftVal; + extent = builder.createConvert(loc, calcType, extent); // Make sure that we do not divide by zero. When the dimension // has zero size, turn the extent into 1. Note that the computed @@ -1342,24 +1370,227 @@ private: return builder.createConvert(loc, calcType, shiftVal); } - /// Convert \p cshift into an hlfir.elemental using - /// the pre-computed constant \p dimVal. - static mlir::Operation *genElementalCShift(mlir::PatternRewriter &rewriter, - hlfir::CShiftOp cshift, - int64_t dimVal) { - using Fortran::common::maxRank; - hlfir::Entity shift = hlfir::Entity{cshift.getShift()}; - hlfir::Entity array = hlfir::Entity{cshift.getArray()}; + /// The indices computations for the array shifts are done using I64 type. 
+ /// For CSHIFT, all computations do not overflow signed and unsigned I64. + /// For EOSHIFT, some computations may involve negative shift values, + /// so using no-unsigned wrap flag would be incorrect. + static void setArithOverflowFlags(Op op, fir::FirOpBuilder &builder) { + if constexpr (std::is_same_v) + builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw); + else + builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw | + mlir::arith::IntegerOverflowFlags::nuw); + } - mlir::Location loc = cshift.getLoc(); - fir::FirOpBuilder builder{rewriter, cshift.getOperation()}; + /// Return the element type of the EOSHIFT boundary that may be omitted + /// statically or dynamically. This element type might be used + /// to generate MLIR where we have to select between the default + /// boundary value and the dynamically absent/present boundary value. + /// If the boundary has a type not defined in Table 16.4 in 16.9.77 + /// of F2023, then the return value is nullptr. + static mlir::Type getDefaultBoundaryValueType(mlir::Type elementType) { + // To be able to generate a "select" between the default boundary value + // and the dynamic boundary value, use BoxCharType for the CHARACTER + // cases. This might be a little bit inefficient, because we may + // create unnecessary tuples, but it simplifies the inlining code. + if (auto charTy = mlir::dyn_cast(elementType)) + return fir::BoxCharType::get(charTy.getContext(), charTy.getFKind()); + + if (mlir::isa(elementType) || + fir::isa_integer(elementType) || fir::isa_real(elementType) || + fir::isa_complex(elementType)) + return elementType; + + return nullptr; + } + + /// Generate the default boundary value as defined in Table 16.4 in 16.9.77 + /// of F2023. 
+ static mlir::Value genDefaultBoundary(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Type elementType) { + assert(getDefaultBoundaryValueType(elementType) && + "default boundary value cannot be computed for the given type"); + if (mlir::isa(elementType)) { + // Create an empty CHARACTER of the same kind. The assignment + // of this empty CHARACTER into the result will add the padding + // if necessary. + fir::factory::CharacterExprHelper charHelper{builder, loc}; + mlir::Value zeroLen = builder.createIntegerConstant( + loc, builder.getCharacterLengthType(), 0); + fir::CharBoxValue emptyCharTemp = + charHelper.createCharacterTemp(elementType, zeroLen); + return charHelper.createEmbox(emptyCharTemp); + } + + return fir::factory::createZeroValue(builder, loc, elementType); + } + + /// \p entity represents the boundary operand of hlfir.eoshift. + /// This method generates a scalar boundary value fetched + /// from the boundary entity using \p indices (which may be empty, + /// if the boundary operand is scalar). + static mlir::Value loadEoshiftVal(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity entity, + mlir::ValueRange indices = {}) { + hlfir::Entity boundaryVal = + hlfir::loadElementAt(loc, builder, entity, indices); + + mlir::Type boundaryValTy = + getDefaultBoundaryValueType(entity.getFortranElementType()); + + // Boxed !fir.char with known LEN are loaded + // as raw references to !fir.char. + // We need to wrap them into the !fir.boxchar. + if (boundaryVal.isVariable() && boundaryValTy && + mlir::isa(boundaryValTy)) + return hlfir::genVariableBoxChar(loc, builder, boundaryVal); + return boundaryVal; + } + + /// This method generates a scalar boundary value for the given hlfir.eoshift + /// \p op that can be used to initialize cells of the result + /// if the scalar/array boundary operand is statically or dynamically + /// absent. The first result is the scalar boundary value. 
The second result + /// is a dynamic predicate indicating whether the scalar boundary value + /// should actually be used. + [[maybe_unused]] static std::pair + genScalarBoundaryForEOShift(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::EOShiftOp op) { + hlfir::Entity array{op.getArray()}; + mlir::Type elementType = array.getFortranElementType(); + + if (!op.getBoundary()) { + // Boundary operand is statically absent. + mlir::Value defaultVal = genDefaultBoundary(loc, builder, elementType); + mlir::Value boundaryIsScalarPred = builder.createBool(loc, true); + return {defaultVal, boundaryIsScalarPred}; + } + + hlfir::Entity boundary{op.getBoundary()}; + mlir::Type boundaryValTy = getDefaultBoundaryValueType(elementType); + + if (boundary.isScalar()) { + if (!boundaryValTy || !boundary.mayBeOptional()) { + // The boundary must be present. + mlir::Value boundaryVal = loadEoshiftVal(loc, builder, boundary); + mlir::Value boundaryIsScalarPred = builder.createBool(loc, true); + return {boundaryVal, boundaryIsScalarPred}; + } + + // Boundary is a scalar that may be dynamically absent. + // If boundary is not present dynamically, we must use the default + // value. 
+ assert(mlir::isa(boundary.getType())); + mlir::Value isPresentPred = + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), boundary); + mlir::Value boundaryVal = + builder + .genIfOp(loc, {boundaryValTy}, isPresentPred, + /*withElseRegion=*/true) + .genThen([&]() { + mlir::Value boundaryVal = + loadEoshiftVal(loc, builder, boundary); + fir::ResultOp::create(builder, loc, boundaryVal); + }) + .genElse([&]() { + mlir::Value defaultVal = + genDefaultBoundary(loc, builder, elementType); + fir::ResultOp::create(builder, loc, defaultVal); + }) + .getResults()[0]; + mlir::Value boundaryIsScalarPred = builder.createBool(loc, true); + return {boundaryVal, boundaryIsScalarPred}; + } + if (!boundaryValTy || !boundary.mayBeOptional()) { + // The boundary must be present + mlir::Value boundaryIsScalarPred = builder.createBool(loc, false); + return {nullptr, boundaryIsScalarPred}; + } + + // Boundary is an array that may be dynamically absent. + mlir::Value defaultVal = genDefaultBoundary(loc, builder, elementType); + mlir::Value isPresentPred = + fir::IsPresentOp::create(builder, loc, builder.getI1Type(), boundary); + // If the array is present, then boundaryIsScalarPred must be equal + // to false, otherwise, it should be true. + mlir::Value trueVal = builder.createBool(loc, true); + mlir::Value falseVal = builder.createBool(loc, false); + mlir::Value boundaryIsScalarPred = mlir::arith::SelectOp::create( + builder, loc, isPresentPred, falseVal, trueVal); + return {defaultVal, boundaryIsScalarPred}; + } + + /// Generate code that produces the final boundary value to be assigned + /// to the result of hlfir.eoshift \p op. \p precomputedScalarBoundary + /// specifies the scalar boundary value pre-computed before the elemental + /// or the assignment loop. If it is nullptr, then the boundary operand + /// of \p op must be a present array. \p boundaryIsScalarPred is a dynamic + /// predicate that is true, when the pre-computed scalar value must be used. 
+ /// \p oneBasedIndices specify the indices to address into the boundary + /// array - they may be empty, if the boundary is scalar. + [[maybe_unused]] static mlir::Value selectBoundaryValue( + mlir::Location loc, fir::FirOpBuilder &builder, hlfir::EOShiftOp op, + mlir::Value precomputedScalarBoundary, mlir::Value boundaryIsScalarPred, + mlir::ValueRange oneBasedIndices) { + // Boundary is statically absent: a default value has been precomputed. + if (!op.getBoundary()) + return precomputedScalarBoundary; + + // Boundary is statically present and is a scalar: boundary does not depend + // upon the indices and so it has been precomputed. + hlfir::Entity boundary{op.getBoundary()}; + if (boundary.isScalar()) + return precomputedScalarBoundary; + + // Boundary is statically present and is an array: if the scalar + // boundary has not been precomputed, this means that the data type + // of the shifted values does not provide a way to compute + // the default boundary value, so the array boundary must be dynamically + // present, and we can load the boundary values from it. + bool mustBePresent = !precomputedScalarBoundary; + if (mustBePresent) + return loadEoshiftVal(loc, builder, boundary, oneBasedIndices); + + // The array boundary may be dynamically absent. + // In this case, precomputedScalarBoundary is a pre-computed scalar + // boundary value that has to be used if boundaryIsScalarPred + // is true, otherwise, the boundary value has to be loaded + // from the boundary array. 
+ mlir::Type boundaryValTy = precomputedScalarBoundary.getType(); + mlir::Value newBoundaryVal = + builder + .genIfOp(loc, {boundaryValTy}, boundaryIsScalarPred, + /*withElseRegion=*/true) + .genThen([&]() { + fir::ResultOp::create(builder, loc, precomputedScalarBoundary); + }) + .genElse([&]() { + mlir::Value elem = + loadEoshiftVal(loc, builder, boundary, oneBasedIndices); + fir::ResultOp::create(builder, loc, elem); + }) + .getResults()[0]; + return newBoundaryVal; + } + + /// Convert \p op into an hlfir.elemental using + /// the pre-computed constant \p dimVal. + static mlir::Operation * + genElementalArrayShift(mlir::PatternRewriter &rewriter, Op op, + int64_t dimVal) { + using Fortran::common::maxRank; + hlfir::Entity shift = hlfir::Entity{op.getShift()}; + hlfir::Entity array = hlfir::Entity{op.getArray()}; + + mlir::Location loc = op.getLoc(); + fir::FirOpBuilder builder{rewriter, op.getOperation()}; // The new index computation involves MODULO, which is not implemented // for IndexType, so use I64 instead. mlir::Type calcType = builder.getI64Type(); - // All the indices arithmetic used below does not overflow - // signed and unsigned I64. - builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw | - mlir::arith::IntegerOverflowFlags::nuw); + // Set the indices arithmetic overflow flags. + setArithOverflowFlags(op, builder); mlir::Value arrayShape = hlfir::genShape(loc, builder, array); llvm::SmallVector arrayExtents = @@ -1374,6 +1605,17 @@ private: shiftVal = normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType); } + // The boundary operand of hlfir.eoshift may be statically or + // dynamically absent. + // In both cases, it is assumed to be a scalar with the value + // corresponding to the array element type. + // boundaryIsScalarPred is a dynamic predicate that identifies + // these cases. If boundaryIsScalarPred is dynamically false, + // then the boundary operand must be a present array. 
+ mlir::Value boundaryVal, boundaryIsScalarPred; + if constexpr (std::is_same_v) + std::tie(boundaryVal, boundaryIsScalarPred) = + genScalarBoundaryForEOShift(loc, builder, op); auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange inputIndices) -> hlfir::Entity { @@ -1394,34 +1636,84 @@ private: shiftVal = normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType); } + if constexpr (std::is_same_v) { + llvm::SmallVector boundaryIndices{indices}; + boundaryIndices.erase(boundaryIndices.begin() + dimVal - 1); + boundaryVal = + selectBoundaryValue(loc, builder, op, boundaryVal, + boundaryIsScalarPred, boundaryIndices); + } - // Element i of the result (1-based) is element - // 'MODULO(i + SH - 1, SIZE(ARRAY,DIM)) + 1' (1-based) of the original - // ARRAY (or its section, when ARRAY is not a vector). + if constexpr (std::is_same_v) { + // EOSHIFT: + // Element i of the result (1-based) is the element of the original + // array (or its section, when ARRAY is not a vector) with index + // (i + SH), if (1 <= i + SH <= SIZE(ARRAY,DIM)), otherwise + // it is the BOUNDARY value. 
+ mlir::Value index = + builder.createConvert(loc, calcType, inputIndices[dimVal - 1]); + mlir::arith::IntegerOverflowFlags savedFlags = + builder.getIntegerOverflowFlags(); + builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw); + mlir::Value indexPlusShift = + mlir::arith::AddIOp::create(builder, loc, index, shiftVal); + builder.setIntegerOverflowFlags(savedFlags); + mlir::Value one = builder.createIntegerConstant(loc, calcType, 1); + mlir::Value cmp1 = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::sge, indexPlusShift, one); + mlir::Value cmp2 = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::sle, indexPlusShift, + shiftDimExtent); + mlir::Value loadFromArray = + mlir::arith::AndIOp::create(builder, loc, cmp1, cmp2); + mlir::Type boundaryValTy = boundaryVal.getType(); + mlir::Value result = + builder + .genIfOp(loc, {boundaryValTy}, loadFromArray, + /*withElseRegion=*/true) + .genThen([&]() { + indices[dimVal - 1] = builder.createConvert( + loc, builder.getIndexType(), indexPlusShift); + ; + mlir::Value elem = + loadEoshiftVal(loc, builder, array, indices); + fir::ResultOp::create(builder, loc, elem); + }) + .genElse( + [&]() { fir::ResultOp::create(builder, loc, boundaryVal); }) + .getResults()[0]; + return hlfir::Entity{result}; + } else { + // CSHIFT: + // Element i of the result (1-based) is element + // 'MODULO(i + SH - 1, SIZE(ARRAY,DIM)) + 1' (1-based) of the original + // ARRAY (or its section, when ARRAY is not a vector). - // Compute the index into the original array using the normalized - // shift value, which satisfies (SH >= 0 && SH < SIZE(ARRAY,DIM)): - // newIndex = - // i + ((i <= SIZE(ARRAY,DIM) - SH) ? SH : SH - SIZE(ARRAY,DIM)) - // - // Such index computation allows for further loop vectorization - // in LLVM. 
- mlir::Value wrapBound = - mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); - mlir::Value adjustedShiftVal = - mlir::arith::SubIOp::create(builder, loc, shiftVal, shiftDimExtent); - mlir::Value index = - builder.createConvert(loc, calcType, inputIndices[dimVal - 1]); - mlir::Value wrapCheck = mlir::arith::CmpIOp::create( - builder, loc, mlir::arith::CmpIPredicate::sle, index, wrapBound); - mlir::Value actualShift = mlir::arith::SelectOp::create( - builder, loc, wrapCheck, shiftVal, adjustedShiftVal); - mlir::Value newIndex = - mlir::arith::AddIOp::create(builder, loc, index, actualShift); - newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex); - indices[dimVal - 1] = newIndex; - hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices); - return hlfir::loadTrivialScalar(loc, builder, element); + // Compute the index into the original array using the normalized + // shift value, which satisfies (SH >= 0 && SH < SIZE(ARRAY,DIM)): + // newIndex = + // i + ((i <= SIZE(ARRAY,DIM) - SH) ? SH : SH - SIZE(ARRAY,DIM)) + // + // Such index computation allows for further loop vectorization + // in LLVM. 
+ mlir::Value wrapBound = + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); + mlir::Value adjustedShiftVal = + mlir::arith::SubIOp::create(builder, loc, shiftVal, shiftDimExtent); + mlir::Value index = + builder.createConvert(loc, calcType, inputIndices[dimVal - 1]); + mlir::Value wrapCheck = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::sle, index, wrapBound); + mlir::Value actualShift = mlir::arith::SelectOp::create( + builder, loc, wrapCheck, shiftVal, adjustedShiftVal); + mlir::Value newIndex = + mlir::arith::AddIOp::create(builder, loc, index, actualShift); + newIndex = builder.createConvert(loc, builder.getIndexType(), newIndex); + indices[dimVal - 1] = newIndex; + hlfir::Entity element = + hlfir::getElementAt(loc, builder, array, indices); + return hlfir::loadTrivialScalar(loc, builder, element); + } }; mlir::Type elementType = array.getFortranElementType(); @@ -1429,19 +1721,42 @@ private: loc, builder, elementType, arrayShape, typeParams, genKernel, /*isUnordered=*/true, array.isPolymorphic() ? static_cast(array) : nullptr, - cshift.getResult().getType()); + op.getResult().getType()); return elementalOp.getOperation(); } - /// Convert \p cshift into an hlfir.eval_in_mem using the pre-computed + /// Convert \p op into an hlfir.eval_in_mem using the pre-computed /// constant \p dimVal. - /// The converted code looks like this: - /// do i=1,SH - /// result(i + (SIZE(ARRAY,DIM) - SH)) = array(i) + /// The converted code for CSHIFT looks like this: + /// DEST_OFFSET = SIZE(ARRAY,DIM) - SH + /// COPY_END1 = SH + /// do i=1,COPY_END1 + /// result(i + DEST_OFFSET) = array(i) /// end - /// do i=1,SIZE(ARRAY,DIM) - SH - /// result(i) = array(i + SH) + /// SOURCE_OFFSET = SH + /// COPY_END2 = SIZE(ARRAY,DIM) - SH + /// do i=1,COPY_END2 + /// result(i) = array(i + SOURCE_OFFSET) /// end + /// Where SH is the normalized shift value, which satisfies + /// (SH >= 0 && SH < SIZE(ARRAY,DIM)). 
+ /// + /// The converted code for EOSHIFT looks like this: + /// EXTENT = SIZE(ARRAY,DIM) + /// DEST_OFFSET = SH < 0 ? -SH : 0 + /// SOURCE_OFFSET = SH < 0 ? 0 : SH + /// COPY_END = SH < 0 ? + /// (-EXTENT > SH ? 0 : EXTENT + SH) : + /// (EXTENT < SH ? 0 : EXTENT - SH) + /// do i=1,COPY_END + /// result(i + DEST_OFFSET) = array(i + SOURCE_OFFSET) + /// end + /// INIT_END = EXTENT - COPY_END + /// INIT_OFFSET = SH < 0 ? 0 : COPY_END + /// do i=1,INIT_END + /// result(i + INIT_OFFSET) = BOUNDARY + /// end + /// Where SH is the original shift value. /// /// When \p dimVal is 1, we generate the same code twice /// under a dynamic check for the contiguity of the leading @@ -1450,24 +1765,21 @@ private: /// as a contiguous slice of the original array. /// This allows recognizing the above two loops as memcpy /// loop idioms in LLVM. - static mlir::Operation *genInMemCShift(mlir::PatternRewriter &rewriter, - hlfir::CShiftOp cshift, - int64_t dimVal) { + static mlir::Operation *genInMemArrayShift(mlir::PatternRewriter &rewriter, + Op op, int64_t dimVal) { using Fortran::common::maxRank; - hlfir::Entity shift = hlfir::Entity{cshift.getShift()}; - hlfir::Entity array = hlfir::Entity{cshift.getArray()}; + hlfir::Entity shift = hlfir::Entity{op.getShift()}; + hlfir::Entity array = hlfir::Entity{op.getArray()}; assert(array.isVariable() && "array must be a variable"); assert(!array.isPolymorphic() && - "genInMemCShift does not support polymorphic types"); - mlir::Location loc = cshift.getLoc(); - fir::FirOpBuilder builder{rewriter, cshift.getOperation()}; + "genInMemArrayShift does not support polymorphic types"); + mlir::Location loc = op.getLoc(); + fir::FirOpBuilder builder{rewriter, op.getOperation()}; // The new index computation involves MODULO, which is not implemented // for IndexType, so use I64 instead. mlir::Type calcType = builder.getI64Type(); - // All the indices arithmetic used below does not overflow - // signed and unsigned I64. 
- builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw | - mlir::arith::IntegerOverflowFlags::nuw); + // Set the indices arithmetic overflow flags. + setArithOverflowFlags(op, builder); mlir::Value arrayShape = hlfir::genShape(loc, builder, array); llvm::SmallVector arrayExtents = @@ -1482,10 +1794,20 @@ private: shiftVal = normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType); } + // The boundary operand of hlfir.eoshift may be statically or + // dynamically absent. + // In both cases, it is assumed to be a scalar with the value + // corresponding to the array element type. + // boundaryIsScalarPred is a dynamic predicate that identifies + // these cases. If boundaryIsScalarPred is dynamically false, + // then the boundary operand must be a present array. + mlir::Value boundaryVal, boundaryIsScalarPred; + if constexpr (std::is_same_v) + std::tie(boundaryVal, boundaryIsScalarPred) = + genScalarBoundaryForEOShift(loc, builder, op); hlfir::EvaluateInMemoryOp evalOp = hlfir::EvaluateInMemoryOp::create( - builder, loc, mlir::cast(cshift.getType()), - arrayShape); + builder, loc, mlir::cast(op.getType()), arrayShape); builder.setInsertionPointToStart(&evalOp.getBody().front()); mlir::Value resultArray = evalOp.getMemory(); @@ -1499,11 +1821,12 @@ private: // (if any). If exposeContiguity is true, the array's section // array(s(1), ..., s(dim-1), :, s(dim+1), ..., s(n)) is represented // as a contiguous 1D array. - // shiftVal is the normalized shift value that satisfies (SH >= 0 && SH < - // SIZE(ARRAY,DIM)). + // For CSHIFT, shiftVal is the normalized shift value that satisfies + // (SH >= 0 && SH < SIZE(ARRAY,DIM)). 
// auto genDimensionShift = [&](mlir::Location loc, fir::FirOpBuilder &builder, - mlir::Value shiftVal, bool exposeContiguity, + mlir::Value shiftVal, mlir::Value boundary, + bool exposeContiguity, mlir::ValueRange oneBasedIndices) -> llvm::SmallVector { // Create a vector of indices (s(1), ..., s(dim-1), nullptr, s(dim+1), @@ -1536,63 +1859,143 @@ private: srcIndices.resize(1); } - // Copy first portion of the array: - // do i=1,SH - // result(i + (SIZE(ARRAY,DIM) - SH)) = array(i) - // end - auto genAssign1 = [&](mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange index, - mlir::ValueRange reductionArgs) + // genCopy lambda generates the body of a generic copy loop. + // do i=1,COPY_END + // result(i + DEST_OFFSET) = array(i + SOURCE_OFFSET) + // end + // + // It is parameterized by DEST_OFFSET and SOURCE_OFFSET. + mlir::Value dstOffset, srcOffset; + auto genCopy = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange index, mlir::ValueRange reductionArgs) -> llvm::SmallVector { assert(index.size() == 1 && "expected single loop"); mlir::Value srcIndex = builder.createConvert(loc, calcType, index[0]); + mlir::Value dstIndex = srcIndex; + if (srcOffset) + srcIndex = + mlir::arith::AddIOp::create(builder, loc, srcIndex, srcOffset); srcIndices[dimVal - 1] = srcIndex; hlfir::Entity srcElementValue = hlfir::loadElementAt(loc, builder, srcArray, srcIndices); - mlir::Value dstIndex = mlir::arith::AddIOp::create( - builder, loc, srcIndex, - mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, - shiftVal)); + if (dstOffset) + dstIndex = + mlir::arith::AddIOp::create(builder, loc, dstIndex, dstOffset); dstIndices[dimVal - 1] = dstIndex; hlfir::Entity dstElement = hlfir::getElementAt( loc, builder, hlfir::Entity{resultArray}, dstIndices); hlfir::AssignOp::create(builder, loc, srcElementValue, dstElement); + // Reset the external parameters' values to make sure + // they are properly updated between the lambda calls. 
+ // WARNING: if genLoopNestWithReductions() calls the lambda + // multiple times, this is going to be a problem. + dstOffset = nullptr; + srcOffset = nullptr; return {}; }; - // Generate the first loop. - hlfir::genLoopNestWithReductions(loc, builder, {shiftVal}, - /*reductionInits=*/{}, genAssign1, - /*isUnordered=*/true); + if constexpr (std::is_same_v) { + // Copy first portion of the array: + // DEST_OFFSET = SIZE(ARRAY,DIM) - SH + // COPY_END1 = SH + // do i=1,COPY_END1 + // result(i + DEST_OFFSET) = array(i) + // end + dstOffset = + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); + srcOffset = nullptr; + hlfir::genLoopNestWithReductions(loc, builder, {shiftVal}, + /*reductionInits=*/{}, genCopy, + /*isUnordered=*/true); - // Copy second portion of the array: - // do i=1,SIZE(ARRAY,DIM)-SH - // result(i) = array(i + SH) - // end - auto genAssign2 = [&](mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange index, - mlir::ValueRange reductionArgs) - -> llvm::SmallVector { - assert(index.size() == 1 && "expected single loop"); - mlir::Value dstIndex = builder.createConvert(loc, calcType, index[0]); - mlir::Value srcIndex = - mlir::arith::AddIOp::create(builder, loc, dstIndex, shiftVal); - srcIndices[dimVal - 1] = srcIndex; - hlfir::Entity srcElementValue = - hlfir::loadElementAt(loc, builder, srcArray, srcIndices); - dstIndices[dimVal - 1] = dstIndex; - hlfir::Entity dstElement = hlfir::getElementAt( - loc, builder, hlfir::Entity{resultArray}, dstIndices); - hlfir::AssignOp::create(builder, loc, srcElementValue, dstElement); - return {}; - }; + // Copy second portion of the array: + // SOURCE_OFFSET = SH + // COPY_END2 = SIZE(ARRAY,DIM) - SH + // do i=1,COPY_END2 + // result(i) = array(i + SOURCE_OFFSET) + // end + mlir::Value bound = + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); + dstOffset = nullptr; + srcOffset = shiftVal; + hlfir::genLoopNestWithReductions(loc, builder, {bound}, + 
/*reductionInits=*/{}, genCopy, + /*isUnordered=*/true); + } else { + // Do the copy: + // EXTENT = SIZE(ARRAY,DIM) + // DEST_OFFSET = SH < 0 ? -SH : 0 + // SOURCE_OFFSET = SH < 0 ? 0 : SH + // COPY_END = SH < 0 ? + // (-EXTENT > SH ? 0 : EXTENT + SH) : + // (EXTENT < SH ? 0 : EXTENT - SH) + // do i=1,COPY_END + // result(i + DEST_OFFSET) = array(i + SOURCE_OFFSET) + // end + mlir::arith::IntegerOverflowFlags savedFlags = + builder.getIntegerOverflowFlags(); + builder.setIntegerOverflowFlags(mlir::arith::IntegerOverflowFlags::nsw); - // Generate the second loop. - mlir::Value bound = - mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); - hlfir::genLoopNestWithReductions(loc, builder, {bound}, - /*reductionInits=*/{}, genAssign2, - /*isUnordered=*/true); + mlir::Value zero = builder.createIntegerConstant(loc, calcType, 0); + mlir::Value isNegativeShift = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::slt, shiftVal, zero); + mlir::Value shiftNeg = + mlir::arith::SubIOp::create(builder, loc, zero, shiftVal); + dstOffset = mlir::arith::SelectOp::create(builder, loc, isNegativeShift, + shiftNeg, zero); + srcOffset = mlir::arith::SelectOp::create(builder, loc, isNegativeShift, + zero, shiftVal); + mlir::Value extentNeg = + mlir::arith::SubIOp::create(builder, loc, zero, shiftDimExtent); + mlir::Value extentPlusShift = + mlir::arith::AddIOp::create(builder, loc, shiftDimExtent, shiftVal); + mlir::Value extentNegShiftCmp = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::sgt, extentNeg, shiftVal); + mlir::Value negativeShiftBound = mlir::arith::SelectOp::create( + builder, loc, extentNegShiftCmp, zero, extentPlusShift); + mlir::Value extentMinusShift = + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, shiftVal); + mlir::Value extentShiftCmp = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::slt, shiftDimExtent, + shiftVal); + mlir::Value positiveShiftBound = 
mlir::arith::SelectOp::create( + builder, loc, extentShiftCmp, zero, extentMinusShift); + mlir::Value copyEnd = mlir::arith::SelectOp::create( + builder, loc, isNegativeShift, negativeShiftBound, + positiveShiftBound); + hlfir::genLoopNestWithReductions(loc, builder, {copyEnd}, + /*reductionInits=*/{}, genCopy, + /*isUnordered=*/true); + + // Do the init: + // INIT_END = EXTENT - COPY_END + // INIT_OFFSET = SH < 0 ? 0 : COPY_END + // do i=1,INIT_END + // result(i + INIT_OFFSET) = BOUNDARY + // end + assert(boundary && "boundary cannot be null"); + mlir::Value initEnd = + mlir::arith::SubIOp::create(builder, loc, shiftDimExtent, copyEnd); + mlir::Value initOffset = mlir::arith::SelectOp::create( + builder, loc, isNegativeShift, zero, copyEnd); + auto genInit = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange index, + mlir::ValueRange reductionArgs) + -> llvm::SmallVector { + mlir::Value dstIndex = builder.createConvert(loc, calcType, index[0]); + dstIndex = + mlir::arith::AddIOp::create(builder, loc, dstIndex, initOffset); + dstIndices[dimVal - 1] = dstIndex; + hlfir::Entity dstElement = hlfir::getElementAt( + loc, builder, hlfir::Entity{resultArray}, dstIndices); + hlfir::AssignOp::create(builder, loc, boundary, dstElement); + return {}; + }; + hlfir::genLoopNestWithReductions(loc, builder, {initEnd}, + /*reductionInits=*/{}, genInit, + /*isUnordered=*/true); + builder.setIntegerOverflowFlags(savedFlags); + } return {}; }; @@ -1614,6 +2017,10 @@ private: shiftVal = normalizeShiftValue(loc, builder, shiftVal, shiftDimExtent, calcType); } + if constexpr (std::is_same_v) + boundaryVal = + selectBoundaryValue(loc, builder, op, boundaryVal, + boundaryIsScalarPred, oneBasedIndices); // If we can fetch the byte stride of the leading dimension, // and the byte size of the element, then we can generate @@ -1635,8 +2042,8 @@ private: } if (array.isSimplyContiguous() || !elemSize || !stride) { - genDimensionShift(loc, builder, shiftVal, 
/*exposeContiguity=*/false, - oneBasedIndices); + genDimensionShift(loc, builder, shiftVal, boundaryVal, + /*exposeContiguity=*/false, oneBasedIndices); return {}; } @@ -1644,11 +2051,11 @@ private: builder, loc, mlir::arith::CmpIPredicate::eq, elemSize, stride); builder.genIfOp(loc, {}, isContiguous, /*withElseRegion=*/true) .genThen([&]() { - genDimensionShift(loc, builder, shiftVal, /*exposeContiguity=*/true, - oneBasedIndices); + genDimensionShift(loc, builder, shiftVal, boundaryVal, + /*exposeContiguity=*/true, oneBasedIndices); }) .genElse([&]() { - genDimensionShift(loc, builder, shiftVal, + genDimensionShift(loc, builder, shiftVal, boundaryVal, /*exposeContiguity=*/false, oneBasedIndices); }); @@ -2339,7 +2746,8 @@ public: mlir::RewritePatternSet patterns(context); patterns.insert(context); patterns.insert>(context); - patterns.insert(context); + patterns.insert>(context); + patterns.insert>(context); patterns.insert>(context); patterns.insert>(context); diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir index 8684a429ea5b..f5af990da194 100644 --- a/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-cshift.fir @@ -38,12 +38,12 @@ func.func @cshift_vector(%arg0: !fir.box>, %arg1: !fir.ref>, index, index, index, !fir.shape<1>) -> !fir.box> // CHECK: %[[VAL_25:.*]] = fir.box_addr %[[VAL_24]] : (!fir.box>) -> !fir.ref> // CHECK: %[[VAL_26:.*]] = fir.embox %[[VAL_25]](%[[VAL_23]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_36:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow : i64 // CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_17]] : (i64) -> index // CHECK: fir.do_loop %[[VAL_28:.*]] = %[[VAL_2]] to %[[VAL_27]] step %[[VAL_2]] unordered { // CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (index) -> i64 // CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_26]] (%[[VAL_29]]) : (!fir.box>, i64) -> !fir.ref // CHECK: 
%[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref -// CHECK: %[[VAL_36:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow : i64 // CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_29]], %[[VAL_36]] overflow : i64 // CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_37]]) : (!fir.box>, i64) -> !fir.ref // CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_42]] : i32, !fir.ref @@ -59,6 +59,7 @@ func.func @cshift_vector(%arg0: !fir.box>, %arg1: !fir.ref // CHECK: } // CHECK: } else { +// CHECK: %[[VAL_68:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow : i64 // CHECK: %[[VAL_59:.*]] = fir.convert %[[VAL_17]] : (i64) -> index // CHECK: fir.do_loop %[[VAL_60:.*]] = %[[VAL_2]] to %[[VAL_59]] step %[[VAL_2]] unordered { // CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (index) -> i64 @@ -68,7 +69,6 @@ func.func @cshift_vector(%arg0: !fir.box>, %arg1: !fir.ref : index // CHECK: %[[VAL_66:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_65]]) : (!fir.box>, index) -> !fir.ref // CHECK: %[[VAL_67:.*]] = fir.load %[[VAL_66]] : !fir.ref -// CHECK: %[[VAL_68:.*]] = arith.subi %[[VAL_8]], %[[VAL_17]] overflow : i64 // CHECK: %[[VAL_69:.*]] = arith.addi %[[VAL_61]], %[[VAL_68]] overflow : i64 // CHECK: %[[VAL_74:.*]] = hlfir.designate %[[VAL_20]] (%[[VAL_69]]) : (!fir.box>, i64) -> !fir.ref // CHECK: hlfir.assign %[[VAL_67]] to %[[VAL_74]] : i32, !fir.ref diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir new file mode 100644 index 000000000000..88191d517c2b --- /dev/null +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-eoshift.fir @@ -0,0 +1,2210 @@ +// Test hlfir.eoshift simplification to hlfir.elemental and hlfir.eval_in_mem: +// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s + +// module eoshift_types +// type t +// end type t +// end module eoshift_types +// +// ! Test contiguous 1D array with statically absent boundary. 
+// subroutine eoshift1(n, array) +// integer :: n +// real(2) :: array(n) +// array = EOSHIFT(array, 2) +// end subroutine +func.func @_QPeoshift1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref> {fir.bindc_name = "array"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift1En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2 = fir.load %1#0 : !fir.ref + %3 = fir.convert %2 : (i32) -> index + %4 = arith.cmpi sgt, %3, %c0 : index + %5 = arith.select %4, %3, %c0 : index + %6 = fir.shape %5 : (index) -> !fir.shape<1> + %7:2 = hlfir.declare %arg1(%6) dummy_scope %0 {uniq_name = "_QFeoshift1Earray"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %8 = hlfir.eoshift %7#0 %c2_i32 : (!fir.box>, i32) -> !hlfir.expr + hlfir.assign %8 to %7#0 : !hlfir.expr, !fir.box> + hlfir.destroy %8 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPeoshift1( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f16 +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift1En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_4]] : index +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_4]] : index +// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1> +// CHECK: 
%[[VAL_12:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_11]]) dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift1Earray"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64 +// CHECK: %[[VAL_15:.*]] = hlfir.eval_in_mem shape %[[VAL_11]] : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_16:.*]]: !fir.ref>): +// CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_16]](%[[VAL_11]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_18:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_1]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_18]], %[[VAL_19]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_18]], %[[VAL_1]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_1]], %[[VAL_13]] overflow : i64 +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_22]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_1]], %[[VAL_23]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_27:.*]] = arith.cmpi slt, %[[VAL_13]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_1]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.select %[[VAL_18]], %[[VAL_25]], %[[VAL_28]] : i64 +// CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_31:.*]] = %[[VAL_0]] to %[[VAL_30]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (index) -> i64 +// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_21]] overflow : i64 +// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_33]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref 
+// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_32]], %[[VAL_20]] overflow : i64 +// CHECK: %[[VAL_37:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_36]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_37]] : f16, !fir.ref +// CHECK: } +// CHECK: %[[VAL_38:.*]] = arith.subi %[[VAL_13]], %[[VAL_29]] overflow : i64 +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_18]], %[[VAL_1]], %[[VAL_29]] : i64 +// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_41:.*]] = %[[VAL_0]] to %[[VAL_40]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (index) -> i64 +// CHECK: %[[VAL_43:.*]] = arith.addi %[[VAL_42]], %[[VAL_39]] overflow : i64 +// CHECK: %[[VAL_44:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_43]]) : (!fir.box>, i64) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_44]] : f16, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_12]]#0 : !hlfir.expr, !fir.box> +// CHECK: hlfir.destroy %[[VAL_15]] : !hlfir.expr +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the scalar constant boundary. 
+// subroutine eoshift2(n, array) +// integer :: n +// logical(2) :: array(n) +// array = EOSHIFT(array, 2, boundary=.true._2, dim=1) +// end subroutine +func.func @_QPeoshift2(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref>> {fir.bindc_name = "array"}) { + %c1_i32 = arith.constant 1 : i32 + %true = arith.constant true + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift2En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2 = fir.load %1#0 : !fir.ref + %3 = fir.convert %2 : (i32) -> index + %4 = arith.cmpi sgt, %3, %c0 : index + %5 = arith.select %4, %3, %c0 : index + %6 = fir.shape %5 : (index) -> !fir.shape<1> + %7:2 = hlfir.declare %arg1(%6) dummy_scope %0 {uniq_name = "_QFeoshift2Earray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %8 = fir.convert %true : (i1) -> !fir.logical<2> + %9 = hlfir.eoshift %7#0 %c2_i32 boundary %8 dim %c1_i32 : (!fir.box>>, i32, !fir.logical<2>, i32) -> !hlfir.expr> + hlfir.assign %9 to %7#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %9 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift2( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref>> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant true +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift2En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], 
%[[VAL_4]] : index +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_4]] : index +// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_11]]) dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift2Earray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<2> +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_10]] : (index) -> i64 +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64 +// CHECK: %[[VAL_16:.*]] = hlfir.eval_in_mem shape %[[VAL_11]] : (!fir.shape<1>) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_17:.*]]: !fir.ref>>): +// CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_17]](%[[VAL_11]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[VAL_19:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_1]], %[[VAL_15]] overflow : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_1]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_14]], %[[VAL_15]] overflow : i64 +// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_23]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_1]], %[[VAL_24]] : i64 +// CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_14]], %[[VAL_15]] overflow : i64 +// CHECK: %[[VAL_28:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_30:.*]] = arith.select %[[VAL_19]], %[[VAL_26]], %[[VAL_29]] : i64 +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_0]] to %[[VAL_31]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_33:.*]] = 
fir.convert %[[VAL_32]] : (index) -> i64 +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_33]], %[[VAL_22]] overflow : i64 +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_34]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref> +// CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_33]], %[[VAL_21]] overflow : i64 +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_37]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_38]] : !fir.logical<2>, !fir.ref> +// CHECK: } +// CHECK: %[[VAL_39:.*]] = arith.subi %[[VAL_14]], %[[VAL_30]] overflow : i64 +// CHECK: %[[VAL_40:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_30]] : i64 +// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_39]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_42:.*]] = %[[VAL_0]] to %[[VAL_41]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (index) -> i64 +// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] overflow : i64 +// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_44]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_45]] : !fir.logical<2>, !fir.ref> +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_16]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the scalar always present boundary. 
+// subroutine eoshift3(n, array, boundary) +// integer :: n +// complex(2) :: array(n), boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift3(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref>> {fir.bindc_name = "array"}, %arg2: !fir.ref> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift3En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFeoshift3Eboundary"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.convert %3 : (i32) -> index + %5 = arith.cmpi sgt, %4, %c0 : index + %6 = arith.select %5, %4, %c0 : index + %7 = fir.shape %6 : (index) -> !fir.shape<1> + %8:2 = hlfir.declare %arg1(%7) dummy_scope %0 {uniq_name = "_QFeoshift3Earray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %9 = hlfir.eoshift %8#0 %c2_i32 boundary %2#0 : (!fir.box>>, i32, !fir.ref>) -> !hlfir.expr> + hlfir.assign %9 to %8#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %9 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift3( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref>> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.ref> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift3En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift3Eboundary"} 
: (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : index +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : index +// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_11]]) dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift3Earray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref> +// CHECK: %[[VAL_16:.*]] = hlfir.eval_in_mem shape %[[VAL_11]] : (!fir.shape<1>) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_17:.*]]: !fir.ref>>): +// CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_17]](%[[VAL_11]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[VAL_19:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_20:.*]] = arith.subi %[[VAL_1]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_19]], %[[VAL_20]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_1]], %[[VAL_13]] overflow : i64 +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_13]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_23]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_1]], %[[VAL_24]] : i64 +// CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_13]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_28:.*]] = arith.cmpi slt, %[[VAL_13]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_30:.*]] = arith.select %[[VAL_19]], %[[VAL_26]], 
%[[VAL_29]] : i64 +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_32:.*]] = %[[VAL_0]] to %[[VAL_31]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (index) -> i64 +// CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_33]], %[[VAL_22]] overflow : i64 +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_34]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref> +// CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_33]], %[[VAL_21]] overflow : i64 +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_37]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_38]] : complex, !fir.ref> +// CHECK: } +// CHECK: %[[VAL_39:.*]] = arith.subi %[[VAL_13]], %[[VAL_30]] overflow : i64 +// CHECK: %[[VAL_40:.*]] = arith.select %[[VAL_19]], %[[VAL_1]], %[[VAL_30]] : i64 +// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_39]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_42:.*]] = %[[VAL_0]] to %[[VAL_41]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (index) -> i64 +// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] overflow : i64 +// CHECK: %[[VAL_45:.*]] = hlfir.designate %[[VAL_18]] (%[[VAL_44]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_45]] : complex, !fir.ref> +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_12]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_16]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the scalar optional boundary. 
+// subroutine eoshift4(n, array, boundary) +// integer :: n +// logical :: array(n) +// logical, optional :: boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift4(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref>> {fir.bindc_name = "array"}, %arg2: !fir.ref> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift4En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift4Eboundary"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.convert %3 : (i32) -> index + %5 = arith.cmpi sgt, %4, %c0 : index + %6 = arith.select %5, %4, %c0 : index + %7 = fir.shape %6 : (index) -> !fir.shape<1> + %8:2 = hlfir.declare %arg1(%7) dummy_scope %0 {uniq_name = "_QFeoshift4Earray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %9 = fir.is_present %2#0 : (!fir.ref>) -> i1 + %10 = fir.embox %2#0 : (!fir.ref>) -> !fir.box> + %11 = fir.absent !fir.box> + %12 = arith.select %9, %10, %11 : !fir.box> + %13 = hlfir.eoshift %8#0 %c2_i32 boundary %12 : (!fir.box>>, i32, !fir.box>) -> !hlfir.expr> + hlfir.assign %13 to %8#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %13 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift4( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref>> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.ref> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_2:.*]] = arith.constant false +// CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: 
%[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift4En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_5]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift4Eboundary"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index +// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_4]] : index +// CHECK: %[[VAL_12:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_12]]) dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift4Earray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_14:.*]] = fir.is_present %[[VAL_7]]#0 : (!fir.ref>) -> i1 +// CHECK: %[[VAL_15:.*]] = fir.embox %[[VAL_7]]#0 : (!fir.ref>) -> !fir.box> +// CHECK: %[[VAL_16:.*]] = fir.absent !fir.box> +// CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], %[[VAL_16]] : !fir.box> +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (index) -> i64 +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64 +// CHECK: %[[VAL_20:.*]] = fir.is_present %[[VAL_17]] : (!fir.box>) -> i1 +// CHECK: %[[VAL_21:.*]] = fir.if %[[VAL_20]] -> (!fir.logical<4>) { +// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_17]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref> +// CHECK: fir.result %[[VAL_23]] : !fir.logical<4> +// CHECK: } else { +// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<4> +// CHECK: fir.result %[[VAL_24]] : !fir.logical<4> +// CHECK: } +// CHECK: %[[VAL_25:.*]] = hlfir.eval_in_mem shape %[[VAL_12]] : (!fir.shape<1>) -> !hlfir.expr> { +// CHECK: 
^bb0(%[[VAL_26:.*]]: !fir.ref>>): +// CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_26]](%[[VAL_12]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[VAL_28:.*]] = arith.cmpi slt, %[[VAL_19]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.subi %[[VAL_1]], %[[VAL_19]] overflow : i64 +// CHECK: %[[VAL_30:.*]] = arith.select %[[VAL_28]], %[[VAL_29]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_31:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_32:.*]] = arith.subi %[[VAL_1]], %[[VAL_18]] overflow : i64 +// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_18]], %[[VAL_19]] overflow : i64 +// CHECK: %[[VAL_34:.*]] = arith.cmpi sgt, %[[VAL_32]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_35:.*]] = arith.select %[[VAL_34]], %[[VAL_1]], %[[VAL_33]] : i64 +// CHECK: %[[VAL_36:.*]] = arith.subi %[[VAL_18]], %[[VAL_19]] overflow : i64 +// CHECK: %[[VAL_37:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_38:.*]] = arith.select %[[VAL_37]], %[[VAL_1]], %[[VAL_36]] : i64 +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_28]], %[[VAL_35]], %[[VAL_38]] : i64 +// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_41:.*]] = %[[VAL_0]] to %[[VAL_40]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (index) -> i64 +// CHECK: %[[VAL_43:.*]] = arith.addi %[[VAL_42]], %[[VAL_31]] overflow : i64 +// CHECK: %[[VAL_44:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_43]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: %[[VAL_45:.*]] = fir.load %[[VAL_44]] : !fir.ref> +// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_42]], %[[VAL_30]] overflow : i64 +// CHECK: %[[VAL_47:.*]] = hlfir.designate %[[VAL_27]] (%[[VAL_46]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_45]] to %[[VAL_47]] : !fir.logical<4>, !fir.ref> +// CHECK: } +// CHECK: %[[VAL_48:.*]] = arith.subi %[[VAL_18]], %[[VAL_39]] overflow : i64 +// CHECK: %[[VAL_49:.*]] = arith.select %[[VAL_28]], %[[VAL_1]], 
%[[VAL_39]] : i64 +// CHECK: %[[VAL_50:.*]] = fir.convert %[[VAL_48]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_51:.*]] = %[[VAL_0]] to %[[VAL_50]] step %[[VAL_0]] unordered { +// CHECK: %[[VAL_52:.*]] = fir.convert %[[VAL_51]] : (index) -> i64 +// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_49]] overflow : i64 +// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_27]] (%[[VAL_53]]) : (!fir.box>>, i64) -> !fir.ref> +// CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_54]] : !fir.logical<4>, !fir.ref> +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_25]] to %[[VAL_13]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_25]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the array always present boundary. +// subroutine eoshift5(n, array, boundary) +// integer :: n +// real :: array(n,n) +// real :: boundary(:) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift5(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref> {fir.bindc_name = "array"}, %arg2: !fir.box> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift5En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFeoshift5Eboundary"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.convert %3 : (i32) -> index + %5 = arith.cmpi sgt, %4, %c0 : index + %6 = arith.select %5, %4, %c0 : index + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.shape %6, %10 : (index, index) -> !fir.shape<2> + %12:2 = hlfir.declare %arg1(%11) dummy_scope %0 {uniq_name = "_QFeoshift5Earray"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %13 = hlfir.eoshift 
%12#0 %c2_i32 boundary %2#0 : (!fir.box>, i32, !fir.box>) -> !hlfir.expr + hlfir.assign %13 to %12#0 : !hlfir.expr, !fir.box> + hlfir.destroy %13 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPeoshift5( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.box> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5Eboundary"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : index +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : index +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_3]] : index +// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_3]] : index +// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_10]], %[[VAL_14]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_15]]) dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5Earray"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_10]] : (index) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_2]] : (i32) -> 
i64 +// CHECK: %[[VAL_19:.*]] = hlfir.eval_in_mem shape %[[VAL_15]] : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_20:.*]]: !fir.ref>): +// CHECK: %[[VAL_21:.*]] = fir.embox %[[VAL_20]](%[[VAL_15]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_22:.*]] = %[[VAL_1]] to %[[VAL_14]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_22]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref +// CHECK: %[[VAL_25:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.subi %[[VAL_0]], %[[VAL_18]] overflow : i64 +// CHECK: %[[VAL_27:.*]] = arith.select %[[VAL_25]], %[[VAL_26]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_28:.*]] = arith.select %[[VAL_25]], %[[VAL_0]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.subi %[[VAL_0]], %[[VAL_17]] overflow : i64 +// CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_17]], %[[VAL_18]] overflow : i64 +// CHECK: %[[VAL_31:.*]] = arith.cmpi sgt, %[[VAL_29]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_32:.*]] = arith.select %[[VAL_31]], %[[VAL_0]], %[[VAL_30]] : i64 +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_17]], %[[VAL_18]] overflow : i64 +// CHECK: %[[VAL_34:.*]] = arith.cmpi slt, %[[VAL_17]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_35:.*]] = arith.select %[[VAL_34]], %[[VAL_0]], %[[VAL_33]] : i64 +// CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_25]], %[[VAL_32]], %[[VAL_35]] : i64 +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_38:.*]] = %[[VAL_1]] to %[[VAL_37]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i64 +// CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_28]] overflow : i64 +// CHECK: %[[VAL_41:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_40]], %[[VAL_22]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_41]] : !fir.ref +// CHECK: %[[VAL_43:.*]] = 
arith.addi %[[VAL_39]], %[[VAL_27]] overflow : i64 +// CHECK: %[[VAL_44:.*]] = hlfir.designate %[[VAL_21]] (%[[VAL_43]], %[[VAL_22]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_42]] to %[[VAL_44]] : f32, !fir.ref +// CHECK: } +// CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_17]], %[[VAL_36]] overflow : i64 +// CHECK: %[[VAL_46:.*]] = arith.select %[[VAL_25]], %[[VAL_0]], %[[VAL_36]] : i64 +// CHECK: %[[VAL_47:.*]] = fir.convert %[[VAL_45]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_48:.*]] = %[[VAL_1]] to %[[VAL_47]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_48]] : (index) -> i64 +// CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_46]] overflow : i64 +// CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_21]] (%[[VAL_50]], %[[VAL_22]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_51]] : f32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_19]] to %[[VAL_16]]#0 : !hlfir.expr, !fir.box> +// CHECK: hlfir.destroy %[[VAL_19]] : !hlfir.expr +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the array optional boundary. 
+// subroutine eoshift6(n, array, boundary) +// integer :: n +// real :: array(n,n) +// real, optional :: boundary(n) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift6(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref> {fir.bindc_name = "array"}, %arg2: !fir.ref> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift6En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2 = fir.load %1#0 : !fir.ref + %3 = fir.convert %2 : (i32) -> index + %4 = arith.cmpi sgt, %3, %c0 : index + %5 = arith.select %4, %3, %c0 : index + %6 = fir.load %1#0 : !fir.ref + %7 = fir.convert %6 : (i32) -> index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %5, %9 : (index, index) -> !fir.shape<2> + %11:2 = hlfir.declare %arg1(%10) dummy_scope %0 {uniq_name = "_QFeoshift6Earray"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %12 = fir.load %1#0 : !fir.ref + %13 = fir.convert %12 : (i32) -> index + %14 = arith.cmpi sgt, %13, %c0 : index + %15 = arith.select %14, %13, %c0 : index + %16 = fir.shape %15 : (index) -> !fir.shape<1> + %17:2 = hlfir.declare %arg2(%16) dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift6Eboundary"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %18 = fir.is_present %17#0 : (!fir.box>) -> i1 + %19 = fir.shape %15 : (index) -> !fir.shape<1> + %20 = fir.embox %17#1(%19) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %21 = fir.absent !fir.box> + %22 = arith.select %18, %20, %21 : !fir.box> + %23 = hlfir.eoshift %11#0 %c2_i32 boundary %22 : (!fir.box>, i32, !fir.box>) -> !hlfir.expr + hlfir.assign %23 to %11#0 : !hlfir.expr, !fir.box> + hlfir.destroy %23 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPeoshift6( +// CHECK-SAME: %[[ARG0:.*]]: 
!fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.ref> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant false +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift6En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_6]] : index +// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_6]] : index +// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index +// CHECK: %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_6]] : index +// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_6]] : index +// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_12]], %[[VAL_16]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_17]]) dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift6Earray"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> index +// CHECK: %[[VAL_21:.*]] = arith.cmpi sgt, %[[VAL_20]], %[[VAL_6]] : index +// CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_20]], %[[VAL_6]] : index +// CHECK: %[[VAL_23:.*]] = fir.shape %[[VAL_22]] : (index) -> 
!fir.shape<1> +// CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[ARG2]](%[[VAL_23]]) dummy_scope %[[VAL_7]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift6Eboundary"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_25:.*]] = fir.is_present %[[VAL_24]]#0 : (!fir.box>) -> i1 +// CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_22]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_24]]#1(%[[VAL_26]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_28:.*]] = fir.absent !fir.box> +// CHECK: %[[VAL_29:.*]] = arith.select %[[VAL_25]], %[[VAL_27]], %[[VAL_28]] : !fir.box> +// CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_5]] : (i32) -> i64 +// CHECK: %[[VAL_32:.*]] = fir.is_present %[[VAL_29]] : (!fir.box>) -> i1 +// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_2]], %[[VAL_3]] : i1 +// CHECK: %[[VAL_34:.*]] = hlfir.eval_in_mem shape %[[VAL_17]] : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_35:.*]]: !fir.ref>): +// CHECK: %[[VAL_36:.*]] = fir.embox %[[VAL_35]](%[[VAL_17]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_37:.*]] = %[[VAL_1]] to %[[VAL_16]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_38:.*]] = fir.if %[[VAL_33]] -> (f32) { +// CHECK: fir.result %[[VAL_4]] : f32 +// CHECK: } else { +// CHECK: %[[VAL_39:.*]]:3 = fir.box_dims %[[VAL_29]], %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_40:.*]] = arith.subi %[[VAL_39]]#0, %[[VAL_1]] overflow : index +// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_37]], %[[VAL_40]] overflow : index +// CHECK: %[[VAL_42:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_41]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_42]] : !fir.ref +// CHECK: fir.result %[[VAL_43]] : f32 +// CHECK: } +// CHECK: %[[VAL_44:.*]] = arith.cmpi slt, %[[VAL_31]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_45:.*]] = arith.subi 
%[[VAL_0]], %[[VAL_31]] overflow : i64 +// CHECK: %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_47:.*]] = arith.select %[[VAL_44]], %[[VAL_0]], %[[VAL_31]] : i64 +// CHECK: %[[VAL_48:.*]] = arith.subi %[[VAL_0]], %[[VAL_30]] overflow : i64 +// CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_30]], %[[VAL_31]] overflow : i64 +// CHECK: %[[VAL_50:.*]] = arith.cmpi sgt, %[[VAL_48]], %[[VAL_31]] : i64 +// CHECK: %[[VAL_51:.*]] = arith.select %[[VAL_50]], %[[VAL_0]], %[[VAL_49]] : i64 +// CHECK: %[[VAL_52:.*]] = arith.subi %[[VAL_30]], %[[VAL_31]] overflow : i64 +// CHECK: %[[VAL_53:.*]] = arith.cmpi slt, %[[VAL_30]], %[[VAL_31]] : i64 +// CHECK: %[[VAL_54:.*]] = arith.select %[[VAL_53]], %[[VAL_0]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_55:.*]] = arith.select %[[VAL_44]], %[[VAL_51]], %[[VAL_54]] : i64 +// CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_57:.*]] = %[[VAL_1]] to %[[VAL_56]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_58:.*]] = fir.convert %[[VAL_57]] : (index) -> i64 +// CHECK: %[[VAL_59:.*]] = arith.addi %[[VAL_58]], %[[VAL_47]] overflow : i64 +// CHECK: %[[VAL_60:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_59]], %[[VAL_37]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: %[[VAL_61:.*]] = fir.load %[[VAL_60]] : !fir.ref +// CHECK: %[[VAL_62:.*]] = arith.addi %[[VAL_58]], %[[VAL_46]] overflow : i64 +// CHECK: %[[VAL_63:.*]] = hlfir.designate %[[VAL_36]] (%[[VAL_62]], %[[VAL_37]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_61]] to %[[VAL_63]] : f32, !fir.ref +// CHECK: } +// CHECK: %[[VAL_64:.*]] = arith.subi %[[VAL_30]], %[[VAL_55]] overflow : i64 +// CHECK: %[[VAL_65:.*]] = arith.select %[[VAL_44]], %[[VAL_0]], %[[VAL_55]] : i64 +// CHECK: %[[VAL_66:.*]] = fir.convert %[[VAL_64]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_67:.*]] = %[[VAL_1]] to %[[VAL_66]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_68:.*]] = fir.convert %[[VAL_67]] : 
(index) -> i64 +// CHECK: %[[VAL_69:.*]] = arith.addi %[[VAL_68]], %[[VAL_65]] overflow : i64 +// CHECK: %[[VAL_70:.*]] = hlfir.designate %[[VAL_36]] (%[[VAL_69]], %[[VAL_37]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_38]] to %[[VAL_70]] : f32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_18]]#0 : !hlfir.expr, !fir.box> +// CHECK: hlfir.destroy %[[VAL_34]] : !hlfir.expr +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the array expression boundary. +// subroutine eoshift7(n, array) +// interface +// function real_boundary(n) +// integer :: n +// real :: real_boundary(n) +// end function +// end interface +// integer :: n +// real :: array(n,n) +// array = EOSHIFT(array, 2, real_boundary(n)) +// end subroutine +func.func @_QPeoshift7(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref> {fir.bindc_name = "array"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift7En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2 = fir.load %1#0 : !fir.ref + %3 = fir.convert %2 : (i32) -> index + %4 = arith.cmpi sgt, %3, %c0 : index + %5 = arith.select %4, %3, %c0 : index + %6 = fir.load %1#0 : !fir.ref + %7 = fir.convert %6 : (i32) -> index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %5, %9 : (index, index) -> !fir.shape<2> + %11:2 = hlfir.declare %arg1(%10) dummy_scope %0 {uniq_name = "_QFeoshift7Earray"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %12:2 = hlfir.declare %1#0 {uniq_name = "_QFeoshift7Freal_boundaryEn"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %13 = fir.load %12#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> index + %15 = arith.cmpi sgt, %14, %c0 : index + %16 = arith.select %15, %14, %c0 : index + %17 = fir.shape %16 : (index) -> !fir.shape<1> + %18 = 
hlfir.eval_in_mem shape %17 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg2: !fir.ref>): + %20 = fir.call @_QPreal_boundary(%1#0) fastmath : (!fir.ref) -> !fir.array + fir.save_result %20 to %arg2(%17) : !fir.array, !fir.ref>, !fir.shape<1> + } + %19 = hlfir.eoshift %11#0 %c2_i32 boundary %18 : (!fir.box>, i32, !hlfir.expr) -> !hlfir.expr + hlfir.assign %19 to %11#0 : !hlfir.expr, !fir.box> + hlfir.destroy %19 : !hlfir.expr + hlfir.destroy %18 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPeoshift7( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.ref> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift7En"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> index +// CHECK: %[[VAL_8:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_3]] : index +// CHECK: %[[VAL_9:.*]] = arith.select %[[VAL_8]], %[[VAL_7]], %[[VAL_3]] : index +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_3]] : index +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_3]] : index +// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_9]], %[[VAL_13]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[ARG1]](%[[VAL_14]]) dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift7Earray"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_5]]#0 {uniq_name = 
"_QFeoshift7Freal_boundaryEn"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index +// CHECK: %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_3]] : index +// CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_19]], %[[VAL_18]], %[[VAL_3]] : index +// CHECK: %[[VAL_21:.*]] = fir.shape %[[VAL_20]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_22:.*]] = hlfir.eval_in_mem shape %[[VAL_21]] : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_23:.*]]: !fir.ref>): +// CHECK: %[[VAL_24:.*]] = fir.call @_QPreal_boundary(%[[VAL_5]]#0) fastmath : (!fir.ref) -> !fir.array +// CHECK: fir.save_result %[[VAL_24]] to %[[VAL_23]](%[[VAL_21]]) : !fir.array, !fir.ref>, !fir.shape<1> +// CHECK: } +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_9]] : (index) -> i64 +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +// CHECK: %[[VAL_27:.*]] = hlfir.eval_in_mem shape %[[VAL_14]] : (!fir.shape<2>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_28:.*]]: !fir.ref>): +// CHECK: %[[VAL_29:.*]] = fir.embox %[[VAL_28]](%[[VAL_14]]) : (!fir.ref>, !fir.shape<2>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_1]] to %[[VAL_13]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_31:.*]] = hlfir.apply %[[VAL_22]], %[[VAL_30]] : (!hlfir.expr, index) -> f32 +// CHECK: %[[VAL_32:.*]] = arith.cmpi slt, %[[VAL_26]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_0]], %[[VAL_26]] overflow : i64 +// CHECK: %[[VAL_34:.*]] = arith.select %[[VAL_32]], %[[VAL_33]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_35:.*]] = arith.select %[[VAL_32]], %[[VAL_0]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_36:.*]] = arith.subi %[[VAL_0]], %[[VAL_25]] overflow : i64 +// CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_25]], %[[VAL_26]] overflow : i64 +// CHECK: %[[VAL_38:.*]] = arith.cmpi sgt, %[[VAL_36]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_38]], %[[VAL_0]], 
%[[VAL_37]] : i64 +// CHECK: %[[VAL_40:.*]] = arith.subi %[[VAL_25]], %[[VAL_26]] overflow : i64 +// CHECK: %[[VAL_41:.*]] = arith.cmpi slt, %[[VAL_25]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_42:.*]] = arith.select %[[VAL_41]], %[[VAL_0]], %[[VAL_40]] : i64 +// CHECK: %[[VAL_43:.*]] = arith.select %[[VAL_32]], %[[VAL_39]], %[[VAL_42]] : i64 +// CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_45:.*]] = %[[VAL_1]] to %[[VAL_44]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_46:.*]] = fir.convert %[[VAL_45]] : (index) -> i64 +// CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_35]] overflow : i64 +// CHECK: %[[VAL_48:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_47]], %[[VAL_30]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: %[[VAL_49:.*]] = fir.load %[[VAL_48]] : !fir.ref +// CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_46]], %[[VAL_34]] overflow : i64 +// CHECK: %[[VAL_51:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_50]], %[[VAL_30]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_49]] to %[[VAL_51]] : f32, !fir.ref +// CHECK: } +// CHECK: %[[VAL_52:.*]] = arith.subi %[[VAL_25]], %[[VAL_43]] overflow : i64 +// CHECK: %[[VAL_53:.*]] = arith.select %[[VAL_32]], %[[VAL_0]], %[[VAL_43]] : i64 +// CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_52]] : (i64) -> index +// CHECK: fir.do_loop %[[VAL_55:.*]] = %[[VAL_1]] to %[[VAL_54]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (index) -> i64 +// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_56]], %[[VAL_53]] overflow : i64 +// CHECK: %[[VAL_58:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_57]], %[[VAL_30]]) : (!fir.box>, i64, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_31]] to %[[VAL_58]] : f32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_15]]#0 : !hlfir.expr, !fir.box> +// CHECK: hlfir.destroy %[[VAL_27]] : !hlfir.expr +// CHECK: hlfir.destroy %[[VAL_22]] : !hlfir.expr +// 
CHECK: return +// CHECK: } + +// ! Tests for CHARACTER type (lowered via hlfir.elemental). + +// ! Test contiguous 1D array with statically absent boundary. +// ! CHARACTER with constant length. +// subroutine eoshift1c(n, array) +// integer :: n +// character(10,1) :: array(n) +// array = EOSHIFT(array, 2) +// end subroutine +func.func @_QPeoshift1c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift1cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.cmpi sgt, %5, %c0 : index + %7 = arith.select %6, %5, %c0 : index + %8 = fir.shape %7 : (index) -> !fir.shape<1> + %9:2 = hlfir.declare %3(%8) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift1cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %10 = hlfir.eoshift %9#0 %c2_i32 : (!fir.box>>, i32) -> !hlfir.expr> + hlfir.assign %10 to %9#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %10 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift1c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift1cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: 
%[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index +// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : index +// CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_2]] : index +// CHECK: %[[VAL_12:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_12]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift1cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (index) -> i64 +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_16:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_17:.*]] = fir.emboxchar %[[VAL_16]], %[[VAL_2]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_18:.*]] = hlfir.elemental %[[VAL_12]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_19:.*]]: index): +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i64 +// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_15]] overflow : i64 +// CHECK: %[[VAL_22:.*]] = arith.cmpi sge, %[[VAL_21]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.cmpi sle, %[[VAL_21]], %[[VAL_14]] : i64 +// CHECK: %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1 +// CHECK: %[[VAL_25:.*]] = fir.if %[[VAL_24]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_21]] : (i64) -> index +// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_26]]) typeparams %[[VAL_3]] : (!fir.box>>, index, index) -> !fir.ref> +// CHECK: %[[VAL_28:.*]] = fir.emboxchar %[[VAL_27]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> +// 
CHECK: fir.result %[[VAL_28]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_17]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_25]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_13]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_18]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with statically absent boundary. +// ! CHARACTER with variable length. +// subroutine eoshift2c(n, array) +// integer :: n +// character(n,1) :: array(n) +// array = EOSHIFT(array, 2) +// end subroutine +func.func @_QPeoshift2c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift2cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + %6 = arith.select %5, %4, %c0_i32 : i32 + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.shape %10 : (index) -> !fir.shape<1> + %12:2 = hlfir.declare %3(%11) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift2cEarray"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %13 = hlfir.eoshift %12#0 %c2_i32 : (!fir.box>>, i32) -> !hlfir.expr> + hlfir.assign %13 to %12#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %13 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift2c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : 
i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift2cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_15]]) typeparams %[[VAL_10]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift2cEarray"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_19:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_20:.*]] = fir.emboxchar %[[VAL_19]], %[[VAL_2]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_21:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_10]] unordered : (!fir.shape<1>, i32) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_22:.*]]: index): +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (index) -> i64 +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_18]] overflow : i64 +// 
CHECK: %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.cmpi sle, %[[VAL_24]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1 +// CHECK: %[[VAL_28:.*]] = fir.if %[[VAL_27]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_24]] : (i64) -> index +// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_29]]) typeparams %[[VAL_10]] : (!fir.box>>, index, i32) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_30]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_20]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_28]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_16]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_21]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with statically absent boundary. +// ! CHARACTER with assumed length. +// subroutine eoshift3c(n, array) +// integer :: n +// character(*,1) :: array(n) +// array = EOSHIFT(array, 2) +// end subroutine +func.func @_QPeoshift3c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift3cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.cmpi sgt, %5, %c0 : index + %7 = arith.select %6, %5, %c0 : index + %8 = fir.shape %7 : (index) -> !fir.shape<1> + %9:2 = hlfir.declare %3(%8) typeparams %2#1 dummy_scope %0 {uniq_name = "_QFeoshift3cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %10 = hlfir.eoshift %9#0 %c2_i32 : (!fir.box>>, i32) 
-> !hlfir.expr> + hlfir.assign %10 to %9#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %10 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift3c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift3cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_2]] : index +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_2]] : index +// CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_11]]) typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift3cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (index) -> i64 +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_15:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_16:.*]] = fir.emboxchar %[[VAL_15]], %[[VAL_2]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_17:.*]] = hlfir.elemental %[[VAL_11]] typeparams %[[VAL_5]]#1 unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_18:.*]]: index): +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (index) -> i64 +// 
CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_14]] overflow : i64 +// CHECK: %[[VAL_21:.*]] = arith.cmpi sge, %[[VAL_20]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.cmpi sle, %[[VAL_20]], %[[VAL_13]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_21]], %[[VAL_22]] : i1 +// CHECK: %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_20]] : (i64) -> index +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_25]]) typeparams %[[VAL_5]]#1 : (!fir.box>>, index, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_26]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_16]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_24]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_12]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_17]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with scalar constant boundary. 
+// subroutine eoshift4c(n, array) +// integer :: n +// character(10,1) :: array(n) +// array = EOSHIFT(array, 2, '0123456789') +// end subroutine +func.func @_QPeoshift4c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift4cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.cmpi sgt, %5, %c0 : index + %7 = arith.select %6, %5, %c0 : index + %8 = fir.shape %7 : (index) -> !fir.shape<1> + %9:2 = hlfir.declare %3(%8) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift4cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %10 = fir.address_of(@_QQclX30313233343536373839) : !fir.ref> + %11:2 = hlfir.declare %10 typeparams %c10 {fortran_attrs = #fir.var_attrs, uniq_name = "_QQclX30313233343536373839"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) + %12 = hlfir.eoshift %9#0 %c2_i32 boundary %11#0 : (!fir.box>>, i32, !fir.ref>) -> !hlfir.expr> + hlfir.assign %12 to %9#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %12 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift4c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = 
"_QFeoshift4cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index +// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_2]] : index +// CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], %[[VAL_2]] : index +// CHECK: %[[VAL_12:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_12]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift4cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_14:.*]] = fir.address_of(@_QQclX30313233343536373839) : !fir.ref> +// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] typeparams %[[VAL_3]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQclX30313233343536373839"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_11]] : (index) -> i64 +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.emboxchar %[[VAL_15]]#0, %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_19:.*]] = hlfir.elemental %[[VAL_12]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_20:.*]]: index): +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64 +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_21]], %[[VAL_17]] overflow : i64 +// CHECK: %[[VAL_23:.*]] = arith.cmpi sge, %[[VAL_22]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_24:.*]] = arith.cmpi sle, %[[VAL_22]], %[[VAL_16]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1 +// CHECK: %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_27:.*]] = 
fir.convert %[[VAL_22]] : (i64) -> index +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_27]]) typeparams %[[VAL_3]] : (!fir.box>>, index, index) -> !fir.ref> +// CHECK: %[[VAL_29:.*]] = fir.emboxchar %[[VAL_28]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_29]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_18]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_26]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_19]] to %[[VAL_13]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_19]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with scalar always present boundary. +// ! CHARACTER with constant length. +// subroutine eoshift5c(n, array, boundary) +// integer :: n +// character(10,1) :: array(n), boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift5c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift5cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref> + %4:2 = hlfir.declare %3 typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift5cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %6 = fir.convert %5#0 : (!fir.ref>) -> !fir.ref>> + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.shape %10 : (index) -> !fir.shape<1> + %12:2 = hlfir.declare %6(%11) 
typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift5cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %13 = hlfir.eoshift %12#0 %c2_i32 boundary %4#0 : (!fir.box>>, i32, !fir.ref>) -> !hlfir.expr> + hlfir.assign %13 to %12#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %13 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift5c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare 
%[[VAL_10]](%[[VAL_15]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift5cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_19:.*]] = fir.emboxchar %[[VAL_8]]#0, %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_20:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_21:.*]]: index): +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]] : (index) -> i64 +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_22]], %[[VAL_18]] overflow : i64 +// CHECK: %[[VAL_24:.*]] = arith.cmpi sge, %[[VAL_23]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_25:.*]] = arith.cmpi sle, %[[VAL_23]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_26:.*]] = arith.andi %[[VAL_24]], %[[VAL_25]] : i1 +// CHECK: %[[VAL_27:.*]] = fir.if %[[VAL_26]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_23]] : (i64) -> index +// CHECK: %[[VAL_29:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_28]]) typeparams %[[VAL_3]] : (!fir.box>>, index, index) -> !fir.ref> +// CHECK: %[[VAL_30:.*]] = fir.emboxchar %[[VAL_29]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_30]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_19]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_27]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_20]] to %[[VAL_16]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_20]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with scalar always present boundary. +// ! CHARACTER with variable length. 
+// subroutine eoshift6c(n, array, boundary) +// integer :: n +// character(n,1) :: array(n), boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift6c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift6cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + %6 = arith.select %5, %4, %c0_i32 : i32 + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.shape %10 : (index) -> !fir.shape<1> + %12:2 = hlfir.declare %3(%11) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift6cEarray"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %13:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %14 = fir.load %1#0 : !fir.ref + %15 = arith.cmpi sgt, %14, %c0_i32 : i32 + %16 = arith.select %15, %14, %c0_i32 : i32 + %17:2 = hlfir.declare %13#0 typeparams %16 dummy_scope %0 {uniq_name = "_QFeoshift6cEboundary"} : (!fir.ref>, i32, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref>) + %18 = hlfir.eoshift %12#0 %c2_i32 boundary %17#0 : (!fir.box>>, i32, !fir.boxchar<1>) -> !hlfir.expr> + hlfir.assign %18 to %12#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %18 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift6c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<1> 
{fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift6cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_15]]) typeparams %[[VAL_10]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift6cEarray"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_17:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_19]], %[[VAL_18]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]]#0 typeparams %[[VAL_20]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift6cEboundary"} : (!fir.ref>, i32, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref>) +// CHECK: %[[VAL_22:.*]] = fir.convert 
%[[VAL_14]] : (index) -> i64 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_24:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_10]] unordered : (!fir.shape<1>, i32) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_25:.*]]: index): +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (index) -> i64 +// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_23]] overflow : i64 +// CHECK: %[[VAL_28:.*]] = arith.cmpi sge, %[[VAL_27]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.cmpi sle, %[[VAL_27]], %[[VAL_22]] : i64 +// CHECK: %[[VAL_30:.*]] = arith.andi %[[VAL_28]], %[[VAL_29]] : i1 +// CHECK: %[[VAL_31:.*]] = fir.if %[[VAL_30]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_27]] : (i64) -> index +// CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_32]]) typeparams %[[VAL_10]] : (!fir.box>>, index, i32) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_33]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_21]]#0 : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_31]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_16]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_24]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with scalar always present boundary. +// ! CHARACTER with assumed length. 
+// subroutine eoshift7c(n, array, boundary) +// integer :: n +// character(*,1) :: array(n), boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift7c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift7cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {uniq_name = "_QFeoshift7cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref>) + %4:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %5 = fir.convert %4#0 : (!fir.ref>) -> !fir.ref>> + %6 = fir.load %1#0 : !fir.ref + %7 = fir.convert %6 : (i32) -> index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11:2 = hlfir.declare %5(%10) typeparams %4#1 dummy_scope %0 {uniq_name = "_QFeoshift7cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %12 = hlfir.eoshift %11#0 %c2_i32 boundary %3#0 : (!fir.box>>, i32, !fir.boxchar<1>) -> !hlfir.expr> + hlfir.assign %12 to %11#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %12 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift7c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_4:.*]]:2 = 
hlfir.declare %[[ARG0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift7cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_5:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]]#0 typeparams %[[VAL_5]]#1 dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift7cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref>) +// CHECK: %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_2]] : index +// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_2]] : index +// CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_12]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_13]]) typeparams %[[VAL_7]]#1 dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift7cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_17:.*]] = hlfir.elemental %[[VAL_13]] typeparams %[[VAL_7]]#1 unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_18:.*]]: index): +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (index) -> i64 +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_16]] overflow : i64 +// CHECK: %[[VAL_21:.*]] = arith.cmpi sge, %[[VAL_20]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_22:.*]] = arith.cmpi sle, %[[VAL_20]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_21]], %[[VAL_22]] : i1 +// CHECK: %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_20]] : 
(i64) -> index +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_14]]#0 (%[[VAL_25]]) typeparams %[[VAL_7]]#1 : (!fir.box>>, index, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_26]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_6]]#0 : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_24]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_14]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_17]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the scalar optional boundary. +// ! CHARACTER with constant length. +// subroutine eoshift8c(n, array, boundary) +// integer :: n +// character(10,2) :: array(n) +// character(10,2), optional :: boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift8c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift8cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref> + %4:2 = hlfir.declare %3 typeparams %c10 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift8cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %5:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref>, index) + %6 = fir.convert %5#0 : (!fir.ref>) -> !fir.ref>> + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.shape %10 : (index) -> !fir.shape<1> + %12:2 = hlfir.declare %6(%11) typeparams %c10 dummy_scope %0 {uniq_name = 
"_QFeoshift8cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %13 = fir.is_present %4#0 : (!fir.ref>) -> i1 + %14 = fir.embox %4#0 : (!fir.ref>) -> !fir.box> + %15 = fir.absent !fir.box> + %16 = arith.select %13, %14, %15 : !fir.box> + %17 = hlfir.eoshift %12#0 %c2_i32 boundary %16 : (!fir.box>>, i32, !fir.box>) -> !hlfir.expr> + hlfir.assign %17 to %12#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %17 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift8c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift8cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift8cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref>, index) +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_14:.*]] = arith.select 
%[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_15]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift8cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_17:.*]] = fir.is_present %[[VAL_8]]#0 : (!fir.ref>) -> i1 +// CHECK: %[[VAL_18:.*]] = fir.embox %[[VAL_8]]#0 : (!fir.ref>) -> !fir.box> +// CHECK: %[[VAL_19:.*]] = fir.absent !fir.box> +// CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_17]], %[[VAL_18]], %[[VAL_19]] : !fir.box> +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_14]] : (index) -> i64 +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_23:.*]] = fir.is_present %[[VAL_20]] : (!fir.box>) -> i1 +// CHECK: %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.boxchar<2>) { +// CHECK: %[[VAL_25:.*]] = fir.box_addr %[[VAL_20]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_26:.*]] = fir.emboxchar %[[VAL_25]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_26]] : !fir.boxchar<2> +// CHECK: } else { +// CHECK: %[[VAL_27:.*]] = fir.alloca !fir.char<2,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_28:.*]] = fir.emboxchar %[[VAL_27]], %[[VAL_2]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_28]] : !fir.boxchar<2> +// CHECK: } +// CHECK: %[[VAL_29:.*]] = hlfir.elemental %[[VAL_15]] typeparams %[[VAL_3]] unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_30:.*]]: index): +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (index) -> i64 +// CHECK: %[[VAL_32:.*]] = arith.addi %[[VAL_31]], %[[VAL_22]] overflow : i64 +// CHECK: %[[VAL_33:.*]] = arith.cmpi sge, %[[VAL_32]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_34:.*]] = arith.cmpi sle, %[[VAL_32]], %[[VAL_21]] : i64 +// CHECK: %[[VAL_35:.*]] = arith.andi %[[VAL_33]], %[[VAL_34]] : i1 +// CHECK: %[[VAL_36:.*]] = 
fir.if %[[VAL_35]] -> (!fir.boxchar<2>) { +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_32]] : (i64) -> index +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_16]]#0 (%[[VAL_37]]) typeparams %[[VAL_3]] : (!fir.box>>, index, index) -> !fir.ref> +// CHECK: %[[VAL_39:.*]] = fir.emboxchar %[[VAL_38]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_39]] : !fir.boxchar<2> +// CHECK: } else { +// CHECK: fir.result %[[VAL_24]] : !fir.boxchar<2> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_36]] : !fir.boxchar<2> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_29]] to %[[VAL_16]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_29]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the scalar optional boundary. +// ! CHARACTER with variable length. +// subroutine eoshift9c(n, array, boundary) +// integer :: n +// character(n,2) :: array(n) +// character(n,2), optional :: boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift9c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift9cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + %6 = arith.select %5, %4, %c0_i32 : i32 + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.shape %10 : (index) -> !fir.shape<1> + %12:2 = hlfir.declare %3(%11) typeparams %6 dummy_scope %0 {uniq_name = 
"_QFeoshift9cEarray"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %13:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref>, index) + %14 = fir.load %1#0 : !fir.ref + %15 = arith.cmpi sgt, %14, %c0_i32 : i32 + %16 = arith.select %15, %14, %c0_i32 : i32 + %17:2 = hlfir.declare %13#0 typeparams %16 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift9cEboundary"} : (!fir.ref>, i32, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref>) + %18 = fir.is_present %17#0 : (!fir.boxchar<2>) -> i1 + %19 = fir.embox %17#1 typeparams %16 : (!fir.ref>, i32) -> !fir.box> + %20 = fir.absent !fir.box> + %21 = arith.select %18, %19, %20 : !fir.box> + %22 = hlfir.eoshift %12#0 %c2_i32 boundary %21 : (!fir.box>>, i32, !fir.box>) -> !hlfir.expr> + hlfir.assign %22 to %12#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %22 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift9c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift9cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref>, index) +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_4]] : i32 +// CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_9]], 
%[[VAL_4]] : i32 +// CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i32) -> index +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_3]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14]], %[[VAL_13]], %[[VAL_3]] : index +// CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_15]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_16]]) typeparams %[[VAL_11]] dummy_scope %[[VAL_5]] {uniq_name = "_QFeoshift9cEarray"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_18:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref>, index) +// CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +// CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_4]] : i32 +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_19]], %[[VAL_4]] : i32 +// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_18]]#0 typeparams %[[VAL_21]] dummy_scope %[[VAL_5]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift9cEboundary"} : (!fir.ref>, i32, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref>) +// CHECK: %[[VAL_23:.*]] = fir.is_present %[[VAL_22]]#0 : (!fir.boxchar<2>) -> i1 +// CHECK: %[[VAL_24:.*]] = fir.embox %[[VAL_22]]#1 typeparams %[[VAL_21]] : (!fir.ref>, i32) -> !fir.box> +// CHECK: %[[VAL_25:.*]] = fir.absent !fir.box> +// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_25]] : !fir.box> +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_15]] : (index) -> i64 +// CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +// CHECK: %[[VAL_29:.*]] = fir.is_present %[[VAL_26]] : (!fir.box>) -> i1 +// CHECK: %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (!fir.boxchar<2>) { +// CHECK: %[[VAL_31:.*]] = fir.box_addr %[[VAL_26]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_32:.*]] = fir.box_elesize %[[VAL_26]] : (!fir.box>) -> index +// CHECK: %[[VAL_33:.*]] = arith.divsi %[[VAL_32]], %[[VAL_1]] : 
index +// CHECK: %[[VAL_34:.*]] = fir.emboxchar %[[VAL_31]], %[[VAL_33]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_34]] : !fir.boxchar<2> +// CHECK: } else { +// CHECK: %[[VAL_35:.*]] = fir.alloca !fir.char<2,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_36:.*]] = fir.emboxchar %[[VAL_35]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_36]] : !fir.boxchar<2> +// CHECK: } +// CHECK: %[[VAL_37:.*]] = hlfir.elemental %[[VAL_16]] typeparams %[[VAL_11]] unordered : (!fir.shape<1>, i32) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_38:.*]]: index): +// CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i64 +// CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_28]] overflow : i64 +// CHECK: %[[VAL_41:.*]] = arith.cmpi sge, %[[VAL_40]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_42:.*]] = arith.cmpi sle, %[[VAL_40]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_43:.*]] = arith.andi %[[VAL_41]], %[[VAL_42]] : i1 +// CHECK: %[[VAL_44:.*]] = fir.if %[[VAL_43]] -> (!fir.boxchar<2>) { +// CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_40]] : (i64) -> index +// CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_17]]#0 (%[[VAL_45]]) typeparams %[[VAL_11]] : (!fir.box>>, index, i32) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_46]] : !fir.boxchar<2> +// CHECK: } else { +// CHECK: fir.result %[[VAL_30]] : !fir.boxchar<2> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_44]] : !fir.boxchar<2> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_17]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_37]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the scalar optional boundary. +// ! CHARACTER with assumed length. 
+// subroutine eoshift10c(n, array, boundary) +// integer :: n +// character(*,2) :: array(n) +// character(*,2), optional :: boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift10c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<2> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift10cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg2 : (!fir.boxchar<2>) -> (!fir.ref>, index) + %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift10cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref>) + %4:2 = fir.unboxchar %arg1 : (!fir.boxchar<2>) -> (!fir.ref>, index) + %5 = fir.convert %4#0 : (!fir.ref>) -> !fir.ref>> + %6 = fir.load %1#0 : !fir.ref + %7 = fir.convert %6 : (i32) -> index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11:2 = hlfir.declare %5(%10) typeparams %4#1 dummy_scope %0 {uniq_name = "_QFeoshift10cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %12 = fir.is_present %3#0 : (!fir.boxchar<2>) -> i1 + %13 = fir.embox %3#1 typeparams %2#1 : (!fir.ref>, index) -> !fir.box> + %14 = fir.absent !fir.box> + %15 = arith.select %12, %13, %14 : !fir.box> + %16 = hlfir.eoshift %11#0 %c2_i32 boundary %15 : (!fir.box>>, i32, !fir.box>) -> !hlfir.expr> + hlfir.assign %16 to %11#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %16 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift10c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<2> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: 
!fir.boxchar<2> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift10cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<2>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]]#0 typeparams %[[VAL_6]]#1 dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift10cEboundary"} : (!fir.ref>, index, !fir.dscope) -> (!fir.boxchar<2>, !fir.ref>) +// CHECK: %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<2>) -> (!fir.ref>, index) +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_3]] : index +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_3]] : index +// CHECK: %[[VAL_14:.*]] = fir.shape %[[VAL_13]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_14]]) typeparams %[[VAL_8]]#1 dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift10cEarray"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_16:.*]] = fir.is_present %[[VAL_7]]#0 : (!fir.boxchar<2>) -> i1 +// CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_7]]#1 typeparams %[[VAL_6]]#1 : (!fir.ref>, index) -> !fir.box> +// CHECK: %[[VAL_18:.*]] = fir.absent !fir.box> +// CHECK: %[[VAL_19:.*]] = arith.select %[[VAL_16]], %[[VAL_17]], %[[VAL_18]] : !fir.box> +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 +// 
CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +// CHECK: %[[VAL_22:.*]] = fir.is_present %[[VAL_19]] : (!fir.box>) -> i1 +// CHECK: %[[VAL_23:.*]] = fir.if %[[VAL_22]] -> (!fir.boxchar<2>) { +// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_19]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.box_elesize %[[VAL_19]] : (!fir.box>) -> index +// CHECK: %[[VAL_26:.*]] = arith.divsi %[[VAL_25]], %[[VAL_1]] : index +// CHECK: %[[VAL_27:.*]] = fir.emboxchar %[[VAL_24]], %[[VAL_26]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_27]] : !fir.boxchar<2> +// CHECK: } else { +// CHECK: %[[VAL_28:.*]] = fir.alloca !fir.char<2,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_29:.*]] = fir.emboxchar %[[VAL_28]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_29]] : !fir.boxchar<2> +// CHECK: } +// CHECK: %[[VAL_30:.*]] = hlfir.elemental %[[VAL_14]] typeparams %[[VAL_8]]#1 unordered : (!fir.shape<1>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_31:.*]]: index): +// CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (index) -> i64 +// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_21]] overflow : i64 +// CHECK: %[[VAL_34:.*]] = arith.cmpi sge, %[[VAL_33]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_35:.*]] = arith.cmpi sle, %[[VAL_33]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_36:.*]] = arith.andi %[[VAL_34]], %[[VAL_35]] : i1 +// CHECK: %[[VAL_37:.*]] = fir.if %[[VAL_36]] -> (!fir.boxchar<2>) { +// CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_33]] : (i64) -> index +// CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_15]]#0 (%[[VAL_38]]) typeparams %[[VAL_8]]#1 : (!fir.box>>, index, index) -> !fir.boxchar<2> +// CHECK: fir.result %[[VAL_39]] : !fir.boxchar<2> +// CHECK: } else { +// CHECK: fir.result %[[VAL_23]] : !fir.boxchar<2> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_37]] : !fir.boxchar<2> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_15]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: 
hlfir.destroy %[[VAL_30]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 2D array with the array always present boundary. +// ! CHARACTER with constant length. +// subroutine eoshift11c(n, array, boundary) +// integer :: n +// character(10,4) :: array(n,n), boundary(:) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift11c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "array"}, %arg2: !fir.box>> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift11cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift11cEboundary"} : (!fir.box>>, index, !fir.dscope) -> (!fir.box>>, !fir.box>>) + %3:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref>, index) + %4 = fir.convert %3#0 : (!fir.ref>) -> !fir.ref>> + %5 = fir.load %1#0 : !fir.ref + %6 = fir.convert %5 : (i32) -> index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.load %1#0 : !fir.ref + %10 = fir.convert %9 : (i32) -> index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.shape %8, %12 : (index, index) -> !fir.shape<2> + %14:2 = hlfir.declare %4(%13) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift11cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %15 = hlfir.eoshift %14#0 %c2_i32 boundary %2#0 : (!fir.box>>, i32, !fir.box>>) -> !hlfir.expr> + hlfir.assign %15 to %14#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %15 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift11c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "array"}, +//
CHECK-SAME: %[[ARG2:.*]]: !fir.box>> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift11cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift11cEboundary"} : (!fir.box>>, index, !fir.dscope) -> (!fir.box>>, !fir.box>>) +// CHECK: %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref>, index) +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_2]] : index +// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_2]] : index +// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index +// CHECK: %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_2]] : index +// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_2]] : index +// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_12]], %[[VAL_16]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_17]]) typeparams %[[VAL_3]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift11cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_21:.*]] = hlfir.elemental %[[VAL_17]] typeparams %[[VAL_3]] unordered : 
(!fir.shape<2>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index): +// CHECK: %[[VAL_24:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_23]]) typeparams %[[VAL_3]] : (!fir.box>>, index, index) -> !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.emboxchar %[[VAL_24]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<4> +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_22]] : (index) -> i64 +// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_20]] overflow : i64 +// CHECK: %[[VAL_28:.*]] = arith.cmpi sge, %[[VAL_27]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_29:.*]] = arith.cmpi sle, %[[VAL_27]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_30:.*]] = arith.andi %[[VAL_28]], %[[VAL_29]] : i1 +// CHECK: %[[VAL_31:.*]] = fir.if %[[VAL_30]] -> (!fir.boxchar<4>) { +// CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_27]] : (i64) -> index +// CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_32]], %[[VAL_23]]) typeparams %[[VAL_3]] : (!fir.box>>, index, index, index) -> !fir.ref> +// CHECK: %[[VAL_34:.*]] = fir.emboxchar %[[VAL_33]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<4> +// CHECK: fir.result %[[VAL_34]] : !fir.boxchar<4> +// CHECK: } else { +// CHECK: fir.result %[[VAL_25]] : !fir.boxchar<4> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_31]] : !fir.boxchar<4> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_18]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_21]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 2D array with the array always present boundary. +// ! CHARACTER with variable length.
+// subroutine eoshift12c(n, array, boundary) +// integer :: n +// character(n,4) :: array(n,n), boundary(:) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift12c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "array"}, %arg2: !fir.box>> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift12cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + %6 = arith.select %5, %4, %c0_i32 : i32 + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.load %1#0 : !fir.ref + %12 = fir.convert %11 : (i32) -> index + %13 = arith.cmpi sgt, %12, %c0 : index + %14 = arith.select %13, %12, %c0 : index + %15 = fir.shape %10, %14 : (index, index) -> !fir.shape<2> + %16:2 = hlfir.declare %3(%15) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift12cEarray"} : (!fir.ref>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %17 = fir.load %1#0 : !fir.ref + %18 = arith.cmpi sgt, %17, %c0_i32 : i32 + %19 = arith.select %18, %17, %c0_i32 : i32 + %20:2 = hlfir.declare %arg2 typeparams %19 dummy_scope %0 {uniq_name = "_QFeoshift12cEboundary"} : (!fir.box>>, i32, !fir.dscope) -> (!fir.box>>, !fir.box>>) + %21 = hlfir.eoshift %16#0 %c2_i32 boundary %20#0 : (!fir.box>>, i32, !fir.box>>) -> !hlfir.expr> + hlfir.assign %21 to %16#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %21 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift12c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: 
%[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.box>> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift12cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref>, index) +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_9]], %[[VAL_8]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_2]] : index +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index +// CHECK: %[[VAL_17:.*]] = arith.cmpi sgt, %[[VAL_16]], %[[VAL_2]] : index +// CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_16]], %[[VAL_2]] : index +// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_14]], %[[VAL_18]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_19]]) typeparams %[[VAL_10]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift12cEarray"} : (!fir.ref>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_22:.*]] = arith.cmpi sgt, %[[VAL_21]], %[[VAL_3]] : i32 +// CHECK: 
%[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_21]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[ARG2]] typeparams %[[VAL_23]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift12cEboundary"} : (!fir.box>>, i32, !fir.dscope) -> (!fir.box>>, !fir.box>>) +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_14]] : (index) -> i64 +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_1]] : (i32) -> i64 +// CHECK: %[[VAL_27:.*]] = hlfir.elemental %[[VAL_19]] typeparams %[[VAL_10]] unordered : (!fir.shape<2>, i32) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index): +// CHECK: %[[VAL_30:.*]] = hlfir.designate %[[VAL_24]]#0 (%[[VAL_29]]) typeparams %[[VAL_23]] : (!fir.box>>, index, i32) -> !fir.boxchar<4> +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_28]] : (index) -> i64 +// CHECK: %[[VAL_32:.*]] = arith.addi %[[VAL_31]], %[[VAL_26]] overflow : i64 +// CHECK: %[[VAL_33:.*]] = arith.cmpi sge, %[[VAL_32]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_34:.*]] = arith.cmpi sle, %[[VAL_32]], %[[VAL_25]] : i64 +// CHECK: %[[VAL_35:.*]] = arith.andi %[[VAL_33]], %[[VAL_34]] : i1 +// CHECK: %[[VAL_36:.*]] = fir.if %[[VAL_35]] -> (!fir.boxchar<4>) { +// CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_32]] : (i64) -> index +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_37]], %[[VAL_29]]) typeparams %[[VAL_10]] : (!fir.box>>, index, index, i32) -> !fir.boxchar<4> +// CHECK: fir.result %[[VAL_38]] : !fir.boxchar<4> +// CHECK: } else { +// CHECK: fir.result %[[VAL_30]] : !fir.boxchar<4> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_36]] : !fir.boxchar<4> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_20]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_27]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 2D array with the array always present boundary. +// ! CHARACTER with assumed length.
+// subroutine eoshift13c(n, array, boundary) +// integer :: n +// character(*,4) :: array(n,n), boundary(:) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift13c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<4> {fir.bindc_name = "array"}, %arg2: !fir.box>> {fir.bindc_name = "boundary"}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift13cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFeoshift13cEboundary"} : (!fir.box>>, !fir.dscope) -> (!fir.box>>, !fir.box>>) + %3:2 = fir.unboxchar %arg1 : (!fir.boxchar<4>) -> (!fir.ref>, index) + %4 = fir.convert %3#0 : (!fir.ref>) -> !fir.ref>> + %5 = fir.load %1#0 : !fir.ref + %6 = fir.convert %5 : (i32) -> index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.load %1#0 : !fir.ref + %10 = fir.convert %9 : (i32) -> index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.shape %8, %12 : (index, index) -> !fir.shape<2> + %14:2 = hlfir.declare %4(%13) typeparams %3#1 dummy_scope %0 {uniq_name = "_QFeoshift13cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %15 = hlfir.eoshift %14#0 %c2_i32 boundary %2#0 : (!fir.box>>, i32, !fir.box>>) -> !hlfir.expr> + hlfir.assign %15 to %14#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %15 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift13c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<4> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.box>> {fir.bindc_name = "boundary"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 4 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 +// CHECK: 
%[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift13cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift13cEboundary"} : (!fir.box>>, !fir.dscope) -> (!fir.box>>, !fir.box>>) +// CHECK: %[[VAL_7:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<4>) -> (!fir.ref>, index) +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +// CHECK: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_10]], %[[VAL_3]] : index +// CHECK: %[[VAL_12:.*]] = arith.select %[[VAL_11]], %[[VAL_10]], %[[VAL_3]] : index +// CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> index +// CHECK: %[[VAL_15:.*]] = arith.cmpi sgt, %[[VAL_14]], %[[VAL_3]] : index +// CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_15]], %[[VAL_14]], %[[VAL_3]] : index +// CHECK: %[[VAL_17:.*]] = fir.shape %[[VAL_12]], %[[VAL_16]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_8]](%[[VAL_17]]) typeparams %[[VAL_7]]#1 dummy_scope %[[VAL_4]] {uniq_name = "_QFeoshift13cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +// CHECK: %[[VAL_21:.*]] = hlfir.elemental %[[VAL_17]] typeparams %[[VAL_7]]#1 unordered : (!fir.shape<2>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index): +// CHECK: %[[VAL_24:.*]] = fir.box_elesize %[[VAL_6]]#1 : (!fir.box>>) -> index +// CHECK: %[[VAL_25:.*]] = arith.divsi %[[VAL_24]], %[[VAL_1]] : index +// 
CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_23]]) typeparams %[[VAL_25]] : (!fir.box>>, index, index) -> !fir.boxchar<4> +// CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_22]] : (index) -> i64 +// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_27]], %[[VAL_20]] overflow : i64 +// CHECK: %[[VAL_29:.*]] = arith.cmpi sge, %[[VAL_28]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_30:.*]] = arith.cmpi sle, %[[VAL_28]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1 +// CHECK: %[[VAL_32:.*]] = fir.if %[[VAL_31]] -> (!fir.boxchar<4>) { +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_28]] : (i64) -> index +// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_18]]#0 (%[[VAL_33]], %[[VAL_23]]) typeparams %[[VAL_7]]#1 : (!fir.box>>, index, index, index) -> !fir.boxchar<4> +// CHECK: fir.result %[[VAL_34]] : !fir.boxchar<4> +// CHECK: } else { +// CHECK: fir.result %[[VAL_26]] : !fir.boxchar<4> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_32]] : !fir.boxchar<4> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_21]] to %[[VAL_18]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_21]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 2D array with the array optional boundary. +// ! CHARACTER with constant length.
+// subroutine eoshift14c(n, array, boundary) +// integer :: n +// character(10,1) :: array(n,n) +// character(10,1), optional :: boundary(n) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift14c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift14cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.cmpi sgt, %5, %c0 : index + %7 = arith.select %6, %5, %c0 : index + %8 = fir.load %1#0 : !fir.ref + %9 = fir.convert %8 : (i32) -> index + %10 = arith.cmpi sgt, %9, %c0 : index + %11 = arith.select %10, %9, %c0 : index + %12 = fir.shape %7, %11 : (index, index) -> !fir.shape<2> + %13:2 = hlfir.declare %3(%12) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift14cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %14:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %15 = fir.convert %14#0 : (!fir.ref>) -> !fir.ref>> + %16 = fir.load %1#0 : !fir.ref + %17 = fir.convert %16 : (i32) -> index + %18 = arith.cmpi sgt, %17, %c0 : index + %19 = arith.select %18, %17, %c0 : index + %20 = fir.shape %19 : (index) -> !fir.shape<1> + %21:2 = hlfir.declare %15(%20) typeparams %c10 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift14cEboundary"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %22 = fir.is_present %21#0 : (!fir.box>>) -> i1 + %23 = fir.shape %19 : (index) -> !fir.shape<1> + %24 = fir.embox %21#1(%23) : 
(!fir.ref>>, !fir.shape<1>) -> !fir.box>> + %25 = fir.absent !fir.box>> + %26 = arith.select %22, %24, %25 : !fir.box>> + %27 = hlfir.eoshift %13#0 %c2_i32 boundary %26 : (!fir.box>>, i32, !fir.box>>) -> !hlfir.expr> + hlfir.assign %27 to %13#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %27 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift14c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant false +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift14cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_12]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_12]], %[[VAL_5]] : index +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index +// CHECK: %[[VAL_17:.*]] = arith.cmpi sgt, %[[VAL_16]], %[[VAL_5]] : index +// CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_16]], %[[VAL_5]] : index +// CHECK: %[[VAL_19:.*]] = fir.shape %[[VAL_14]], %[[VAL_18]] : (index, index) 
-> !fir.shape<2> +// CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_19]]) typeparams %[[VAL_6]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift14cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_21:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_21]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i32) -> index +// CHECK: %[[VAL_25:.*]] = arith.cmpi sgt, %[[VAL_24]], %[[VAL_5]] : index +// CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_24]], %[[VAL_5]] : index +// CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_22]](%[[VAL_27]]) typeparams %[[VAL_6]] dummy_scope %[[VAL_7]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift14cEboundary"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_29:.*]] = fir.is_present %[[VAL_28]]#0 : (!fir.box>>) -> i1 +// CHECK: %[[VAL_30:.*]] = fir.shape %[[VAL_26]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_31:.*]] = fir.embox %[[VAL_28]]#1(%[[VAL_30]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[VAL_32:.*]] = fir.absent !fir.box>> +// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_29]], %[[VAL_31]], %[[VAL_32]] : !fir.box>> +// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_14]] : (index) -> i64 +// CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64 +// CHECK: %[[VAL_36:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_37:.*]] = fir.emboxchar %[[VAL_36]], %[[VAL_5]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_38:.*]] = fir.is_present %[[VAL_33]] : (!fir.box>>) -> i1 +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_38]], %[[VAL_2]], %[[VAL_3]] : i1 +// CHECK: %[[VAL_40:.*]] = hlfir.elemental %[[VAL_19]] 
typeparams %[[VAL_6]] unordered : (!fir.shape<2>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_41:.*]]: index, %[[VAL_42:.*]]: index): +// CHECK: %[[VAL_43:.*]] = fir.if %[[VAL_39]] -> (!fir.boxchar<1>) { +// CHECK: fir.result %[[VAL_37]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: %[[VAL_44:.*]]:3 = fir.box_dims %[[VAL_33]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_44]]#0, %[[VAL_1]] overflow : index +// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_42]], %[[VAL_45]] overflow : index +// CHECK: %[[VAL_47:.*]] = hlfir.designate %[[VAL_33]] (%[[VAL_46]]) typeparams %[[VAL_6]] : (!fir.box>>, index, index) -> !fir.ref> +// CHECK: %[[VAL_48:.*]] = fir.emboxchar %[[VAL_47]], %[[VAL_6]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_48]] : !fir.boxchar<1> +// CHECK: } +// CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_41]] : (index) -> i64 +// CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_35]] overflow : i64 +// CHECK: %[[VAL_51:.*]] = arith.cmpi sge, %[[VAL_50]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_52:.*]] = arith.cmpi sle, %[[VAL_50]], %[[VAL_34]] : i64 +// CHECK: %[[VAL_53:.*]] = arith.andi %[[VAL_51]], %[[VAL_52]] : i1 +// CHECK: %[[VAL_54:.*]] = fir.if %[[VAL_53]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_55:.*]] = fir.convert %[[VAL_50]] : (i64) -> index +// CHECK: %[[VAL_56:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_55]], %[[VAL_42]]) typeparams %[[VAL_6]] : (!fir.box>>, index, index, index) -> !fir.ref> +// CHECK: %[[VAL_57:.*]] = fir.emboxchar %[[VAL_56]], %[[VAL_6]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_57]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_43]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_54]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_40]] to %[[VAL_20]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_40]] : !hlfir.expr> +// CHECK: return +// 
CHECK: } + +// ! Test contiguous 1D array with the array optional boundary. +// ! CHARACTER with variable length. +// subroutine eoshift15c(n, array, boundary) +// integer :: n +// character(n,1) :: array(n,n) +// character(n,1), optional :: boundary(n) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift15c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c0_i32 = arith.constant 0 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift15cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i32 : i32 + %6 = arith.select %5, %4, %c0_i32 : i32 + %7 = fir.load %1#0 : !fir.ref + %8 = fir.convert %7 : (i32) -> index + %9 = arith.cmpi sgt, %8, %c0 : index + %10 = arith.select %9, %8, %c0 : index + %11 = fir.load %1#0 : !fir.ref + %12 = fir.convert %11 : (i32) -> index + %13 = arith.cmpi sgt, %12, %c0 : index + %14 = arith.select %13, %12, %c0 : index + %15 = fir.shape %10, %14 : (index, index) -> !fir.shape<2> + %16:2 = hlfir.declare %3(%15) typeparams %6 dummy_scope %0 {uniq_name = "_QFeoshift15cEarray"} : (!fir.ref>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %17:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %18 = fir.convert %17#0 : (!fir.ref>) -> !fir.ref>> + %19 = fir.load %1#0 : !fir.ref + %20 = arith.cmpi sgt, %19, %c0_i32 : i32 + %21 = arith.select %20, %19, %c0_i32 : i32 + %22 = fir.load %1#0 : !fir.ref + %23 = fir.convert %22 : (i32) -> index + %24 = arith.cmpi sgt, %23, %c0 : index + %25 = arith.select %24, %23, %c0 : index + %26 = fir.shape %25 : (index) -> 
!fir.shape<1> + %27:2 = hlfir.declare %18(%26) typeparams %21 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift15cEboundary"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %28 = fir.is_present %27#0 : (!fir.box>>) -> i1 + %29 = fir.shape %25 : (index) -> !fir.shape<1> + %30 = fir.embox %27#1(%29) typeparams %21 : (!fir.ref>>, !fir.shape<1>, i32) -> !fir.box>> + %31 = fir.absent !fir.box>> + %32 = arith.select %28, %30, %31 : !fir.box>> + %33 = hlfir.eoshift %16#0 %c2_i32 boundary %32 : (!fir.box>>, i32, !fir.box>>) -> !hlfir.expr> + hlfir.assign %33 to %16#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %33 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift15c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant false +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift15cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_6]] : i32 +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_6]] : i32 +// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: 
%[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> index +// CHECK: %[[VAL_16:.*]] = arith.cmpi sgt, %[[VAL_15]], %[[VAL_5]] : index +// CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_16]], %[[VAL_15]], %[[VAL_5]] : index +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> index +// CHECK: %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_5]] : index +// CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_19]], %[[VAL_5]] : index +// CHECK: %[[VAL_22:.*]] = fir.shape %[[VAL_17]], %[[VAL_21]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_22]]) typeparams %[[VAL_13]] dummy_scope %[[VAL_7]] {uniq_name = "_QFeoshift15cEarray"} : (!fir.ref>>, !fir.shape<2>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_24:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_27:.*]] = arith.cmpi sgt, %[[VAL_26]], %[[VAL_6]] : i32 +// CHECK: %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_26]], %[[VAL_6]] : i32 +// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref +// CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> index +// CHECK: %[[VAL_31:.*]] = arith.cmpi sgt, %[[VAL_30]], %[[VAL_5]] : index +// CHECK: %[[VAL_32:.*]] = arith.select %[[VAL_31]], %[[VAL_30]], %[[VAL_5]] : index +// CHECK: %[[VAL_33:.*]] = fir.shape %[[VAL_32]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_34:.*]]:2 = hlfir.declare %[[VAL_25]](%[[VAL_33]]) typeparams %[[VAL_28]] dummy_scope %[[VAL_7]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift15cEboundary"} : (!fir.ref>>, !fir.shape<1>, i32, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_35:.*]] = fir.is_present %[[VAL_34]]#0 : (!fir.box>>) -> i1 +// CHECK: %[[VAL_36:.*]] = fir.shape %[[VAL_32]] : 
(index) -> !fir.shape<1> +// CHECK: %[[VAL_37:.*]] = fir.embox %[[VAL_34]]#1(%[[VAL_36]]) typeparams %[[VAL_28]] : (!fir.ref>>, !fir.shape<1>, i32) -> !fir.box>> +// CHECK: %[[VAL_38:.*]] = fir.absent !fir.box>> +// CHECK: %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_37]], %[[VAL_38]] : !fir.box>> +// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_17]] : (index) -> i64 +// CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64 +// CHECK: %[[VAL_42:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_43:.*]] = fir.emboxchar %[[VAL_42]], %[[VAL_5]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_44:.*]] = fir.is_present %[[VAL_39]] : (!fir.box>>) -> i1 +// CHECK: %[[VAL_45:.*]] = arith.select %[[VAL_44]], %[[VAL_2]], %[[VAL_3]] : i1 +// CHECK: %[[VAL_46:.*]] = hlfir.elemental %[[VAL_22]] typeparams %[[VAL_13]] unordered : (!fir.shape<2>, i32) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_47:.*]]: index, %[[VAL_48:.*]]: index): +// CHECK: %[[VAL_49:.*]] = fir.if %[[VAL_45]] -> (!fir.boxchar<1>) { +// CHECK: fir.result %[[VAL_43]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: %[[VAL_50:.*]] = fir.box_elesize %[[VAL_39]] : (!fir.box>>) -> index +// CHECK: %[[VAL_51:.*]]:3 = fir.box_dims %[[VAL_39]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_52:.*]] = arith.subi %[[VAL_51]]#0, %[[VAL_1]] overflow : index +// CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_48]], %[[VAL_52]] overflow : index +// CHECK: %[[VAL_54:.*]] = hlfir.designate %[[VAL_39]] (%[[VAL_53]]) typeparams %[[VAL_50]] : (!fir.box>>, index, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_54]] : !fir.boxchar<1> +// CHECK: } +// CHECK: %[[VAL_55:.*]] = fir.convert %[[VAL_47]] : (index) -> i64 +// CHECK: %[[VAL_56:.*]] = arith.addi %[[VAL_55]], %[[VAL_41]] overflow : i64 +// CHECK: %[[VAL_57:.*]] = arith.cmpi sge, %[[VAL_56]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_58:.*]] = arith.cmpi sle, %[[VAL_56]], %[[VAL_40]] : i64 +// CHECK: 
%[[VAL_59:.*]] = arith.andi %[[VAL_57]], %[[VAL_58]] : i1 +// CHECK: %[[VAL_60:.*]] = fir.if %[[VAL_59]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_61:.*]] = fir.convert %[[VAL_56]] : (i64) -> index +// CHECK: %[[VAL_62:.*]] = hlfir.designate %[[VAL_23]]#0 (%[[VAL_61]], %[[VAL_48]]) typeparams %[[VAL_13]] : (!fir.box>>, index, index, i32) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_62]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_49]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_60]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_46]] to %[[VAL_23]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_46]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! Test contiguous 1D array with the array optional boundary. +// ! CHARACTER with assumed length. +// subroutine eoshift16c(n, array, boundary) +// integer :: n +// character(*,1) :: array(n,n) +// character(*,1), optional :: boundary(n) +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift16c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}, %arg2: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift16cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.cmpi sgt, %5, %c0 : index + %7 = arith.select %6, %5, %c0 : index + %8 = fir.load %1#0 : !fir.ref + %9 = fir.convert %8 : (i32) -> index + %10 = arith.cmpi sgt, %9, %c0 : index + %11 = arith.select %10, %9, %c0 : index + %12 = fir.shape %7, %11 : (index, index) -> !fir.shape<2> + %13:2 = hlfir.declare %3(%12) typeparams %2#1 dummy_scope 
%0 {uniq_name = "_QFeoshift16cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %14:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %15 = fir.convert %14#0 : (!fir.ref>) -> !fir.ref>> + %16 = fir.load %1#0 : !fir.ref + %17 = fir.convert %16 : (i32) -> index + %18 = arith.cmpi sgt, %17, %c0 : index + %19 = arith.select %18, %17, %c0 : index + %20 = fir.shape %19 : (index) -> !fir.shape<1> + %21:2 = hlfir.declare %15(%20) typeparams %14#1 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift16cEboundary"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %22 = fir.is_present %21#0 : (!fir.box>>) -> i1 + %23 = fir.shape %19 : (index) -> !fir.shape<1> + %24 = fir.embox %21#1(%23) typeparams %14#1 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.box>> + %25 = fir.absent !fir.box>> + %26 = arith.select %22, %24, %25 : !fir.box>> + %27 = hlfir.eoshift %13#0 %c2_i32 boundary %26 : (!fir.box>>, i32, !fir.box>>) -> !hlfir.expr> + hlfir.assign %27 to %13#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %27 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift16c( +// CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "n"}, +// CHECK-SAME: %[[ARG1:.*]]: !fir.boxchar<1> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[ARG2:.*]]: !fir.boxchar<1> {fir.bindc_name = "boundary", fir.optional}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 1 : i64 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant false +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_6]] {uniq_name = "_QFeoshift16cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_8:.*]]:2 = fir.unboxchar %[[ARG1]] : (!fir.boxchar<1>) -> 
(!fir.ref>, index) +// CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_5]] : index +// CHECK: %[[VAL_13:.*]] = arith.select %[[VAL_12]], %[[VAL_11]], %[[VAL_5]] : index +// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> index +// CHECK: %[[VAL_16:.*]] = arith.cmpi sgt, %[[VAL_15]], %[[VAL_5]] : index +// CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_16]], %[[VAL_15]], %[[VAL_5]] : index +// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_13]], %[[VAL_17]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_18]]) typeparams %[[VAL_8]]#1 dummy_scope %[[VAL_6]] {uniq_name = "_QFeoshift16cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_20:.*]]:2 = fir.unboxchar %[[ARG2]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +// CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]]#0 : (!fir.ref>) -> !fir.ref>> +// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (i32) -> index +// CHECK: %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_23]], %[[VAL_5]] : index +// CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_23]], %[[VAL_5]] : index +// CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_25]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_21]](%[[VAL_26]]) typeparams %[[VAL_20]]#1 dummy_scope %[[VAL_6]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift16cEboundary"} : (!fir.ref>>, !fir.shape<1>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) +// CHECK: %[[VAL_28:.*]] = fir.is_present %[[VAL_27]]#0 : (!fir.box>>) -> i1 +// CHECK: %[[VAL_29:.*]] = fir.shape %[[VAL_25]] : (index) -> !fir.shape<1> +// CHECK: 
%[[VAL_30:.*]] = fir.embox %[[VAL_27]]#1(%[[VAL_29]]) typeparams %[[VAL_20]]#1 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.box>> +// CHECK: %[[VAL_31:.*]] = fir.absent !fir.box>> +// CHECK: %[[VAL_32:.*]] = arith.select %[[VAL_28]], %[[VAL_30]], %[[VAL_31]] : !fir.box>> +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 +// CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64 +// CHECK: %[[VAL_35:.*]] = fir.alloca !fir.char<1,0> {bindc_name = ".chrtmp"} +// CHECK: %[[VAL_36:.*]] = fir.emboxchar %[[VAL_35]], %[[VAL_5]] : (!fir.ref>, index) -> !fir.boxchar<1> +// CHECK: %[[VAL_37:.*]] = fir.is_present %[[VAL_32]] : (!fir.box>>) -> i1 +// CHECK: %[[VAL_38:.*]] = arith.select %[[VAL_37]], %[[VAL_2]], %[[VAL_3]] : i1 +// CHECK: %[[VAL_39:.*]] = hlfir.elemental %[[VAL_18]] typeparams %[[VAL_8]]#1 unordered : (!fir.shape<2>, index) -> !hlfir.expr> { +// CHECK: ^bb0(%[[VAL_40:.*]]: index, %[[VAL_41:.*]]: index): +// CHECK: %[[VAL_42:.*]] = fir.if %[[VAL_38]] -> (!fir.boxchar<1>) { +// CHECK: fir.result %[[VAL_36]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: %[[VAL_43:.*]] = fir.box_elesize %[[VAL_32]] : (!fir.box>>) -> index +// CHECK: %[[VAL_44:.*]]:3 = fir.box_dims %[[VAL_32]], %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_45:.*]] = arith.subi %[[VAL_44]]#0, %[[VAL_1]] overflow : index +// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_41]], %[[VAL_45]] overflow : index +// CHECK: %[[VAL_47:.*]] = hlfir.designate %[[VAL_32]] (%[[VAL_46]]) typeparams %[[VAL_43]] : (!fir.box>>, index, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_47]] : !fir.boxchar<1> +// CHECK: } +// CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_40]] : (index) -> i64 +// CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_48]], %[[VAL_34]] overflow : i64 +// CHECK: %[[VAL_50:.*]] = arith.cmpi sge, %[[VAL_49]], %[[VAL_0]] : i64 +// CHECK: %[[VAL_51:.*]] = arith.cmpi sle, %[[VAL_49]], %[[VAL_33]] : i64 +// CHECK: %[[VAL_52:.*]] = arith.andi %[[VAL_50]], 
%[[VAL_51]] : i1 +// CHECK: %[[VAL_53:.*]] = fir.if %[[VAL_52]] -> (!fir.boxchar<1>) { +// CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_49]] : (i64) -> index +// CHECK: %[[VAL_55:.*]] = hlfir.designate %[[VAL_19]]#0 (%[[VAL_54]], %[[VAL_41]]) typeparams %[[VAL_8]]#1 : (!fir.box>>, index, index, index) -> !fir.boxchar<1> +// CHECK: fir.result %[[VAL_55]] : !fir.boxchar<1> +// CHECK: } else { +// CHECK: fir.result %[[VAL_42]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.yield_element %[[VAL_53]] : !fir.boxchar<1> +// CHECK: } +// CHECK: hlfir.assign %[[VAL_39]] to %[[VAL_19]]#0 : !hlfir.expr>, !fir.box>> +// CHECK: hlfir.destroy %[[VAL_39]] : !hlfir.expr> +// CHECK: return +// CHECK: } + +// ! TODO: ARRAY or/and BOUNDARY are expressions of CHARACTER type. +// ! Test contiguous 1D array with the array expression boundary. +// ! CHARACTER with constant length. +// subroutine eoshift17c(n, array) +// interface +// function charc_boundary(n) +// integer :: n +// character(10,1) :: charc_boundary(n) +// end function +// end interface +// integer :: n +// character(10,1) :: array(n,n) +// array = EOSHIFT(array//array, 2, charc_boundary(n)) +// end subroutine +func.func @_QPeoshift17c(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.boxchar<1> {fir.bindc_name = "array"}) { + %c20 = arith.constant 20 : index + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift17cEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %3 = fir.convert %2#0 : (!fir.ref>) -> !fir.ref>> + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.cmpi sgt, %5, %c0 : index + %7 = arith.select %6, %5, %c0 : index + %8 = fir.load %1#0 : !fir.ref + %9 = fir.convert %8 : (i32) -> index + %10 = arith.cmpi sgt, %9, %c0 : index + %11 = arith.select %10, 
%9, %c0 : index + %12 = fir.shape %7, %11 : (index, index) -> !fir.shape<2> + %13:2 = hlfir.declare %3(%12) typeparams %c10 dummy_scope %0 {uniq_name = "_QFeoshift17cEarray"} : (!fir.ref>>, !fir.shape<2>, index, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %14 = hlfir.elemental %12 typeparams %c20 unordered : (!fir.shape<2>, index) -> !hlfir.expr> { + ^bb0(%arg2: index, %arg3: index): + %23 = hlfir.designate %13#0 (%arg2, %arg3) typeparams %c10 : (!fir.box>>, index, index, index) -> !fir.ref> + %24 = hlfir.designate %13#0 (%arg2, %arg3) typeparams %c10 : (!fir.box>>, index, index, index) -> !fir.ref> + %25 = hlfir.concat %23, %24 len %c20 : (!fir.ref>, !fir.ref>, index) -> !hlfir.expr> + hlfir.yield_element %25 : !hlfir.expr> + } + %15:2 = hlfir.declare %1#0 {uniq_name = "_QFeoshift17cFcharc_boundaryEn"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %16 = fir.load %15#0 : !fir.ref + %17 = fir.convert %16 : (i32) -> index + %18 = arith.cmpi sgt, %17, %c0 : index + %19 = arith.select %18, %17, %c0 : index + %20 = fir.shape %19 : (index) -> !fir.shape<1> + %21 = hlfir.eval_in_mem shape %20 typeparams %c10 : (!fir.shape<1>, index) -> !hlfir.expr> { + ^bb0(%arg2: !fir.ref>>): + %23 = fir.call @_QPcharc_boundary(%1#0) fastmath : (!fir.ref) -> !fir.array> + fir.save_result %23 to %arg2(%20) typeparams %c10 : !fir.array>, !fir.ref>>, !fir.shape<1>, index + } + %22 = hlfir.eoshift %14 %c2_i32 boundary %21 : (!hlfir.expr>, i32, !hlfir.expr>) -> !hlfir.expr> + hlfir.assign %22 to %13#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %22 : !hlfir.expr> + hlfir.destroy %21 : !hlfir.expr> + hlfir.destroy %14 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift17c( +// CHECK: hlfir.eoshift + +// ! Tests for derived types. + +// ! TODO: selecting between !fir.ref> and !fir.box> +// ! is not implemented. +// ! Test contiguous 1D array with the scalar optional boundary. 
+// subroutine eoshift1d(n, array, boundary) +// use eoshift_types +// integer :: n +// type(t) :: array(n) +// type(t), optional :: boundary +// array = EOSHIFT(array, 2, boundary) +// end subroutine +func.func @_QPeoshift1d(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref>> {fir.bindc_name = "array"}, %arg2: !fir.ref> {fir.bindc_name = "boundary", fir.optional}) { + %c2_i32 = arith.constant 2 : i32 + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFeoshift1dEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift1dEboundary"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.convert %3 : (i32) -> index + %5 = arith.cmpi sgt, %4, %c0 : index + %6 = arith.select %5, %4, %c0 : index + %7 = fir.shape %6 : (index) -> !fir.shape<1> + %8:2 = hlfir.declare %arg1(%7) dummy_scope %0 {uniq_name = "_QFeoshift1dEarray"} : (!fir.ref>>, !fir.shape<1>, !fir.dscope) -> (!fir.box>>, !fir.ref>>) + %9 = fir.is_present %2#0 : (!fir.ref>) -> i1 + %10 = fir.embox %2#0 : (!fir.ref>) -> !fir.box> + %11 = fir.absent !fir.box> + %12 = arith.select %9, %10, %11 : !fir.box> + %13 = hlfir.eoshift %8#0 %c2_i32 boundary %12 : (!fir.box>>, i32, !fir.box>) -> !hlfir.expr> + hlfir.assign %13 to %8#0 : !hlfir.expr>, !fir.box>> + hlfir.destroy %13 : !hlfir.expr> + return +} +// CHECK-LABEL: func.func @_QPeoshift1d( +// CHECK: hlfir.eoshift From f1fc50748aee471daa9e51eaf61e9e853f11f0c7 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 15 Aug 2025 15:23:06 -0700 Subject: [PATCH 030/214] [AMDGPU] w/a hazard with writing s102/103 and reading FLAT_SCRATCH_BASE (#153878) --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 78 +++ llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 + 
llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 478 ++++++++++++++++++ 4 files changed, 563 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 5e297c7540c4..dd7c1914d344 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1204,6 +1204,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixGetRegWaitIdle(MI); if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug()) fixDsAtomicAsyncBarrierArriveB64(MI); + if (ST.hasScratchBaseForwardingHazard()) + fixScratchBaseForwardingHazard(MI); } static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI, @@ -3468,3 +3470,79 @@ bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) { return true; } + +bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) { + // No reason to check this in pre-RA scheduling, SGPRs have to be allocated + // for hazard to trigger. + if (!IsHazardRecognizerMode) + return false; + + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + // Hazard expires after 10 SGPR writes by SALU or 8 SGPR writes by VALU. + const int FlatScrBaseWaitStates = 10; + + bool ReadsFlatScrLo = + MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO, TRI); + bool ReadsFlatScrHi = + MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, TRI); + if (isSGetReg(MI->getOpcode())) { + switch (getHWReg(TII, *MI)) { + default: + break; + case AMDGPU::Hwreg::ID_FLAT_SCR_LO: + ReadsFlatScrLo = true; + break; + case AMDGPU::Hwreg::ID_FLAT_SCR_HI: + ReadsFlatScrHi = true; + break; + } + } + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + auto IsRegDefHazard = [&](Register Reg) -> bool { + DenseSet Visited; + auto IsHazardFn = [TRI, Reg](const MachineInstr &MI) { + return MI.modifiesRegister(Reg, TRI); + }; + + // This literally abuses the idea of waitstates. 
Instead of waitstates it + // returns 1 for SGPR written and 0 otherwise. + auto IsSGPRDef = [TII, TRI, &MRI](const MachineInstr &MI) -> unsigned { + if (!TII->isSALU(MI) && !TII->isVALU(MI)) + return 0; + for (const MachineOperand &MO : MI.all_defs()) { + if (TRI->isSGPRReg(MRI, MO.getReg())) + return 1; + } + return 0; + }; + + auto IsExpiredFn = [=](const MachineInstr &MI, int SgprWrites) { + if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) { + unsigned Wait = MI.getOperand(0).getImm(); + if (AMDGPU::DepCtr::decodeFieldSaSdst(Wait) == 0 && + AMDGPU::DepCtr::decodeFieldVaSdst(Wait) == 0) + return true; + } + return SgprWrites >= FlatScrBaseWaitStates; + }; + + return ::getWaitStatesSince( + IsHazardFn, MI->getParent(), std::next(MI->getReverseIterator()), + 0, IsExpiredFn, Visited, IsSGPRDef) < FlatScrBaseWaitStates; + }; + + if ((!ReadsFlatScrLo || MRI.isConstantPhysReg(AMDGPU::SGPR102) || + !IsRegDefHazard(AMDGPU::SGPR102)) && + (!ReadsFlatScrHi || MRI.isConstantPhysReg(AMDGPU::SGPR103) || + !IsRegDefHazard(AMDGPU::SGPR103))) + return false; + + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(AMDGPU::DepCtr::encodeFieldVaSdst( + AMDGPU::DepCtr::encodeFieldSaSdst(0), 0)); + return true; +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 890d5cbd154d..e0982b46424b 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -112,6 +112,7 @@ private: bool fixRequiredExportPriority(MachineInstr *MI); bool fixGetRegWaitIdle(MachineInstr *MI); bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI); + bool fixScratchBaseForwardingHazard(MachineInstr *MI); int checkMAIHazards(MachineInstr *MI); int checkMAIHazards908(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 436f5c0801fa..404a476a3076 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h 
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1821,6 +1821,12 @@ public: bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const { return getGeneration() == GFX12; } + + // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base + // read. + bool hasScratchBaseForwardingHazard() const { + return GFX1250Insts && getGeneration() == GFX12; + } }; class GCNUserSGPRUsageInfo { diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir index f1dbabf1e1a8..f4596b0832d9 100644 --- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir @@ -15,3 +15,481 @@ body: | ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec ... + +--- +name: write_s102_read_flat_scr_base_lo +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... + +--- +name: write_s103_read_flat_scr_base_hi +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec + $sgpr103 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec +... 
+ +--- +name: write_s102_read_flat_scr_base +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: write_s102_read_flat_scr_base + ; GCN: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base + $sgpr102 = S_MOV_B32 0 + $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base +... + +--- +name: write_s103_read_flat_scr_base +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: write_s103_read_flat_scr_base + ; GCN: $sgpr103 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base + $sgpr103 = S_MOV_B32 0 + $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base +... + +--- +name: write_s102_s103_read_flat_scr_base +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: write_s102_s103_read_flat_scr_base + ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base + $sgpr102_sgpr103 = S_MOV_B64 0 + $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base +... + +--- +name: write_s102_getreg_flat_scr_base_lo +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_getreg_flat_scr_base_lo + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $sgpr1 = S_GETREG_B32 20, implicit $mode + $sgpr102 = S_MOV_B32 0 + $sgpr1 = S_GETREG_B32 20, implicit $mode +... + +--- +name: write_s103_getreg_flat_scr_base_hi +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s103_getreg_flat_scr_base_hi + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode + $sgpr103 = S_MOV_B32 0 + $sgpr1 = S_GETREG_B32 21, implicit $mode +... 
+ +--- +name: write_s102_s103_getreg_flat_scr_base_hi +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: write_s102_s103_getreg_flat_scr_base_hi + ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode + $sgpr102_sgpr103 = S_MOV_B64 0 + $sgpr1 = S_GETREG_B32 21, implicit $mode +... + +--- +name: write_s102_read_flat_scr_base_lo_9_salu_valu +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_9_salu_valu + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0 + ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr2_sgpr3 = S_MOV_B64 0 + $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; NOP does not count because it does not write SGPRs + S_NOP 0 + ; DS_READ_B32 does not count because it is not SALU or VALU + $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + $sgpr4 = S_MOV_B32 0 + $sgpr5 = S_MOV_B32 0 + $sgpr6 = S_MOV_B32 0 + $sgpr7 = S_MOV_B32 0 + ; S_LOAD_DWORDX2_IMM does not count because it is not SALU + 
$sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... + +--- +name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0 + ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + ; GCN-NEXT: $sgpr10 = S_MOV_B32 0 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr2_sgpr3 = S_MOV_B64 0 + $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; NOP does not count because it does not write SGPRs + S_NOP 0 + ; DS_READ_B32 does not count because it is not SALU or VALU + $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + $sgpr4 = S_MOV_B32 0 + $sgpr5 = S_MOV_B32 0 + $sgpr6 = S_MOV_B32 0 + $sgpr7 = S_MOV_B32 0 + ; S_LOAD_DWORDX2_IMM does not count because it is not SALU + $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + $sgpr10 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... 
+ +--- +name: write_s103_read_flat_scr_base_hi_9_salu_valu +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_9_salu_valu + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0 + ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec + $sgpr103 = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr2_sgpr3 = S_MOV_B64 0 + $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; NOP does not count because it does not write SGPRs + S_NOP 0 + ; DS_READ_B32 does not count because it is not SALU or VALU + $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + $sgpr4 = S_MOV_B32 0 + $sgpr5 = S_MOV_B32 0 + $sgpr6 = S_MOV_B32 0 + $sgpr7 = S_MOV_B32 0 + ; S_LOAD_DWORDX2_IMM does not count because it is not SALU + $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec +... 
+ +--- +name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0 + ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + ; GCN-NEXT: $sgpr10 = S_MOV_B32 0 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec + $sgpr103 = S_MOV_B32 0 + $sgpr0 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr2_sgpr3 = S_MOV_B64 0 + $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec + $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; NOP does not count because it does not write SGPRs + S_NOP 0 + ; DS_READ_B32 does not count because it is not SALU or VALU + $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec + $sgpr4 = S_MOV_B32 0 + $sgpr5 = S_MOV_B32 0 + $sgpr6 = S_MOV_B32 0 + $sgpr7 = S_MOV_B32 0 + ; S_LOAD_DWORDX2_IMM does not count because it is not SALU + $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 + $sgpr10 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec +... 
+ +--- +name: write_s102_read_flat_scr_base_hi_no_hazard +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_hi_no_hazard + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec +... + +--- +name: write_s102_read_flat_scr_base_lo_expired_by_wait0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait0 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 0 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + S_WAITCNT_DEPCTR 0 + S_NOP 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... + +--- +name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + S_WAITCNT_DEPCTR 61950 + S_NOP 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... 
+ +--- +name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61951 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + S_WAITCNT_DEPCTR 61951 + S_NOP 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... + +--- +name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + S_WAITCNT_DEPCTR 65534 + S_NOP 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... 
+ +--- +name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + ; GCN-NEXT: $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec + $sgpr102 = S_MOV_B32 0 + $sgpr103 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec +... + +--- +name: write_s102_read_flat_scr_base_lo_cross_blocks +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_cross_blocks + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $vgpr0, $sgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr2 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr3 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 + ; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr2 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr3 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr4 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr5 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 + ; GCN-NEXT: $sgpr9 = S_MOV_B32 0 + ; GCN-NEXT: S_BRANCH 
%bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_WAITCNT_DEPCTR 61950 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec + bb.0: + liveins: $vgpr0, $sgpr0 + $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc + $sgpr1 = S_MOV_B32 0 + $sgpr2 = S_MOV_B32 0 + $sgpr3 = S_MOV_B32 0 + $sgpr4 = S_MOV_B32 0 + $sgpr5 = S_MOV_B32 0 + $sgpr6 = S_MOV_B32 0 + $sgpr7 = S_MOV_B32 0 + $sgpr8 = S_MOV_B32 0 + S_CBRANCH_SCC0 %bb.2, implicit $scc + S_BRANCH %bb.1 + + bb.1: + liveins: $vgpr0 + $sgpr102 = S_MOV_B32 0 + $sgpr1 = S_MOV_B32 0 + $sgpr2 = S_MOV_B32 0 + $sgpr3 = S_MOV_B32 0 + $sgpr4 = S_MOV_B32 0 + $sgpr5 = S_MOV_B32 0 + $sgpr6 = S_MOV_B32 0 + $sgpr7 = S_MOV_B32 0 + $sgpr8 = S_MOV_B32 0 + $sgpr9 = S_MOV_B32 0 + S_BRANCH %bb.2 + + bb.2: + liveins: $vgpr0 + $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec +... From df0e9f3a2d6e858a2552f453655beae58660b007 Mon Sep 17 00:00:00 2001 From: Morris Hafner Date: Sat, 16 Aug 2025 00:47:04 +0200 Subject: [PATCH 031/214] [CIR] Implement __builtin_return_address and __builtin_frame_address (#153698) This adds ReturnAddrOp and FrameAddrOp that represent __builtin_return_address and __builtin_frame_address and the respective lowering to LLVM parts. 
--------- Co-authored-by: Andy Kaylor --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 62 +++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenBuilder.h | 3 + clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 14 +++++ clang/lib/CIR/CodeGen/CIRGenConstantEmitter.h | 6 +- clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 36 +++++++++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 40 ++++++++++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 20 ++++++ clang/test/CIR/CodeGen/builtins.cpp | 28 +++++++++ 8 files changed, 207 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index a181c95494ef..3bfa29b9c347 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2243,6 +2243,68 @@ def CIR_CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> { ]; } +//===----------------------------------------------------------------------===// +// ReturnAddrOp and FrameAddrOp +//===----------------------------------------------------------------------===// + +class CIR_FuncAddrBuiltinOp : CIR_Op { + let arguments = (ins CIR_UInt32:$level); + let results = (outs CIR_VoidPtrType:$result); + let assemblyFormat = [{ + `(` $level `)` attr-dict + }]; +} + +def CIR_ReturnAddrOp : CIR_FuncAddrBuiltinOp<"return_address"> { + let summary = + "The return address of the current function, or of one of its callers"; + + let description = [{ + Represents a call to the builtin function `__builtin_return_address` in CIR. + This builtin function returns the return address of the current function, + or of one of its callers. + + The `level` argument is the number of frames to scan up the call stack. + For instance, a value of 0 yields the return address of the current function, + a value of 1 yields the return address of the caller of the current function, + and so forth.
+ + Examples: + + ```mlir + %p = return_address(%level) -> !cir.ptr<!void> + ``` + }]; +} + +def CIR_FrameAddrOp : CIR_FuncAddrBuiltinOp<"frame_address"> { + let summary = + "The frame address of the current function, or of one of its callers"; + + let description = [{ + Represents a call to the builtin function `__builtin_frame_address` in CIR. + This builtin function returns the frame address of the current function, + or of one of its callers. The frame is the area on the stack that holds + local variables and saved registers. The frame address is normally the + address of the first word pushed on to the stack by the function. + However, the exact definition depends upon the processor and the calling + convention. If the processor has a dedicated frame pointer register, and + the function has a frame, then __builtin_frame_address returns the value of + the frame pointer register. + + The `level` argument is the number of frames to scan up the call stack. + For instance, a value of 0 yields the frame address of the current function, + a value of 1 yields the frame address of the caller of the current function, + and so forth. + + Examples: + + ```mlir + %p = frame_address(%level) -> !cir.ptr<!void> + ``` + }]; +} + //===----------------------------------------------------------------------===// // StackSaveOp & StackRestoreOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index a7537a0480a2..c1088c4cd082 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -267,6 +267,9 @@ public: cir::ConstantOp getSInt32(int32_t c, mlir::Location loc) { return getConstantInt(loc, getSInt32Ty(), c); } + cir::ConstantOp getUInt32(uint32_t c, mlir::Location loc) { + return getConstantInt(loc, getUInt32Ty(), c); + } // Creates constant nullptr for pointer type ty.
cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 36aea4c1d39c..dcd00696f335 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -312,6 +312,20 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_rotateright64: return emitRotate(e, /*isRotateLeft=*/false); + case Builtin::BI__builtin_return_address: + case Builtin::BI__builtin_frame_address: { + mlir::Location loc = getLoc(e->getExprLoc()); + llvm::APSInt level = e->getArg(0)->EvaluateKnownConstInt(getContext()); + if (builtinID == Builtin::BI__builtin_return_address) { + return RValue::get(cir::ReturnAddrOp::create( + builder, loc, + builder.getConstAPInt(loc, builder.getUInt32Ty(), level))); + } + return RValue::get(cir::FrameAddrOp::create( + builder, loc, + builder.getConstAPInt(loc, builder.getUInt32Ty(), level))); + } + case Builtin::BI__builtin_trap: emitTrap(loc, /*createNewBlock=*/true); return RValue::get(nullptr); diff --git a/clang/lib/CIR/CodeGen/CIRGenConstantEmitter.h b/clang/lib/CIR/CodeGen/CIRGenConstantEmitter.h index d6dac50bb126..d455f6e28340 100644 --- a/clang/lib/CIR/CodeGen/CIRGenConstantEmitter.h +++ b/clang/lib/CIR/CodeGen/CIRGenConstantEmitter.h @@ -80,7 +80,7 @@ public: // initializer or to propagate to another context; for example, // side effects, or emitting an initialization that requires a // reference to its current location. - mlir::Attribute emitForMemory(mlir::Attribute c, QualType t); + mlir::Attribute emitForMemory(mlir::Attribute c, QualType destType); /// Try to emit the initializer of the given declaration as an abstract /// constant. @@ -90,8 +90,9 @@ public: /// asserting that it succeeded. This is only safe to do when the /// expression is known to be a constant expression with either a fairly /// simple type or a known simple form. 
+ mlir::Attribute emitAbstract(const Expr *e, QualType destType); mlir::Attribute emitAbstract(SourceLocation loc, const APValue &value, - QualType t); + QualType destType); mlir::Attribute tryEmitConstantExpr(const ConstantExpr *ce); @@ -101,6 +102,7 @@ public: mlir::Attribute tryEmitPrivateForVarInit(const VarDecl &d); + mlir::TypedAttr tryEmitPrivate(const Expr *e, QualType destType); mlir::Attribute tryEmitPrivate(const APValue &value, QualType destType); mlir::Attribute tryEmitPrivateForMemory(const APValue &value, QualType t); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index c2b373417392..2fbf69d5d01f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -710,6 +710,16 @@ mlir::Attribute ConstantEmitter::tryEmitPrivateForMemory(const APValue &value, return (c ? emitForMemory(c, destType) : nullptr); } +mlir::Attribute ConstantEmitter::emitAbstract(const Expr *e, + QualType destType) { + AbstractStateRAII state{*this, true}; + mlir::Attribute c = mlir::cast(tryEmitPrivate(e, destType)); + if (!c) + cgm.errorNYI(e->getSourceRange(), + "emitAbstract failed, emit null constaant"); + return c; +} + mlir::Attribute ConstantEmitter::emitAbstract(SourceLocation loc, const APValue &value, QualType destType) { @@ -731,6 +741,32 @@ mlir::Attribute ConstantEmitter::emitForMemory(mlir::Attribute c, return c; } +mlir::TypedAttr ConstantEmitter::tryEmitPrivate(const Expr *e, + QualType destType) { + assert(!destType->isVoidType() && "can't emit a void constant"); + + if (mlir::Attribute c = + ConstExprEmitter(*this).Visit(const_cast(e), destType)) + return llvm::dyn_cast(c); + + Expr::EvalResult result; + + bool success = false; + + if (destType->isReferenceType()) + success = e->EvaluateAsLValue(result, cgm.getASTContext()); + else + success = + e->EvaluateAsRValue(result, cgm.getASTContext(), inConstantContext); + + if (success && 
!result.hasSideEffects()) { + mlir::Attribute c = tryEmitPrivate(result.Val, destType); + return llvm::dyn_cast(c); + } + + return nullptr; +} + mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value, QualType destType) { auto &builder = cgm.getBuilder(); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 9f7521db78be..49784b46d351 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -267,6 +267,26 @@ void convertSideEffectForCall(mlir::Operation *callOp, bool isNothrow, } } +static mlir::LLVM::CallIntrinsicOp +createCallLLVMIntrinsicOp(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, const llvm::Twine &intrinsicName, + mlir::Type resultTy, mlir::ValueRange operands) { + auto intrinsicNameAttr = + mlir::StringAttr::get(rewriter.getContext(), intrinsicName); + return mlir::LLVM::CallIntrinsicOp::create(rewriter, loc, resultTy, + intrinsicNameAttr, operands); +} + +static mlir::LLVM::CallIntrinsicOp replaceOpWithCallLLVMIntrinsicOp( + mlir::ConversionPatternRewriter &rewriter, mlir::Operation *op, + const llvm::Twine &intrinsicName, mlir::Type resultTy, + mlir::ValueRange operands) { + mlir::LLVM::CallIntrinsicOp callIntrinOp = createCallLLVMIntrinsicOp( + rewriter, op->getLoc(), intrinsicName, resultTy, operands); + rewriter.replaceOp(op, callIntrinOp.getOperation()); + return callIntrinOp; +} + /// IntAttr visitor. 
mlir::Value CIRAttrToValue::visitCirAttr(cir::IntAttr intAttr) { mlir::Location loc = parentOp->getLoc(); @@ -1112,6 +1132,24 @@ mlir::LogicalResult CIRToLLVMCallOpLowering::matchAndRewrite( getTypeConverter(), op.getCalleeAttr()); } +mlir::LogicalResult CIRToLLVMReturnAddrOpLowering::matchAndRewrite( + cir::ReturnAddrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.returnaddress", + llvmPtrTy, adaptor.getOperands()); + return mlir::success(); +} + +mlir::LogicalResult CIRToLLVMFrameAddrOpLowering::matchAndRewrite( + cir::FrameAddrOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.frameaddress", llvmPtrTy, + adaptor.getOperands()); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( cir::LoadOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -2322,10 +2360,12 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMConstantOpLowering, CIRToLLVMExpectOpLowering, CIRToLLVMFAbsOpLowering, + CIRToLLVMFrameAddrOpLowering, CIRToLLVMFuncOpLowering, CIRToLLVMGetBitfieldOpLowering, CIRToLLVMGetGlobalOpLowering, CIRToLLVMGetMemberOpLowering, + CIRToLLVMReturnAddrOpLowering, CIRToLLVMRotateOpLowering, CIRToLLVMSelectOpLowering, CIRToLLVMSetBitfieldOpLowering, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 91e850523337..7a480d2d4d77 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -209,6 +209,26 @@ public: mlir::ConversionPatternRewriter &rewriter) const override; }; +class CIRToLLVMReturnAddrOpLowering + : public mlir::OpConversionPattern { 
+public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ReturnAddrOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMFrameAddrOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::FrameAddrOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMAllocaOpLowering : public mlir::OpConversionPattern { mlir::DataLayout const &dataLayout; diff --git a/clang/test/CIR/CodeGen/builtins.cpp b/clang/test/CIR/CodeGen/builtins.cpp index 3d43821af4e5..0e434809fe6b 100644 --- a/clang/test/CIR/CodeGen/builtins.cpp +++ b/clang/test/CIR/CodeGen/builtins.cpp @@ -12,3 +12,31 @@ double fabs(double x) { // CIR: {{.*}} = cir.fabs {{.*}} : !cir.double // LLVM: {{.*}} = call double @llvm.fabs.f64(double {{.*}}) // OGCG: {{.*}} = call double @llvm.fabs.f64(double {{.*}}) + +extern "C" void *test_return_address(void) { + return __builtin_return_address(1); + + // CIR-LABEL: test_return_address + // CIR: [[ARG:%.*]] = cir.const #cir.int<1> : !u32i + // CIR: {{%.*}} = cir.return_address([[ARG]]) + + // LLVM-LABEL: @test_return_address + // LLVM: {{%.*}} = call ptr @llvm.returnaddress(i32 1) + + // OGCG-LABEL: @test_return_address + // OGCG: {{%.*}} = call ptr @llvm.returnaddress(i32 1) +} + +extern "C" void *test_frame_address(void) { + return __builtin_frame_address(1); + + // CIR-LABEL: test_frame_address + // CIR: [[ARG:%.*]] = cir.const #cir.int<1> : !u32i + // CIR: {{%.*}} = cir.frame_address([[ARG]]) + + // LLVM-LABEL: @test_frame_address + // LLVM: {{%.*}} = call ptr @llvm.frameaddress.p0(i32 1) + + // OGCG-LABEL: @test_frame_address + // OGCG: {{%.*}} = call ptr @llvm.frameaddress.p0(i32 1) +} From 9c4e571ae83d86aa81c556d62400c61b3f53c805 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Fri, 15 Aug 2025 18:02:13 -0500 Subject: [PATCH 032/214] 
[mlir][xegpu] Add definitions of MemDescType and related ops. (#153273) --- .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 30 ++++ .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 148 ++++++++++++++++++ .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 49 ++++++ mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 2 + mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 125 ++++++++++++++- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 108 +++++++++++++ .../Transforms/XeGPUWgToSgDistribute.cpp | 4 +- mlir/test/Dialect/XeGPU/invalid.mlir | 86 ++++++++++ mlir/test/Dialect/XeGPU/ops.mlir | 68 ++++++++ 9 files changed, 612 insertions(+), 8 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index 1f420c13ebae..a94987885c9e 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -527,4 +527,34 @@ def XeGPU_RangeAttr : XeGPUAttr<"Range", "range"> { let genVerifyDecl = 1; } +def XeGPU_MemLayoutAttr : XeGPUAttr<"MemLayout", "mem_layout"> { + let summary = [{Specifies memory layouts with named attributes.}]; + + let description = [{ + This attribute stores a collection of named attributes that describe + memory layout properties such as stride, block, etc. 
+ }]; + + let parameters = (ins "DictionaryAttr": $attrs); + let hasCustomAssemblyFormat = 1; + + let extraClassDeclaration = [{ + /// Get a specific attribute by name + Attribute getAttr(StringRef name) const { + return getAttrs().get(name); + } + + /// Check if a specific attribute exists + bool hasAttr(StringRef name) const { + return getAttrs().contains(name); + } + + ArrayAttr getStrides() { + return getAttrs().getAs("stride"); + } + + }]; + +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index 480b43e74073..abc291c81a76 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -1097,4 +1097,152 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou let hasCanonicalizer = 1; } +def isSharedPred : CPred<"isSharedMemory(llvm::cast($_self))">; +class StaticShared1DMemRefOf allowedTypes> : + ConfinedType, [HasStaticShapePred, isSharedPred], + "statically shaped " # MemRefOf.summary # " for shared memory", + "mlir::MemRefType">; + +class SizeInBits : + StrFunc<"llvm::cast($" # name # ".getType()).getNumElements()" + "*llvm::cast($" # name # ".getType()).getElementTypeBitWidth()">; +class AllMemSizesMatch names> : + AllMatchSameOperatorTrait.result, + "size in bits">; + +def XeGPU_CreateMemDescOp: XeGPU_Op<"create_mem_desc", [Pure, + AllMemSizesMatch<["source", "mem_desc"]>]> { + let summary = "Create a memory descriptor."; + let description = [{ + Creates a memory descriptor from a shared local memory (SLM) buffer, and xegpu + specific memory layout. The resulting memory descriptor has to have the same size + as the underlying shared local memory. + + Arguments: + - `source` : a 1D statically shaped memref with element type i8, representing the raw SLM buffer. + Results: + - `mem_desc` : the memory descriptor. 
+ }]; + let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source); + let results = (outs XeGPU_MemDesc:$mem_desc); + let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($mem_desc))"; +} + +def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>, + AllElementTypesMatch<["mem_desc", "res"]>, + AllRanksMatch<["mem_desc", "res"]>]> { + let arguments = (ins XeGPU_MemDesc:$mem_desc, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets, + OptionalAttr:$layout + ); + let results = (outs XeGPU_ValueType:$res); + let assemblyFormat = [{ + $mem_desc `` custom($offsets, $const_offsets) + prop-dict attr-dict `` `:` type(operands) `->` type(results) + }]; + + let description = [{ + This operation loads a 2D block of data from shared local memory (SLM) as specified + by the provided 2D `mem_desc`. Only 2D memory descriptors are supported; use the + subview operation to obtain a compatible 2D `mem_desc` from a higher-rank descriptor if needed. + + Arguments: + - `mem_desc`: the memory descriptor identifying the SLM region. + - `offsets`: the coordinates within the matrix to read from. + - `layout`: [optional] An attribute for guiding distributions among + subgroups and/or work-items. It currently can accept either + LayoutAttr or SliceAttr. + Results: + - `res`: the matrix elements loaded from SLM. 
+ }]; + + let builders = [ + OpBuilder<(ins "Type":$res, "TypedValue": $mem_desc, + "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)>, + ]; + let extraClassDeclaration = [{ + SmallVector getMixedOffsets() { + return getMixedValues(getConstOffsets(), getOffsets(), getContext()); + } + }]; + + let hasVerifier = 1; +} + +def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>, + AllElementTypesMatch<["mem_desc", "data"]>, + AllRanksMatch<["mem_desc", "data"]>]> { + let arguments = (ins + XeGPU_ValueType:$data, + XeGPU_MemDesc:$mem_desc, + Variadic: $offsets, + DenseI64ArrayAttr: $const_offsets, + OptionalAttr:$layout + ); + let assemblyFormat = [{ $data `,` $mem_desc `` custom($offsets, $const_offsets) + prop-dict attr-dict `` `:` type(operands)}]; + let description = [{ + This operation stores a 2D `data` fragment into the shared local memory region + specified by a 2D `mem_desc`. Only 2D memory descriptors are supported; use the + subview operation to obtain a 2D `mem_desc` from a higher-rank descriptor if needed. + + Arguments: + - `mem_desc`: the memory descriptor specifying the SLM region. + - `offsets`: the coordinates within the matrix where the data will be written. + - `data`: the values to be stored in the matrix. + - `layout`: [optional] An attribute for guiding distributions among + subgroups and/or work-items. It currently can accept either + LayoutAttr or SliceAttr. + }]; + let builders = [ + OpBuilder<(ins "Value" : $data, "TypedValue": $mem_desc, + "llvm::ArrayRef": $offsets, "LayoutTrait": $layout)>, + ]; + let extraClassDeclaration = [{ + SmallVector getMixedOffsets() { + return getMixedValues(getConstOffsets(), getOffsets(), getContext()); + } + }]; + + let hasVerifier = 1; +} + +def XeGPU_MemDescSubviewOp: XeGPU_Op<"mem_desc_subview", + [Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> { + let description = [{ + Creates a subview of a memory descriptor. 
The resulting memory descriptor can have + a lower rank than the source; in this case, the result dimensions correspond to the + higher-order dimensions of the source memory descriptor. + + Arguments: + - `src` : a memory descriptor. + - `offsets` : the coordinates within the matrix the subview will be created from. + + Results: + - `res` : a memory descriptor with smaller size. + + }]; + let arguments = (ins XeGPU_MemDesc:$src, + Variadic:$offsets, + DenseI64ArrayAttr:$const_offsets); + let results = (outs XeGPU_MemDesc:$res); + let assemblyFormat = [{$src `` custom($offsets, $const_offsets) prop-dict + attr-dict `` `:` qualified(type($src)) `->` qualified(type($res))}]; + let builders = [ + OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef": $offsets)> + ]; + + let extraClassDeclaration = [{ + mlir::Value getViewSource() { return getSrc(); } + + SmallVector getMixedOffsets() { + return getMixedValues(getConstOffsets(), getOffsets(), getContext()); + } + }]; + + let hasVerifier = 1; +} + + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index b268cabb5d26..f8b371db498e 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -201,4 +201,53 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> { }]; } +def XeGPU_MemDesc: XeGPUTypeDef<"MemDesc", "mem_desc", [ShapedTypeInterface], "mlir::Type"> { + let summary = "MemDesc describing the data in SLM"; + let description = [{ + MemDesc represents a block of data stored in shared local memory. + By default, unless a layout attribute is provided, the data is stored + contiguously in row-major order within the region. + + Examples: + ```mlir + // A multi-dimensional array stored in column-major order. + !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> + + // A multi-dimensional array stored in a blocked layout. 
Elements within the same block + // are stored contiguously in memory. Blocks are stored in row-major order. + !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> + + // A multi-dimensional array stored in column-major order with blocked layout. + !xegpu.mem_desc<128x128xf16, #xegpu.mem_layout> + ``` + }]; + let parameters = (ins ArrayRefParameter<"int64_t">: $shape, + "mlir::Type": $elementType, + OptionalParameter<"MemLayoutAttr">: $mem_layout); + + let extraClassDeclaration = [{ + bool hasRank() const { return true; } + + MemDescType cloneWith(std::optional> shape, Type elementType) const { + return MemDescType::get(getContext(), shape.value_or(getShape()), elementType, getMemLayout()); + } + + ArrayAttr getStrides() { + auto layout = getMemLayout(); + if (layout && layout.hasAttr("stride")) { + return layout.getStrides(); + } + + // derive and return default strides + SmallVector defaultStrides; + llvm::append_range(defaultStrides, getShape().drop_front()); + llvm::append_values(defaultStrides, 1); + Builder builder(getContext()); + return builder.getI64ArrayAttr(defaultStrides); + } + }]; + + let hasCustomAssemblyFormat = true; +} + #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt index 7c6a4f37db9a..7869a28dfed5 100644 --- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt @@ -17,6 +17,8 @@ add_mlir_dialect_library(MLIRXeGPUDialect MLIRAffineUtils MLIRArithUtils MLIRDialectUtils + MLIRGPUDialect + MLIRXeVMDialect MLIRIR MLIRViewLikeInterface MLIRVectorDialect diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index d997296a22c2..1b26542ff65a 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -427,7 +427,7 @@ RangeAttr::verify(llvm::function_ref emitError, // XeGPU_TensorDescType 
//===----------------------------------------------------------------------===// -mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { +mlir::Type TensorDescType::parse(AsmParser &parser) { llvm::SmallVector shape; mlir::Type elementType; mlir::FailureOr encoding; @@ -477,7 +477,7 @@ mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { layout.value_or(mlir::Attribute())); } -void TensorDescType::print(::mlir::AsmPrinter &printer) const { +void TensorDescType::print(AsmPrinter &printer) const { printer << "<"; auto shape = getShape(); @@ -522,10 +522,10 @@ TensorDescType TensorDescType::get(llvm::ArrayRef shape, return Base::get(context, shape, elementType, attr, layout); } -LogicalResult TensorDescType::verify( - llvm::function_ref<::mlir::InFlightDiagnostic()> emitError, - llvm::ArrayRef shape, mlir::Type elementType, - mlir::Attribute encoding, mlir::Attribute layout) { +LogicalResult +TensorDescType::verify(llvm::function_ref emitError, + llvm::ArrayRef shape, mlir::Type elementType, + mlir::Attribute encoding, mlir::Attribute layout) { size_t rank = shape.size(); if (rank == 0) @@ -591,6 +591,119 @@ LogicalResult TensorDescType::verify( return success(); } +//===----------------------------------------------------------------------===// +// XeGPU_MemDescType +//===----------------------------------------------------------------------===// +mlir::Type MemDescType::parse(AsmParser &parser) { + llvm::SmallVector shape; + mlir::Type elementType; + mlir::FailureOr layout; + + // Parse literal '<' + if (parser.parseLess()) + return {}; + + auto shapeLoc = parser.getCurrentLocation(); + if (mlir::failed(parser.parseDimensionList(shape, false, true))) { + parser.emitError(shapeLoc, "failed to parse parameter 'shape'"); + return {}; + } + + auto elemTypeLoc = parser.getCurrentLocation(); + if (mlir::failed(parser.parseType(elementType))) { + parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'"); + return {}; + } + + // parse optional 
attributes + if (mlir::succeeded(parser.parseOptionalComma())) { + MemLayoutAttr attr; + ParseResult res = parser.parseAttribute(attr); + if (mlir::failed(res)) + return {}; + layout = attr; + } + + // Parse literal '>' + if (parser.parseGreater()) + return {}; + + MLIRContext *ctxt = parser.getContext(); + return MemDescType::getChecked( + [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape, + elementType, layout.value_or(MemLayoutAttr())); +} + +void MemDescType::print(AsmPrinter &printer) const { + printer << "<"; + + printer.printDimensionList(getShape()); + printer << 'x'; + printer << getElementType(); + + if (auto layout = getMemLayout()) + printer << ", " << layout; + + printer << ">"; +} + +//===----------------------------------------------------------------------===// +// XeGPU_MemLayoutAttr +//===----------------------------------------------------------------------===// + +Attribute MemLayoutAttr::parse(AsmParser &parser, Type type) { + + auto context = parser.getContext(); + llvm::SMLoc loc = parser.getCurrentLocation(); + + llvm::SmallDenseSet seenKeys; + SmallVector attributes; + + auto parseElt = [&]() -> ParseResult { + StringRef nameId; + if (failed(parser.parseKeyword(&nameId))) + return parser.emitError(loc, "expected valid attribute name"); + + if (!seenKeys.insert(nameId).second) + return parser.emitError(loc, "duplicate key '") + << nameId << "' in mem layout attribute"; + + if (failed(parser.parseEqual())) + return failure(); + + Attribute attr; + if (failed(parser.parseAttribute(attr))) + return failure(); + attributes.emplace_back(nameId, attr); + return success(); + }; + + // Parse literal '<' + if (parser.parseLess()) + return {}; + + if (failed(parser.parseCommaSeparatedList(parseElt))) + return {}; + + // Parse literal '>' + if (parser.parseGreater()) + return {}; + + return parser.getChecked( + loc, context, DictionaryAttr::get(context, attributes)); +} + +void MemLayoutAttr::print(AsmPrinter &printer) const { + printer 
<< "<"; + ArrayRef attrs = getAttrs().getValue(); + for (size_t i = 0; i < attrs.size(); i++) { + printer << attrs[i].getName().str() << " = " << attrs[i].getValue(); + if (i < attrs.size() - 1) + printer << ", "; + } + printer << ">"; +} + } // namespace xegpu } // namespace mlir diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 7b7ce19e6937..eee0fdc7160d 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/XeVMDialect.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Dialect/XeGPU/IR/XeGPU.h" @@ -21,6 +23,21 @@ namespace mlir { namespace xegpu { +bool isSharedMemory(const MemRefType &memrefTy) { + Attribute attr = memrefTy.getMemorySpace(); + // getMemorySpace() is null when no memory space is specified; dyn_cast + // asserts on null, and the default memory space is not shared memory. + if (!attr) + return false; + if (auto intAttr = llvm::dyn_cast(attr)) + return intAttr.getInt() == 3; + if (auto memrefSpace = llvm::dyn_cast(attr)) + return memrefSpace.getValue() == MemorySpace::SLM; + if (auto xevmSpace = llvm::dyn_cast(attr)) + return xevmSpace.getValue() == xevm::AddrSpace::SHARED; + return gpu::GPUDialect::isWorkgroupMemoryAddressSpace(attr); +} + template static std::string makeString(T array, bool breakline = false) { std::string buf; @@ -919,6 +936,101 @@ void ConvertLayoutOp::getCanonicalizationPatterns(RewritePatternSet &patterns, patterns.add(context); } +//===----------------------------------------------------------------------===// +// XeGPU_LoadMatrixOp +//===----------------------------------------------------------------------===// +void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res, + TypedValue memDesc, + llvm::ArrayRef offsets, + LayoutTrait layout) { + llvm::SmallVector dynamicOffsets; + llvm::SmallVector staticOffsets; + 
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); + build(builder, state, res, memDesc, dynamicOffsets, staticOffsetsAttr, + layout); +} + +LogicalResult LoadMatrixOp::verify() { + VectorType resTy = getRes().getType(); + MemDescType mdescTy = getMemDesc().getType(); + + if (mdescTy.getRank() != 2) + return emitOpError("mem_desc must be 2D."); + + ArrayRef valueShape = resTy.getShape(); + ArrayRef mdescShape = mdescTy.getShape(); + if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + return emitOpError("result shape must not exceed mem_desc shape."); + return success(); +} + +//===----------------------------------------------------------------------===// +// XeGPU_StoreMatrixOp +//===----------------------------------------------------------------------===// +void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, Value data, + TypedValue memDesc, + llvm::ArrayRef offsets, + LayoutTrait layout) { + llvm::SmallVector dynamicOffsets; + llvm::SmallVector staticOffsets; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); + build(builder, state, data, memDesc, dynamicOffsets, staticOffsetsAttr, + layout); +} + +LogicalResult StoreMatrixOp::verify() { + VectorType dataTy = getData().getType(); + MemDescType mdescTy = getMemDesc().getType(); + + if (mdescTy.getRank() != 2) + return emitOpError("mem_desc must be 2D."); + + ArrayRef dataShape = dataTy.getShape(); + ArrayRef mdescShape = mdescTy.getShape(); + if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + return emitOpError("data shape must not exceed mem_desc shape."); + + return success(); +} + +//===----------------------------------------------------------------------===// +// 
XeGPU_MemDescSubviewOp +//===----------------------------------------------------------------------===// + +void MemDescSubviewOp::build(OpBuilder &builder, OperationState &state, + Type resTy, Value src, + llvm::ArrayRef offsets) { + llvm::SmallVector dynamicOffsets; + llvm::SmallVector staticOffsets; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); + auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets); + build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr); +} + +LogicalResult MemDescSubviewOp::verify() { + MemDescType srcTy = getSrc().getType(); + MemDescType resTy = getRes().getType(); + ArrayRef srcShape = srcTy.getShape(); + ArrayRef resShape = resTy.getShape(); + + if (srcTy.getRank() < resTy.getRank()) + return emitOpError("result rank must not exceed source rank."); + + if (llvm::any_of( + llvm::zip_equal(resShape, srcShape.take_back(resShape.size())), + [](auto p) { return std::get<0>(p) > std::get<1>(p); })) + return emitOpError("result shape must not exceed source shape."); + + if (srcTy.getStrides() != resTy.getStrides()) + return emitOpError("result must inherit the source strides."); + + return success(); +} + } // namespace xegpu } // namespace mlir diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index 270d71aaa727..46ff03745a22 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -524,8 +524,8 @@ struct WgToSgElementwiseOp : public ConversionPattern { // is lowered to: // #a = #xegpu.layout // #b = #xegpu.layout -// store_matrix %1, %slm <{layout_input_0 = #a}> : vector<32x16>, matrix_desc<32x64xf32> -// %d = load_matrix %slm <{layout_result_0 = #a}> : matrix_desc<32x64xf32> -> vector<16x32xf32> +// store_matrix %1, %slm <{layout_input_0 = #a}> : vector<32x16>, mem_desc<32x64xf32> +// %d = load_matrix %slm 
<{layout_result_0 = #a}> : mem_desc<32x64xf32> -> vector<16x32xf32> // xegpu.convert_layout %d <{input_layout = #a, target_layout = #b}> : vector<16x32xf32> // clang-format on struct WgToSgConvertLayoutOp diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 5b251517d2ef..93a5a055b08c 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -762,3 +762,89 @@ func.func @slice_attr_repeat_dim() { return } +// ----- +func.func @create_mem_desc_non_slm() { + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 1> + // expected-error@+1 {{operand #0 must be statically shaped memref of 8-bit signless integer values for shared memory}} + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 1> -> !xegpu.mem_desc<16x64xf16> + return +} + +// ----- +func.func @create_mem_desc_mismatch_sizes() { + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> + // expected-error@+1 {{failed to verify that all of {source, mem_desc} have same size in bits}} + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 3> -> !xegpu.mem_desc<16x32xf16> + return +} + +// ----- +func.func @load_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>) { + // expected-error@+1 {{failed to verify that all of {mem_desc, res} have same element type}} + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<8x16xf32> + return +} + +// ----- +func.func @load_mem_desc_invalid_result_size(%arg0: !xegpu.mem_desc<16x64xf16>) { + // expected-error@+1 {{result shape must not exceed mem_desc shape}} + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<32x16xf16> + return +} + +// ----- +func.func @load_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>) { + // expected-error@+1 {{mem_desc must be 2D}} + %data = xegpu.load_matrix %arg0[16]: !xegpu.mem_desc<64xf16> -> vector<16xf16> + return +} + +// ----- +func.func @store_mem_desc_mismatch_element_type(%arg0: 
!xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf32>) { + // expected-error@+1 {{failed to verify that all of {mem_desc, data} have same element type}} + xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.mem_desc<16x64xf16> + return +} + +// ----- +func.func @store_mem_desc_invalid_data_size(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<32x32xf16>) { + // expected-error@+1 {{data shape must not exceed mem_desc shape}} + xegpu.store_matrix %arg1, %arg0[8, 8] : vector<32x32xf16>, !xegpu.mem_desc<16x64xf16> + return +} + +// ----- +func.func @store_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>, %arg1: vector<32xf16>) { + // expected-error@+1 {{mem_desc must be 2D.}} + xegpu.store_matrix %arg1, %arg0[32] : vector<32xf16>, !xegpu.mem_desc<64xf16> + return +} + +// ----- +func.func @mem_desc_subview_size_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) { + // expected-error@+1 {{result shape must not exceed source shape}} + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<32x16xf16> + return +} + +// ----- +func.func @mem_desc_subview_layout_mismatch(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) { + // expected-error@+1 {{result must inherit the source strides}} + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.mem_desc<8x16xf16> + return +} + +// ----- +func.func @mem_desc_subview_element_type_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) { + // expected-error@+1 {{failed to verify that all of {src, res} have same element type}} + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf32, #xegpu.mem_layout> + return +} + +// ----- +func.func @mem_desc_subview_rank_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) { + // expected-error@+1 {{result rank must not exceed source rank}} + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<4x8x16xf16> + return +} + diff --git 
a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir index 67c00f5a9cc2..35342eca1354 100644 --- a/mlir/test/Dialect/XeGPU/ops.mlir +++ b/mlir/test/Dialect/XeGPU/ops.mlir @@ -751,4 +751,72 @@ gpu.func @fence() { gpu.return } +// CHECK-LABEL: gpu.func @create_mem_desc({{.*}}) { +gpu.func @create_mem_desc() { + //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> + //CHECK: [[mdesc:%.+]] = xegpu.create_mem_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16> + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16> + gpu.return +} + +// CHECK-LABEL: gpu.func @create_mem_desc_with_stride({{.*}}) { +gpu.func @create_mem_desc_with_stride() { + //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3> + //CHECK: [[mdesc:%.+]] = xegpu.create_mem_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> + %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3> + %mem_desc = xegpu.create_mem_desc %m : memref<2048xi8, 3> -> !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> + gpu.return +} + +// CHECK: gpu.func @load_mem_desc([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>) +gpu.func @load_mem_desc(%arg0: !xegpu.mem_desc<16x64xf16>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<8x16xf16> + gpu.return +} + +// CHECK: gpu.func @load_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) +gpu.func @load_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) { + // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> + %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> vector<8x16xf16> + gpu.return +} + + +// CHECK: 
gpu.func @store_mem_desc([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_mem_desc(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> + xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> + gpu.return +} + +// CHECK: gpu.func @store_mem_desc_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>, [[ARG1:%.+]]: vector<16x16xf16>) +gpu.func @store_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>, %arg1: vector<16x16xf16>) { + // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][0, 8] : vector<16x16xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> + xegpu.store_matrix %arg1, %arg0[0, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> + gpu.return +} + +// CHECK: gpu.func @mem_desc_subview([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>) +gpu.func @mem_desc_subview(%arg0: !xegpu.mem_desc<16x64xf16>) { + //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> + gpu.return +} + +// CHECK: gpu.func @mem_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>) +gpu.func @mem_desc_subview_lower_rank(%arg0: !xegpu.mem_desc<16x64xf16>) { + //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<16xf16, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<16xf16, #xegpu.mem_layout> + gpu.return +} + +// CHECK: gpu.func @mem_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) +gpu.func @mem_desc_subview_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout>) { + //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 
8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> + %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout> + gpu.return +} + } From acdbb00af5d0b6469fceb8abb26634de2dbee985 Mon Sep 17 00:00:00 2001 From: Jean-Didier PAILLEUX Date: Sat, 16 Aug 2025 01:04:49 +0200 Subject: [PATCH 033/214] [flang] Adding support of -fcoarray flang and init PRIF (#151675) In relation to the approval and merge of the [PRIF](https://github.com/llvm/llvm-project/pull/76088) specification about multi-image features in Flang, here is a first PR to add support for the `-fcoarray` compilation flag and the initialization of the PRIF environment. Other PRs will follow for adding support of lowering to PRIF. --- clang/include/clang/Driver/Options.td | 10 ++++- clang/lib/Driver/ToolChains/Flang.cpp | 2 + .../flang/Optimizer/Builder/Runtime/Coarray.h | 41 +++++++++++++++++++ .../flang/Optimizer/Builder/Runtime/Main.h | 2 +- .../include/flang/Support/Fortran-features.h | 2 +- flang/lib/Frontend/CompilerInvocation.cpp | 11 +++++ flang/lib/Lower/Bridge.cpp | 4 +- flang/lib/Optimizer/Builder/CMakeLists.txt | 1 + .../lib/Optimizer/Builder/Runtime/Coarray.cpp | 29 +++++++++++++ flang/lib/Optimizer/Builder/Runtime/Main.cpp | 7 +++- flang/lib/Support/Fortran-features.cpp | 1 + flang/test/Lower/Coarray/coarray-init.f90 | 11 +++++ 12 files changed, 115 insertions(+), 6 deletions(-) create mode 100644 flang/include/flang/Optimizer/Builder/Runtime/Coarray.h create mode 100644 flang/lib/Optimizer/Builder/Runtime/Coarray.cpp create mode 100644 flang/test/Lower/Coarray/coarray-init.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 858f37c39210..6a2f4575459b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6987,7 +6987,6 @@ def static_libgfortran : Flag<["-"], "static-libgfortran">, 
Group, Group; def finit_character_EQ : Joined<["-"], "finit-character=">, Group; @@ -8695,6 +8694,15 @@ def fopenmp_host_ir_file_path : Separate<["-"], "fopenmp-host-ir-file-path">, } // let Visibility = [CC1Option, FC1Option] +//===----------------------------------------------------------------------===// +// Coarray Options +//===----------------------------------------------------------------------===// + +def fcoarray : Flag<["-"], "fcoarray">, + Group, + Visibility<[FlangOption, FC1Option]>, + HelpText<"Enable Coarray features">; + //===----------------------------------------------------------------------===// // SYCL Options //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 547e3156f519..65391033c2b9 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -178,6 +178,8 @@ void Flang::addCodegenOptions(const ArgList &Args, options::OPT_fstack_repack_arrays, options::OPT_fno_stack_repack_arrays, options::OPT_ftime_report, options::OPT_ftime_report_EQ, options::OPT_funroll_loops, options::OPT_fno_unroll_loops}); + if (Args.hasArg(clang::driver::options::OPT_fcoarray)) + CmdArgs.push_back("-fcoarray"); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h new file mode 100644 index 000000000000..f2c76c9e8d97 --- /dev/null +++ b/flang/include/flang/Optimizer/Builder/Runtime/Coarray.h @@ -0,0 +1,41 @@ +//===-- Coarray.h -- generate Coarray intrinsics runtime calls --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H +#define FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H + +#include "flang/Lower/AbstractConverter.h" +#include "flang/Optimizer/Support/InternalNames.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" + +namespace fir { +class ExtendedValue; +class FirOpBuilder; +} // namespace fir + +namespace fir::runtime { + +// Get the function type for a prif subroutine with a variable number of +// arguments +#define PRIF_FUNCTYPE(...) \ + mlir::FunctionType::get(builder.getContext(), /*inputs*/ {__VA_ARGS__}, \ + /*result*/ {}) + +// Default prefix for subroutines of PRIF compiled with LLVM +#define PRIFNAME_SUB(fmt) \ + []() { \ + std::ostringstream oss; \ + oss << "prif_" << fmt; \ + return fir::NameUniquer::doProcedure({"prif"}, {}, oss.str()); \ + }() + +/// Generate Call to runtime prif_init +mlir::Value genInitCoarray(fir::FirOpBuilder &builder, mlir::Location loc); + +} // namespace fir::runtime +#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_COARRAY_H diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Main.h b/flang/include/flang/Optimizer/Builder/Runtime/Main.h index a0586deade42..d4067b367f73 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Main.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Main.h @@ -25,7 +25,7 @@ namespace fir::runtime { void genMain(fir::FirOpBuilder &builder, mlir::Location loc, const std::vector &defs, - bool initCuda = false); + bool initCuda = false, bool initCoarrayEnv = false); } #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h index bd3ff4a70ef0..83a75b0efcb5 100644 --- a/flang/include/flang/Support/Fortran-features.h +++ b/flang/include/flang/Support/Fortran-features.h @@ -56,7 +56,7 @@ ENUM_CLASS(LanguageFeature, 
BackslashEscapes, OldDebugLines, IgnoreIrrelevantAttributes, Unsigned, AmbiguousStructureConstructor, ContiguousOkForSeqAssociation, ForwardRefExplicitTypeDummy, InaccessibleDeferredOverride, CudaWarpMatchFunction, DoConcurrentOffload, - TransferBOZ) + TransferBOZ, Coarray) // Portability and suspicious usage warnings ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable, diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 3811a87aaf46..265ba8e031a6 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1152,6 +1152,17 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, diags.Report(diagID); } } + // -fcoarray + if (args.hasArg(clang::driver::options::OPT_fcoarray)) { + res.getFrontendOpts().features.Enable( + Fortran::common::LanguageFeature::Coarray); + const unsigned diagID = + diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, + "Support for multi image Fortran features is " + "still experimental and in development."); + diags.Report(diagID); + } + return diags.getNumErrors() == numErrorsBefore; } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 43bdbdb4644e..ab7bf28a9e8b 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -475,7 +475,9 @@ public: fir::runtime::genMain(*builder, toLocation(), bridge.getEnvironmentDefaults(), getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)); + Fortran::common::LanguageFeature::CUDA), + getFoldingContext().languageFeatures().IsEnabled( + Fortran::common::LanguageFeature::Coarray)); }); finalizeOpenMPLowering(globalOmpRequiresSymbol); diff --git a/flang/lib/Optimizer/Builder/CMakeLists.txt b/flang/lib/Optimizer/Builder/CMakeLists.txt index 31ae395805fa..8fb36a750d43 100644 --- a/flang/lib/Optimizer/Builder/CMakeLists.txt +++ b/flang/lib/Optimizer/Builder/CMakeLists.txt @@ -16,6 +16,7 @@ 
add_flang_library(FIRBuilder Runtime/Allocatable.cpp Runtime/ArrayConstructor.cpp Runtime/Assign.cpp + Runtime/Coarray.cpp Runtime/Character.cpp Runtime/Command.cpp Runtime/CUDA/Descriptor.cpp diff --git a/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp new file mode 100644 index 000000000000..eaff6c37ecdb --- /dev/null +++ b/flang/lib/Optimizer/Builder/Runtime/Coarray.cpp @@ -0,0 +1,29 @@ +//===-- Coarray.cpp -- runtime API for coarray intrinsics -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/Runtime/Coarray.h" +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/RTBuilder.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" + +using namespace Fortran::runtime; +using namespace Fortran::semantics; + +/// Generate Call to runtime prif_init +mlir::Value fir::runtime::genInitCoarray(fir::FirOpBuilder &builder, + mlir::Location loc) { + mlir::Type i32Ty = builder.getI32Type(); + mlir::Value result = builder.createTemporary(loc, i32Ty); + mlir::FunctionType ftype = PRIF_FUNCTYPE(builder.getRefType(i32Ty)); + mlir::func::FuncOp funcOp = + builder.createFunction(loc, PRIFNAME_SUB("init"), ftype); + llvm::SmallVector args = + fir::runtime::createArguments(builder, loc, ftype, result); + builder.create(loc, funcOp, args); + return builder.create(loc, result); +} diff --git a/flang/lib/Optimizer/Builder/Runtime/Main.cpp b/flang/lib/Optimizer/Builder/Runtime/Main.cpp index d35f687167b0..d303e0ad6384 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Main.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Main.cpp @@ -10,6 +10,7 @@ #include "flang/Lower/EnvironmentDefault.h" #include 
"flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/Coarray.h" #include "flang/Optimizer/Builder/Runtime/EnvironmentDefaults.h" #include "flang/Optimizer/Builder/Runtime/RTBuilder.h" #include "flang/Optimizer/Dialect/FIROps.h" @@ -23,8 +24,8 @@ using namespace Fortran::runtime; /// Create a `int main(...)` that calls the Fortran entry point void fir::runtime::genMain( fir::FirOpBuilder &builder, mlir::Location loc, - const std::vector &defs, - bool initCuda) { + const std::vector &defs, bool initCuda, + bool initCoarrayEnv) { auto *context = builder.getContext(); auto argcTy = builder.getDefaultIntegerType(); auto ptrTy = mlir::LLVM::LLVMPointerType::get(context); @@ -69,6 +70,8 @@ void fir::runtime::genMain( loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {})); fir::CallOp::create(builder, loc, initFn); } + if (initCoarrayEnv) + fir::runtime::genInitCoarray(builder, loc); fir::CallOp::create(builder, loc, qqMainFn); fir::CallOp::create(builder, loc, stopFn); diff --git a/flang/lib/Support/Fortran-features.cpp b/flang/lib/Support/Fortran-features.cpp index 6a61149e9700..4a6fb8d75a13 100644 --- a/flang/lib/Support/Fortran-features.cpp +++ b/flang/lib/Support/Fortran-features.cpp @@ -90,6 +90,7 @@ LanguageFeatureControl::LanguageFeatureControl() { disable_.set(LanguageFeature::OldStyleParameter); // Possibly an accidental "feature" of nvfortran. disable_.set(LanguageFeature::AssumedRankPassedToNonAssumedRank); + disable_.set(LanguageFeature::Coarray); // These warnings are enabled by default, but only because they used // to be unconditional. TODO: prune this list warnLanguage_.set(LanguageFeature::ExponentMatchingKindParam); diff --git a/flang/test/Lower/Coarray/coarray-init.f90 b/flang/test/Lower/Coarray/coarray-init.f90 new file mode 100644 index 000000000000..055bc0fc4da7 --- /dev/null +++ b/flang/test/Lower/Coarray/coarray-init.f90 @@ -0,0 +1,11 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fcoarray %s -o - | FileCheck %s --check-prefixes=ALL,COARRAY +! RUN: %flang_fc1 -emit-hlfir %s -o - | FileCheck %s --check-prefixes=ALL,NOCOARRAY + +program test_init + +end + +! ALL-LABEL: func.func @main +! ALL: fir.call @_FortranAProgramStart +! COARRAY: fir.call @_QMprifPprif_init(%[[ARG:.*]]) fastmath : (!fir.ref) -> () +! NOCOARRAY-NOT: fir.call @_QMprifPprif_init(%[[ARG:.*]]) fastmath : (!fir.ref) -> () From 4f34c740ab7313085d49a10c90128b6a47bc4eee Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 15 Aug 2025 16:08:13 -0700 Subject: [PATCH 034/214] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (#153879) --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 12 +++++ llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++ llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 54 +++++++++++++++++++ 4 files changed, 71 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index dd7c1914d344..c1cca063aac6 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixDsAtomicAsyncBarrierArriveB64(MI); if (ST.hasScratchBaseForwardingHazard()) fixScratchBaseForwardingHazard(MI); + if (ST.setRegModeNeedsVNOPs()) + fixSetRegMode(MI); } static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI, @@ -3546,3 +3548,13 @@ bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) { AMDGPU::DepCtr::encodeFieldSaSdst(0), 0)); return true; } + +bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) { + if (!isSSetReg(MI->getOpcode()) || + MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE) + return false; + + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32)); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 
TII.get(AMDGPU::V_NOP_e32)); + return true; +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index e0982b46424b..67beffadc091 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -113,6 +113,7 @@ private: bool fixGetRegWaitIdle(MachineInstr *MI); bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI); bool fixScratchBaseForwardingHazard(MachineInstr *MI); + bool fixSetRegMode(MachineInstr *MI); int checkMAIHazards(MachineInstr *MI); int checkMAIHazards908(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 404a476a3076..2a8385df3f93 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1345,6 +1345,10 @@ public: bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; } + bool setRegModeNeedsVNOPs() const { + return GFX1250Insts && getGeneration() == GFX12; + } + /// Return if operations acting on VGPR tuples require even alignment. bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; } diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir index f4596b0832d9..170478539d8a 100644 --- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir @@ -493,3 +493,57 @@ body: | liveins: $vgpr0 $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec ... + +--- +name: s_setreg_b32_hwreg_mode +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: s_setreg_b32_hwreg_mode + ; GCN: liveins: $sgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: V_NOP_e32 implicit $exec + ; GCN-NEXT: V_NOP_e32 implicit $exec + ; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode + S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode +... 
+ +--- +name: s_setreg_b32_mode +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: s_setreg_b32_mode + ; GCN: liveins: $sgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: V_NOP_e32 implicit $exec + ; GCN-NEXT: V_NOP_e32 implicit $exec + ; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode + S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode +... + +--- +name: s_setreg_imm32_b32_hwreg_mode +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode + ; GCN: V_NOP_e32 implicit $exec + ; GCN-NEXT: V_NOP_e32 implicit $exec + ; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode + S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode +... + +--- +name: s_setreg_imm32_b32_mode +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: s_setreg_imm32_b32_mode + ; GCN: V_NOP_e32 implicit $exec + ; GCN-NEXT: V_NOP_e32 implicit $exec + ; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode + S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode +... From be0135538a934f108a6fb70f93ec587be3016033 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 15 Aug 2025 18:10:49 -0500 Subject: [PATCH 035/214] [DirectX][objdump] Add support for printing signatures (#153320) This adds support for printing the signature sections as part of the `-p` flag for printing private headers. The formatting aims to roughly match the formatting used by DXC's `/dumpbin` flag. The original version's printed output left some trailing whitespace on lines, which caused the tests to fail with the strict whitespace matching. Re-lands #152531. Resolves #152380. 
--- llvm/include/llvm/Object/DXContainer.h | 2 + .../DXContainer/input-output-signatures.yaml | 167 ++++++++++++++++++ llvm/tools/llvm-objdump/DXContainerDump.cpp | 140 ++++++++++++++- 3 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/DXContainer/input-output-signatures.yaml diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h index ad1b2361ff06..93d39dabae4b 100644 --- a/llvm/include/llvm/Object/DXContainer.h +++ b/llvm/include/llvm/Object/DXContainer.h @@ -603,6 +603,8 @@ private: } public: + const DXContainer &getDXContainer() const { return Container; } + static bool classof(const Binary *v) { return v->isDXContainer(); } Expected getSymbolName(DataRefImpl) const override; diff --git a/llvm/test/tools/llvm-objdump/DXContainer/input-output-signatures.yaml b/llvm/test/tools/llvm-objdump/DXContainer/input-output-signatures.yaml new file mode 100644 index 000000000000..ad979d2dcb7e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/DXContainer/input-output-signatures.yaml @@ -0,0 +1,167 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump -p %t | FileCheck %s --match-full-lines --strict-whitespace + +## This test covers llvm-objdump printing private headers for the ISG1, OSG1, +## and PSG1 "parts" of the DX container file format. The test uses a few +## absurdly large values and long string names to ensure that the columns in the +## printed table widen correctly. 
+ +--- !dxcontainer +Header: + Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] + Version: + Major: 1 + Minor: 0 + FileSize: 630 + PartCount: 3 + PartOffsets: [ 64, 124, 184 ] +Parts: + - Name: ISG1 + Size: 52 + Signature: + Parameters: + - Stream: 0 + Name: AAA_HSFoo + Index: 4391238 # This value forces the index column to widen + SystemValue: Undefined + CompType: Float32 + Register: 0 + Mask: 7 + ExclusiveMask: 2 + MinPrecision: Default + - Name: OSG1 + Size: 52 + Signature: + Parameters: + - Stream: 0 + Name: SV_Position + Index: 0 + SystemValue: Position + CompType: Float32 + Register: 2147483647 # This value forces the register column to widen + Mask: 15 + ExclusiveMask: 0 + MinPrecision: Default + - Name: PSG1 + Size: 402 + Signature: + Parameters: + - Stream: 0 + Name: SV_TessFactor + Index: 0 + SystemValue: FinalQuadEdgeTessfactor # The tessfactor forces the SysVal column to widen + CompType: Float32 + Register: 0 + Mask: 8 + ExclusiveMask: 8 + MinPrecision: Default + - Stream: 0 + Name: BBB + Index: 0 + SystemValue: Undefined + CompType: Float32 + Register: 0 + Mask: 7 + ExclusiveMask: 0 + MinPrecision: Default + - Stream: 0 + Name: SV_TessFactor + Index: 1 + SystemValue: FinalQuadEdgeTessfactor + CompType: Float32 + Register: 1 + Mask: 8 + ExclusiveMask: 8 + MinPrecision: Default + - Stream: 0 + Name: BBB + Index: 1 + SystemValue: Undefined + CompType: Float32 + Register: 1 + Mask: 7 + ExclusiveMask: 0 + MinPrecision: Default + - Stream: 0 + Name: SV_TessFactor + Index: 2 + SystemValue: FinalQuadEdgeTessfactor + CompType: Float32 + Register: 2 + Mask: 8 + ExclusiveMask: 8 + MinPrecision: Default + - Stream: 0 + Name: BBB + Index: 2 + SystemValue: Undefined + CompType: Float32 + Register: 2 + Mask: 7 + ExclusiveMask: 0 + MinPrecision: Default + - Stream: 0 + Name: SV_TessFactor + Index: 3 + SystemValue: FinalQuadEdgeTessfactor + CompType: Float32 + Register: 3 + Mask: 8 + ExclusiveMask: 8 + MinPrecision: Default + 
- Stream: 0 + Name: SV_InsideTessFactor + Index: 0 + SystemValue: FinalQuadInsideTessfactor + CompType: Float32 + Register: 4 + Mask: 8 + ExclusiveMask: 0 + MinPrecision: Default + - Stream: 0 + Name: SV_InsideTessFactor + Index: 1 + SystemValue: FinalQuadInsideTessfactor + CompType: Float32 + Register: 5 + Mask: 8 + ExclusiveMask: 0 + MinPrecision: Default + - Stream: 0 + Name: AVeryLongStringThatWillForceWidening # This value forces name column to widen + Index: 0 + SystemValue: Undefined + CompType: Float32 + Register: 6 + Mask: 15 + ExclusiveMask: 4 + MinPrecision: Default +... + +# CHECK:; Input signature: +# CHECK-NEXT:; +# CHECK-NEXT:; Name Index Mask Register SysValue Format Used +# CHECK-NEXT:; ------------------------ ------- ----- -------- ---------- ------- ----- +# CHECK-NEXT:; AAA_HSFoo 4391238 xyz 0 Undefined Float32 y + +# CHECK:; Output signature: +# CHECK-NEXT:; +# CHECK-NEXT:; Name Index Mask Register SysValue Format Used +# CHECK-NEXT:; ------------------------ ----- ----- ---------- ---------- ------- ----- +# CHECK-NEXT:; SV_Position 0 xyzw 2147483647 Position Float32 + +# CHECK:; Patch Constant signature: +# CHECK-NEXT:; +# CHECK-NEXT:; Name Index Mask Register SysValue Format Used +# CHECK-NEXT:; ------------------------------------ ----- ----- -------- ------------------------- ------- ----- +# CHECK-NEXT:; SV_TessFactor 0 w 0 FinalQuadEdgeTessfactor Float32 w +# CHECK-NEXT:; BBB 0 xyz 0 Undefined Float32 +# CHECK-NEXT:; SV_TessFactor 1 w 1 FinalQuadEdgeTessfactor Float32 w +# CHECK-NEXT:; BBB 1 xyz 1 Undefined Float32 +# CHECK-NEXT:; SV_TessFactor 2 w 2 FinalQuadEdgeTessfactor Float32 w +# CHECK-NEXT:; BBB 2 xyz 2 Undefined Float32 +# CHECK-NEXT:; SV_TessFactor 3 w 3 FinalQuadEdgeTessfactor Float32 w +# CHECK-NEXT:; SV_InsideTessFactor 0 w 4 FinalQuadInsideTessfactor Float32 +# CHECK-NEXT:; SV_InsideTessFactor 1 w 5 FinalQuadInsideTessfactor Float32 +# CHECK-NEXT:; AVeryLongStringThatWillForceWidening 0 xyzw 6 Undefined Float32 z diff 
--git a/llvm/tools/llvm-objdump/DXContainerDump.cpp b/llvm/tools/llvm-objdump/DXContainerDump.cpp index 2fb073473de5..52963e0f7d1b 100644 --- a/llvm/tools/llvm-objdump/DXContainerDump.cpp +++ b/llvm/tools/llvm-objdump/DXContainerDump.cpp @@ -12,16 +12,152 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" +#include "llvm/BinaryFormat/DXContainer.h" #include "llvm/Object/DXContainer.h" +#include "llvm/Support/ScopedPrinter.h" using namespace llvm; +using namespace llvm::object; + +static llvm::SmallString<4> maskToString(uint8_t Mask, + bool StripTrailing = false) { + llvm::SmallString<4> Result(" "); + if (Mask & 1) + Result[0] = 'x'; + if (Mask & 2) + Result[1] = 'y'; + if (Mask & 4) + Result[2] = 'z'; + if (Mask & 8) + Result[3] = 'w'; + if (!StripTrailing) + return Result; + int Size = 8 - countl_zero(Mask); + return Result.slice(0, Size); +} + +static void printColumnHeader(raw_ostream &OS, size_t Length) { + for (size_t I = 0; I < Length; ++I) + OS << "-"; +} + +static void printColumnHeaders(raw_ostream &OS, ArrayRef Lengths) { + // Generate the header in a temporary to avoid trailing whitespace. 
+ SmallString<256> Str; + raw_svector_ostream Tmp(Str); + for (auto L : Lengths) { + printColumnHeader(Tmp, L); + Tmp << " "; + } + Str.back() = '\n'; + OS << Str; +} + +static size_t digitsForNumber(size_t N) { + if (N == 0) + return 1; + return static_cast(log10(static_cast(N))) + 1; +} namespace { class DXContainerDumper : public objdump::Dumper { + const DXContainerObjectFile &Obj; + public: - DXContainerDumper(const object::DXContainerObjectFile &Obj) - : objdump::Dumper(Obj) {} + DXContainerDumper(const DXContainerObjectFile &O) + : objdump::Dumper(O), Obj(O) {} + + void printPrivateHeaders() override; + void printSignature(const DirectX::Signature &S); }; + +void DXContainerDumper::printSignature(const DirectX::Signature &S) { + // DXC prints a table like this as part of the shader disassembly: + //; Name Index Mask Register SysValue Format Used + //; -------------------- ----- ------ -------- -------- ------- ------ + //; NORMAL 0 xyz 0 NONE float xyz + //; TEXCOORD 0 xy 1 NONE float xy + + // DXC's implementation doesn't scale columns completely for the + // provided input, so this implementation is a bit more complicated in + // formatting logic to scale with the size of the printed text. + + // DXC gives names 21 characters for some unknown reason; I arbitrarily chose + // to start at 24 so that we're not going shorter but are using a round + // number. + size_t LongestName = 24; + size_t LongestSV = 10; + size_t LongestIndex = strlen("Index"); + size_t LongestRegister = strlen("Register"); + size_t LongestFormat = strlen("Format"); + const size_t MaskWidth = 5; + // Compute the column widths. Skip calculating the "Mask" and "Used" columns
+ for (auto El : S) { + LongestName = std::max(LongestName, S.getName(El.NameOffset).size()); + LongestSV = std::max( + LongestSV, + enumToStringRef(El.SystemValue, dxbc::getD3DSystemValues()).size()); + LongestIndex = std::max(LongestIndex, digitsForNumber(El.Index)); + LongestRegister = std::max(LongestRegister, digitsForNumber(El.Register)); + LongestFormat = std::max( + LongestFormat, + enumToStringRef(El.CompType, dxbc::getSigComponentTypes()).size()); + } + + // Print Column headers. + OS << "; "; + OS << left_justify("Name", LongestName) << " "; + OS << right_justify("Index", LongestIndex) << " "; + OS << right_justify("Mask", MaskWidth) << " "; + OS << right_justify("Register", LongestRegister) << " "; + OS << right_justify("SysValue", LongestSV) << " "; + OS << right_justify("Format", LongestFormat) << " "; + OS << right_justify("Used", MaskWidth) << "\n"; + OS << "; "; + printColumnHeaders(OS, {LongestName, LongestIndex, MaskWidth, LongestRegister, + LongestSV, LongestFormat, MaskWidth}); + + for (auto El : S) { + OS << "; " << left_justify(S.getName(El.NameOffset), LongestName) << " "; + OS << right_justify(std::to_string(El.Index), LongestIndex) << " "; + OS << right_justify(maskToString(El.Mask), MaskWidth) << " "; + OS << right_justify(std::to_string(El.Register), LongestRegister) << " "; + OS << right_justify( + enumToStringRef(El.SystemValue, dxbc::getD3DSystemValues()), + LongestSV) + << " "; + OS << right_justify( + enumToStringRef(El.CompType, dxbc::getSigComponentTypes()), + LongestFormat); + if (El.ExclusiveMask) + OS << " " << maskToString(El.ExclusiveMask, true); + OS << "\n"; + } +} + +void DXContainerDumper::printPrivateHeaders() { + const DXContainer &C = + cast(Obj).getDXContainer(); + + if (!C.getInputSignature().isEmpty()) { + OS << "; Input signature:\n;\n"; + printSignature(C.getInputSignature()); + OS << ";\n"; + } + + if (!C.getOutputSignature().isEmpty()) { + OS << "; Output signature:\n;\n"; + 
printSignature(C.getOutputSignature()); + OS << ";\n"; + } + + if (!C.getPatchConstantSignature().isEmpty()) { + OS << "; Patch Constant signature:\n;\n"; + printSignature(C.getPatchConstantSignature()); + OS << ";\n"; + } +} } // namespace std::unique_ptr llvm::objdump::createDXContainerDumper( From 568c23bbd3303518c5056d7f03444dae4fdc8a9c Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 15 Aug 2025 16:13:23 -0700 Subject: [PATCH 036/214] Frontend: Define __SANITIZE_*__ macros for certain sanitizers. Per discussion with @ojhunt and @AaronBallman we are moving towards predefined macros and away from __has_feature and __has_extension for detecting sanitizers and other similar features. The rationale is that __has_feature is only really meant for standardized features (see the comment at the top of clang/include/clang/Basic/Features.def), and __has_extension has the issues discovered as part of #153104. Let's start by defining macros for ASan, HWASan and TSan, consistently with gcc. 
Reviewers: vitalybuka, ojhunt, AaronBallman, fmayer Reviewed By: fmayer, vitalybuka Pull Request: https://github.com/llvm/llvm-project/pull/153888 --- clang/lib/Frontend/InitPreprocessor.cpp | 7 +++++++ clang/test/Preprocessor/sanitizer-predefines.c | 8 ++++++++ 2 files changed, 15 insertions(+) create mode 100644 clang/test/Preprocessor/sanitizer-predefines.c diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 008a35d5265e..5980806fba5e 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1519,6 +1519,13 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (TI.getTriple().isOSBinFormatELF()) Builder.defineMacro("__ELF__"); + if (LangOpts.Sanitize.has(SanitizerKind::Address)) + Builder.defineMacro("__SANITIZE_ADDRESS__"); + if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) + Builder.defineMacro("__SANITIZE_HWADDRESS__"); + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) + Builder.defineMacro("__SANITIZE_THREAD__"); + // Target OS macro definitions. 
if (PPOpts.DefineTargetOSMacros) { const llvm::Triple &Triple = TI.getTriple(); diff --git a/clang/test/Preprocessor/sanitizer-predefines.c b/clang/test/Preprocessor/sanitizer-predefines.c new file mode 100644 index 000000000000..9d2f6bf2517a --- /dev/null +++ b/clang/test/Preprocessor/sanitizer-predefines.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -E -dM -triple aarch64-unknown-linux -fsanitize=address %s | FileCheck %s --check-prefix=ASAN +// ASAN: #define __SANITIZE_ADDRESS__ 1 + +// RUN: %clang_cc1 -E -dM -triple aarch64-unknown-linux -fsanitize=hwaddress %s | FileCheck %s --check-prefix=HWASAN +// HWASAN: #define __SANITIZE_HWADDRESS__ 1 + +// RUN: %clang_cc1 -E -dM -triple aarch64-unknown-linux -fsanitize=thread %s | FileCheck %s --check-prefix=TSAN +// TSAN: #define __SANITIZE_THREAD__ 1 From 4485a3f968eab26e08b1fdb4be9285305c716db3 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 15 Aug 2025 16:29:33 -0700 Subject: [PATCH 037/214] Switch dtls_test.c from XFAIL to UNSUPPORTED on aarch64. It passes on some buildbots, so we can't expect failure. --- compiler-rt/test/msan/dtls_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/msan/dtls_test.c b/compiler-rt/test/msan/dtls_test.c index 0e49ac9feb9f..6daaab0ae0b8 100644 --- a/compiler-rt/test/msan/dtls_test.c +++ b/compiler-rt/test/msan/dtls_test.c @@ -11,7 +11,7 @@ // Reports use-of-uninitialized-value, not analyzed XFAIL: target={{.*netbsd.*}} - XFAIL: aarch64-target-arch + UNSUPPORTED: aarch64-target-arch */ From 2b75ff192d03033c136f7641bb93ea52fec54c14 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 15 Aug 2025 16:35:42 -0700 Subject: [PATCH 038/214] [msan] Reland with even more improvement: Improve packed multiply-add instrumentation (#153353) This reverts commit cf002847a464c004a57ca4777251b1aafc33d958 i.e., relands ba603b5e4d44f1a25207a2a00196471d2ba93424. 
It was reverted because it was subtly wrong: multiplying an uninitialized zero should not result in an initialized zero. This reland fixes the issue by using instrumentation analogous to visitAnd (bitwise AND of an initialized zero and an uninitialized value results in an initialized value). Additionally, this reland expands a test case; fixes the commit message; and optimizes the change to avoid the need for horizontalReduce. The current instrumentation has false positives: it does not take into account that multiplying an initialized zero value with an uninitialized value results in an initialized zero value. This change fixes the issue during the multiplication step. The horizontal add step is modeled using bitwise OR. Future work can apply this improved handler to the AVX512 equivalent intrinsics (x86_avx512_pmaddw_d_512, x86_avx512_pmaddubs_w_512) and AVX VNNI intrinsics. --- compiler-rt/lib/msan/tests/msan_test.cpp | 27 +++- .../Instrumentation/MemorySanitizer.cpp | 137 +++++++++++++++--- .../X86/avx2-intrinsics-x86.ll | 57 ++++++-- .../MemorySanitizer/X86/mmx-intrinsics.ll | 66 ++++++--- .../X86/sse2-intrinsics-x86.ll | 19 ++- .../i386/avx2-intrinsics-i386.ll | 59 ++++++-- .../MemorySanitizer/i386/mmx-intrinsics.ll | 66 ++++++--- .../i386/sse2-intrinsics-i386.ll | 19 ++- .../MemorySanitizer/vector_arith.ll | 43 ++++-- 9 files changed, 384 insertions(+), 109 deletions(-) diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index d1c481483dfa..b0d8409d97ff 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -4271,14 +4271,39 @@ TEST(VectorSadTest, sse2_psad_bw) { } TEST(VectorMaddTest, mmx_pmadd_wd) { - V4x16 a = {Poisoned(), 1, 2, 3}; + V4x16 a = {Poisoned(0), 1, 2, 3}; V4x16 b = {100, 101, 102, 103}; V2x32 c = _mm_madd_pi16(a, b); + // Multiply step: + // {Poison * 100, 1 * 101, 2 * 102, 3 * 103} + // == {Poison, 1 * 101, 2 * 102, 3 * 103} + // Notice that for the
poisoned value, we ignored the concrete zero value. + // + // Horizontal add step: + // {Poison + 1 * 101, 2 * 102 + 3 * 103} + // == {Poison, 2 * 102 + 3 * 103} EXPECT_POISONED(c[0]); EXPECT_NOT_POISONED(c[1]); EXPECT_EQ((unsigned)(2 * 102 + 3 * 103), c[1]); + + V4x16 d = {Poisoned(0), 1, 0, 3}; + V4x16 e = {100, 101, Poisoned(102), 103}; + V2x32 f = _mm_madd_pi16(d, e); + // Multiply step: + // {Poison * 100, 1 * 101, 0 * Poison, 3 * 103} + // == {Poison, 1 * 101, 0 , 3 * 103} + // Notice that 0 * Poison == 0. + // + // Horizontal add step: + // {Poison + 1 * 101, 0 + 3 * 103} + // == {Poison, 3 * 103} + + EXPECT_POISONED(f[0]); + EXPECT_NOT_POISONED(f[1]); + + EXPECT_EQ((unsigned)(3 * 103), f[1]); } TEST(VectorCmpTest, mm_cmpneq_ps) { diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 21bd4164385a..3ecace5cfe6e 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3641,9 +3641,10 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } - // Get an MMX-sized vector type. - Type *getMMXVectorTy(unsigned EltSizeInBits) { - const unsigned X86_MMXSizeInBits = 64; + // Get an MMX-sized (64-bit) vector type, or optionally, other sized + // vectors. + Type *getMMXVectorTy(unsigned EltSizeInBits, + unsigned X86_MMXSizeInBits = 64) { assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 && "Illegal MMX vector element size"); return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits), @@ -3843,20 +3844,109 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } - // Instrument multiply-add intrinsic. - void handleVectorPmaddIntrinsic(IntrinsicInst &I, - unsigned MMXEltSizeInBits = 0) { - Type *ResTy = - MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType(); + // Instrument multiply-add intrinsics. 
+ // + // e.g., Two operands: + // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) + // + // Two operands which require an EltSizeInBits override: + // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b) + // + // Three operands are not implemented yet: + // <4 x i32> @llvm.x86.avx512.vpdpbusd.128 + // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b) + // (the result of multiply-add'ing %a and %b is accumulated with %s) + void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor, + unsigned EltSizeInBits = 0) { IRBuilder<> IRB(&I); - auto *Shadow0 = getShadow(&I, 0); - auto *Shadow1 = getShadow(&I, 1); - Value *S = IRB.CreateOr(Shadow0, Shadow1); - S = IRB.CreateBitCast(S, ResTy); - S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), - ResTy); - S = IRB.CreateBitCast(S, getShadowTy(&I)); - setShadow(&I, S); + + [[maybe_unused]] FixedVectorType *ReturnType = + cast(I.getType()); + assert(isa(ReturnType)); + + assert(I.arg_size() == 2); + + // Vectors A and B, and shadows + Value *Va = I.getOperand(0); + Value *Vb = I.getOperand(1); + + Value *Sa = getShadow(&I, 0); + Value *Sb = getShadow(&I, 1); + + FixedVectorType *ParamType = + cast(I.getArgOperand(0)->getType()); + assert(ParamType == I.getArgOperand(1)->getType()); + + assert(ParamType->getPrimitiveSizeInBits() == + ReturnType->getPrimitiveSizeInBits()); + + FixedVectorType *ImplicitReturnType = ReturnType; + // Step 1: instrument multiplication of corresponding vector elements + if (EltSizeInBits) { + ImplicitReturnType = cast(getMMXVectorTy( + EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits())); + ParamType = cast( + getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits())); + + Va = IRB.CreateBitCast(Va, ParamType); + Vb = IRB.CreateBitCast(Vb, ParamType); + + Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType)); + Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType)); + } else { + assert(ParamType->getNumElements() == + 
ReturnType->getNumElements() * ReductionFactor); + } + + // Multiplying an *initialized* zero by an uninitialized element results in + // an initialized zero element. + // + // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value + // results in an unpoisoned value. We can therefore adapt the visitAnd() + // instrumentation: + // OutShadow = (SaNonZero & SbNonZero) + // | (VaNonZero & SbNonZero) + // | (SaNonZero & VbNonZero) + // where non-zero is checked on a per-element basis (not per bit). + Value *SZero = Constant::getNullValue(Va->getType()); + Value *VZero = Constant::getNullValue(Sa->getType()); + Value *SaNonZero = IRB.CreateICmpNE(Sa, SZero); + Value *SbNonZero = IRB.CreateICmpNE(Sb, SZero); + Value *VaNonZero = IRB.CreateICmpNE(Va, VZero); + Value *VbNonZero = IRB.CreateICmpNE(Vb, VZero); + + Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero); + Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero); + Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero); + + // Each element of the vector is represented by a single bit (poisoned or + // not) e.g., <8 x i1>. + Value *And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero}); + + // Extend <8 x i1> to <8 x i16>. + // (The real pmadd intrinsic would have computed intermediate values of + // <8 x i32>, but that is irrelevant for our shadow purposes because we + // consider each element to be either fully initialized or fully + // uninitialized.) + And = IRB.CreateSExt(And, Sa->getType()); + + // Step 2: instrument horizontal add + // We don't need bit-precise horizontalReduce because we only want to check + // if each pair of elements is fully zero. + // Cast to <4 x i32>. + Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType); + + // Compute <4 x i1>, then extend back to <4 x i32>. 
+ Value *OutShadow = IRB.CreateSExt( + IRB.CreateICmpNE(Horizontal, + Constant::getNullValue(Horizontal->getType())), + ImplicitReturnType); + + // For MMX, cast it back to the required fake return type (<1 x i64>). + if (EltSizeInBits) + OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I)); + + setShadow(&I, OutShadow); setOriginForNaryOp(I); } @@ -5391,19 +5481,28 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVectorSadIntrinsic(I); break; + // Multiply and Add Packed Words + // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) + // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) + // + // Multiply and Add Packed Signed and Unsigned Bytes + // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) + // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) case Intrinsic::x86_sse2_pmadd_wd: case Intrinsic::x86_avx2_pmadd_wd: case Intrinsic::x86_ssse3_pmadd_ub_sw_128: case Intrinsic::x86_avx2_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2); break; + // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) case Intrinsic::x86_ssse3_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I, 8); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8); break; + // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) case Intrinsic::x86_mmx_pmadd_wd: - handleVectorPmaddIntrinsic(I, 16); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16); break; case Intrinsic::x86_sse_cmp_ss: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index f916130fe53e..cc07958bd9f2 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -140,11 +140,20 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: 
[[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP12]], [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i1> [[TMP4]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i16> [[TMP7]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -677,11 +686,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr 
(i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP12]], [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = and <32 x i1> [[TMP4]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP7]] to <16 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -706,11 +724,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], 
[[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> [[TMP17]], [[TMP10]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP9]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP16]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <32 x i8> [[TMP12]] to <16 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <16 x i16> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index ac3bb5671903..99eafc13b2bf 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -1687,16 +1687,30 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> 
[[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> -; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = and <4 x i1> [[TMP22]], [[TMP30]] +; CHECK-NEXT: [[TMP36:%.*]] = and <4 x i1> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP31]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP37]], [[TMP36]] +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP38]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i16> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = sext <2 x i1> [[TMP25]] to <2 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i32> [[TMP27]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64> +; 
CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3315,16 +3329,30 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> -; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast 
<1 x i64> [[TMP21]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = and <8 x i1> [[TMP35]], [[TMP33]] +; CHECK-NEXT: [[TMP39:%.*]] = and <8 x i1> [[TMP32]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = or <8 x i1> [[TMP34]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = or <8 x i1> [[TMP40]], [[TMP39]] +; CHECK-NEXT: [[TMP16:%.*]] = sext <8 x i1> [[TMP41]] to <8 x i8> +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i8> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <4 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP25]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i16> [[TMP29]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64> +; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index 8f915a59db8e..704805018079 100644 --- 
a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -762,11 +762,20 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP12]], [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i1> [[TMP4]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i16> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll index 5cc56baf0e0d..9960b80f2856 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll @@ -149,11 +149,20 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i1> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i1> [[TMP5]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i16> [[TMP8]] to <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> 
[[TMP21]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -714,11 +723,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = and <32 x i1> [[TMP5]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i8> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <16 x i16> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP21]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> 
[[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -734,7 +752,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -744,11 +762,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], -2147483649 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <32 x i1> [[TMP10]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP12]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i1> [[TMP22]], 
[[TMP21]] +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP23]] to <32 x i8> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP13]] to <16 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i16> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index 0a3efaaea149..74822de4962b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -1730,16 +1730,30 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> -; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; 
CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <4 x i16> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = and <4 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = and <4 x i1> [[TMP23]], [[TMP31]] +; CHECK-NEXT: [[TMP37:%.*]] = and <4 x i1> [[TMP30]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP32]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = or <4 x i1> [[TMP38]], [[TMP37]] +; CHECK-NEXT: [[TMP24:%.*]] = sext <4 x i1> [[TMP39]] to <4 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <4 x i16> [[TMP24]] to <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <2 x i32> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = sext <2 x i1> [[TMP26]] to <2 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <2 x i32> [[TMP28]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP29]] to <1 x i64> +; CHECK-NEXT: [[TMP34:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <1 x i64> [[TMP34]] to <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP35]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3401,16 +3415,30 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x 
i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> -; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <8 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <8 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP38:%.*]] = and <8 x i1> [[TMP26]], [[TMP34]] +; CHECK-NEXT: [[TMP39:%.*]] = and <8 x i1> [[TMP33]], [[TMP36]] +; CHECK-NEXT: [[TMP40:%.*]] = or <8 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = or <8 x i1> [[TMP40]], [[TMP39]] +; CHECK-NEXT: [[TMP16:%.*]] = sext <8 x i1> [[TMP41]] to <8 x i8> +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i8> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne <4 x i16> [[TMP27]], zeroinitializer +; CHECK-NEXT: 
[[TMP42:%.*]] = sext <4 x i1> [[TMP28]] to <4 x i16> +; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x i16> [[TMP42]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP30]] to <1 x i64> +; CHECK-NEXT: [[TMP31:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <1 x i64> [[TMP32]] to <8 x i8> +; CHECK-NEXT: [[TMP37:%.*]] = bitcast <1 x i64> [[TMP31]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP25]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP37]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index e771e60e2f29..3a37eafd78ec 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -800,11 +800,20 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: 
[[TMP12:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i1> [[TMP5]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <4 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index d614bb85d858..d1060fb33e1b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -17,10 +17,19 @@ define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 
x i16> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i1> [[TMP2]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i1> [[TMP6]], [[TMP13]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i1> [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <4 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP17]] to <4 x i32> ; CHECK-NEXT: [[C:%.*]] = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[C]] @@ -39,13 +48,27 @@ define <1 x i64> @Test_ssse3_pmadd_ub_sw(<1 x i64> %a, <1 x i64> %b) sanitize_me ; CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = or <1 x i64> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i16> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8> +; CHECK-NEXT: 
[[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <8 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i8> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i8> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP17]], [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = and <8 x i1> [[TMP14]], [[TMP21]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i1> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i1> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i1> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <4 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP24]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i16> [[TMP23]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64 [[TMP19]] to <1 x i64> ; CHECK-NEXT: [[C:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[A]], <1 x i64> [[B]]) #[[ATTR2]] -; CHECK-NEXT: store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <1 x i64> [[TMP20]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[C]] ; entry: From e68989b93036c3ff4efe785b07ba27662356f6cc Mon Sep 17 00:00:00 2001 From: Chenguang Wang Date: Fri, 15 Aug 2025 16:45:32 -0700 Subject: [PATCH 039/214] [bazel] Fix //mlir:XeGPUDialect compilation. (#153904) Broken by https://github.com/llvm/llvm-project/pull/153273. 
--- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ffae51b44aa7..763dbdbaee26 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3654,6 +3654,7 @@ cc_library( ":ArithUtils", ":BytecodeOpInterface", ":DialectUtils", + ":GPUDialect", ":IR", ":IndexDialect", ":ShapedOpInterfaces", @@ -3663,6 +3664,7 @@ cc_library( ":XeGPUAttrInterfaceIncGen", ":XeGPUEnumsIncGen", ":XeGPUIncGen", + ":XeVMDialect", "//llvm:Support", ], ) From f396657bf94fab0216715036f4ef9a58d940521a Mon Sep 17 00:00:00 2001 From: gulfemsavrun Date: Fri, 15 Aug 2025 16:45:40 -0700 Subject: [PATCH 040/214] Revert "Remember LLVM_ENABLE_LIBCXX setting in installed configuration" (#153898) Reverts llvm/llvm-project#139712 Caused an lld relocation issue as shown below: https://logs.chromium.org/logs/fuchsia/buildbucket/cr-buildbucket/8706642902273983073/+/u/clang/build/stdout --- llvm/cmake/modules/HandleLLVMStdlib.cmake | 13 +------------ llvm/cmake/modules/LLVMConfig.cmake.in | 2 -- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/llvm/cmake/modules/HandleLLVMStdlib.cmake b/llvm/cmake/modules/HandleLLVMStdlib.cmake index dda1caa846dc..a7e138aa0789 100644 --- a/llvm/cmake/modules/HandleLLVMStdlib.cmake +++ b/llvm/cmake/modules/HandleLLVMStdlib.cmake @@ -2,7 +2,6 @@ # if the user has requested it. 
include(DetermineGCCCompatible) -include(CheckIncludeFiles) if(NOT DEFINED LLVM_STDLIB_HANDLED) set(LLVM_STDLIB_HANDLED ON) @@ -20,17 +19,7 @@ if(NOT DEFINED LLVM_STDLIB_HANDLED) if(LLVM_COMPILER_IS_GCC_COMPATIBLE) check_cxx_compiler_flag("-stdlib=libc++" CXX_COMPILER_SUPPORTS_STDLIB) check_linker_flag(CXX "-stdlib=libc++" CXX_LINKER_SUPPORTS_STDLIB) - - # Check whether C++ include files are available - # runtimes/CMakeLists.txt adds -nostdlib++ and -nostdinc++ to - # CMAKE_REQUIRED_FLAGS, which are incompatible with -stdlib=libc++; use - # a fresh CMAKE_REQUIRED_FLAGS environment. - cmake_push_check_state(RESET) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -stdlib=libc++") - check_include_files("chrono" CXX_COMPILER_SUPPORTS_STDLIB_CHRONO LANGUAGE CXX) - cmake_pop_check_state() - - if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB AND CXX_COMPILER_SUPPORTS_STDLIB_CHRONO) + if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB) append("-stdlib=libc++" CMAKE_CXX_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS) diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index c39c33f0c779..c15b9576cd5d 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -55,8 +55,6 @@ endif() set(LLVM_ENABLE_RTTI @LLVM_ENABLE_RTTI@) -set(LLVM_ENABLE_LIBCXX @LLVM_ENABLE_LIBCXX@) - set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@) if(LLVM_ENABLE_LIBEDIT) find_package(LibEdit) From 99e690bc755d233f8e3fffd9181bdfa9bebb3de7 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Fri, 15 Aug 2025 16:47:31 -0700 Subject: [PATCH 041/214] [lldb][nfc] Update docstring of StackFrame "get variable" methods. (#153728) This commit makes the docs more precise, clarifying how scopes affect the result of a method, as well as documenting a parameter of a different method.
--- lldb/include/lldb/Target/StackFrame.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lldb/include/lldb/Target/StackFrame.h b/lldb/include/lldb/Target/StackFrame.h index 3f51c9a7f22f..d4104bfe49d2 100644 --- a/lldb/include/lldb/Target/StackFrame.h +++ b/lldb/include/lldb/Target/StackFrame.h @@ -241,8 +241,9 @@ public: return m_reg_context_sp; } - /// Retrieve the list of variables that are in scope at this StackFrame's - /// pc. + /// Retrieve the list of variables whose scope either: + /// * contains this StackFrame's pc, + /// * is a child of this StackFrame's current scope. /// /// A frame that is not live may return an empty VariableList for a given /// pc value even though variables would be available at this point if it @@ -274,6 +275,9 @@ public: /// that are visible to the entire compilation unit (e.g. file /// static in C, globals that are homed in this CU). /// + /// \param[in] must_have_valid_location + /// Whether to filter variables whose location is not available at this + /// StackFrame's pc. /// \return /// A pointer to a list of variables. lldb::VariableListSP From e67ec12640e7cff396932c2bc1401504bba38085 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 15 Aug 2025 17:18:09 -0700 Subject: [PATCH 042/214] [RISCV] Remove experimental from Smctr and Ssctr. (#153903) These extensions were ratified in November 2024. 
--- .../Driver/print-supported-extensions-riscv.c | 4 ++-- llvm/docs/ReleaseNotes.md | 1 + llvm/lib/Target/RISCV/RISCVFeatures.td | 12 +++++----- llvm/test/CodeGen/RISCV/attributes.ll | 8 +++---- llvm/test/CodeGen/RISCV/features-info.ll | 4 ++-- llvm/test/MC/RISCV/smctr-ssctr-valid.s | 24 +++++++++---------- .../TargetParser/RISCVISAInfoTest.cpp | 4 ++-- 7 files changed, 29 insertions(+), 28 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 2503f2473d64..87bbd95877b2 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -128,6 +128,7 @@ // CHECK-NEXT: smcdeleg 1.0 'Smcdeleg' (Counter Delegation Machine Level) // CHECK-NEXT: smcntrpmf 1.0 'Smcntrpmf' (Cycle and Instret Privilege Mode Filtering) // CHECK-NEXT: smcsrind 1.0 'Smcsrind' (Indirect CSR Access Machine Level) +// CHECK-NEXT: smctr 1.0 'Smctr' (Control Transfer Records Machine Level) // CHECK-NEXT: smdbltrp 1.0 'Smdbltrp' (Double Trap Machine Level) // CHECK-NEXT: smepmp 1.0 'Smepmp' (Enhanced Physical Memory Protection) // CHECK-NEXT: smmpm 1.0 'Smmpm' (Machine-level Pointer Masking for M-mode) @@ -140,6 +141,7 @@ // CHECK-NEXT: sscofpmf 1.0 'Sscofpmf' (Count Overflow and Mode-Based Filtering) // CHECK-NEXT: sscounterenw 1.0 'Sscounterenw' (Support writeable scounteren enable bit for any hpmcounter that is not read-only zero) // CHECK-NEXT: sscsrind 1.0 'Sscsrind' (Indirect CSR Access Supervisor Level) +// CHECK-NEXT: ssctr 1.0 'Ssctr' (Control Transfer Records Supervisor Level) // CHECK-NEXT: ssdbltrp 1.0 'Ssdbltrp' (Double Trap Supervisor Level) // CHECK-NEXT: ssnpm 1.0 'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode) // CHECK-NEXT: sspm 1.0 'Sspm' (Indicates Supervisor-mode Pointer Masking) @@ -214,8 +216,6 @@ // CHECK-NEXT: zvbc32e 0.7 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements) // CHECK-NEXT: 
zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) // CHECK-NEXT: zvqdotq 0.0 'Zvqdotq' (Vector quad widening 4D Dot Product) -// CHECK-NEXT: smctr 1.0 'Smctr' (Control Transfer Records Machine Level) -// CHECK-NEXT: ssctr 1.0 'Ssctr' (Control Transfer Records Supervisor Level) // CHECK-NEXT: svukte 0.3 'Svukte' (Address-Independent Latency of User-Mode Faults to Supervisor Addresses) // CHECK-NEXT: xqccmp 0.3 'Xqccmp' (Qualcomm 16-bit Push/Pop and Double Moves) // CHECK-NEXT: xqcia 0.7 'Xqcia' (Qualcomm uC Arithmetic Extension) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index ef7586a6bab7..3b90c964ac53 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -118,6 +118,7 @@ Changes to the RISC-V Backend * `llvm-objdump` now has basic support for switching between disassembling code and data using mapping symbols such as `$x` and `$d`. Switching architectures using `$x` with an architecture string suffix is not yet supported. +* Ssctr and Smctr extensions are no longer experimental. 
Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index a7329d201f88..ac0234dd9a5f 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1055,13 +1055,13 @@ def FeatureStdExtSupm "Indicates User-mode Pointer Masking">; def FeatureStdExtSmctr - : RISCVExperimentalExtension<1, 0, - "Control Transfer Records Machine Level", - [FeatureStdExtSscsrind]>; + : RISCVExtension<1, 0, + "Control Transfer Records Machine Level", + [FeatureStdExtSscsrind]>; def FeatureStdExtSsctr - : RISCVExperimentalExtension<1, 0, - "Control Transfer Records Supervisor Level", - [FeatureStdExtSscsrind]>; + : RISCVExtension<1, 0, + "Control Transfer Records Supervisor Level", + [FeatureStdExtSscsrind]>; def HasStdExtSmctrOrSsctr : Predicate<"Subtarget->hasStdExtSmctrOrSsctr()">, AssemblerPredicate<(any_of FeatureStdExtSmctr, FeatureStdExtSsctr), "'Smctr' (Control Transfer Records Machine Level) or " diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index a28b818b3db1..940e018ebdc9 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -173,8 +173,8 @@ ; RUN: llc -mtriple=riscv32 -mattr=+smmpm %s -o - | FileCheck --check-prefix=RV32SMMPM %s ; RUN: llc -mtriple=riscv32 -mattr=+sspm %s -o - | FileCheck --check-prefix=RV32SSPM %s ; RUN: llc -mtriple=riscv32 -mattr=+supm %s -o - | FileCheck --check-prefix=RV32SUPM %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-smctr %s -o - | FileCheck --check-prefix=RV32SMCTR %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssctr %s -o - | FileCheck --check-prefix=RV32SSCTR %s +; RUN: llc -mtriple=riscv32 -mattr=+smctr %s -o - | FileCheck --check-prefix=RV32SMCTR %s +; RUN: llc -mtriple=riscv32 -mattr=+ssctr %s -o - | FileCheck --check-prefix=RV32SSCTR %s ; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s ; RUN: llc 
-mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s @@ -336,8 +336,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+smmpm %s -o - | FileCheck --check-prefix=RV64SMMPM %s ; RUN: llc -mtriple=riscv64 -mattr=+sspm %s -o - | FileCheck --check-prefix=RV64SSPM %s ; RUN: llc -mtriple=riscv64 -mattr=+supm %s -o - | FileCheck --check-prefix=RV64SUPM %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-smctr %s -o - | FileCheck --check-prefix=RV64SMCTR %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-ssctr %s -o - | FileCheck --check-prefix=RV64SSCTR %s +; RUN: llc -mtriple=riscv64 -mattr=+smctr %s -o - | FileCheck --check-prefix=RV64SMCTR %s +; RUN: llc -mtriple=riscv64 -mattr=+ssctr %s -o - | FileCheck --check-prefix=RV64SSCTR %s ; RUN: llc -mtriple=riscv64 -mattr=+sdext %s -o - | FileCheck --check-prefix=RV64SDEXT %s ; RUN: llc -mtriple=riscv64 -mattr=+sdtrig %s -o - | FileCheck --check-prefix=RV64SDTRIG %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-xqccmp %s -o - | FileCheck --check-prefix=RV64XQCCMP %s diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index fb539211fcc3..60b94adcc4c7 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -27,8 +27,6 @@ ; CHECK-NEXT: experimental - Experimental intrinsics. ; CHECK-NEXT: experimental-p - 'P' ('Base P' (Packed SIMD)). ; CHECK-NEXT: experimental-rvm23u32 - RISC-V experimental-rvm23u32 profile. -; CHECK-NEXT: experimental-smctr - 'Smctr' (Control Transfer Records Machine Level). -; CHECK-NEXT: experimental-ssctr - 'Ssctr' (Control Transfer Records Supervisor Level). ; CHECK-NEXT: experimental-svukte - 'Svukte' (Address-Independent Latency of User-Mode Faults to Supervisor Addresses). ; CHECK-NEXT: experimental-xqccmp - 'Xqccmp' (Qualcomm 16-bit Push/Pop and Double Moves). ; CHECK-NEXT: experimental-xqcia - 'Xqcia' (Qualcomm uC Arithmetic Extension). 
@@ -145,6 +143,7 @@ ; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level). ; CHECK-NEXT: smcntrpmf - 'Smcntrpmf' (Cycle and Instret Privilege Mode Filtering). ; CHECK-NEXT: smcsrind - 'Smcsrind' (Indirect CSR Access Machine Level). +; CHECK-NEXT: smctr - 'Smctr' (Control Transfer Records Machine Level). ; CHECK-NEXT: smdbltrp - 'Smdbltrp' (Double Trap Machine Level). ; CHECK-NEXT: smepmp - 'Smepmp' (Enhanced Physical Memory Protection). ; CHECK-NEXT: smmpm - 'Smmpm' (Machine-level Pointer Masking for M-mode). @@ -157,6 +156,7 @@ ; CHECK-NEXT: sscofpmf - 'Sscofpmf' (Count Overflow and Mode-Based Filtering). ; CHECK-NEXT: sscounterenw - 'Sscounterenw' (Support writeable scounteren enable bit for any hpmcounter that is not read-only zero). ; CHECK-NEXT: sscsrind - 'Sscsrind' (Indirect CSR Access Supervisor Level). +; CHECK-NEXT: ssctr - 'Ssctr' (Control Transfer Records Supervisor Level). ; CHECK-NEXT: ssdbltrp - 'Ssdbltrp' (Double Trap Supervisor Level). ; CHECK-NEXT: ssnpm - 'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode). ; CHECK-NEXT: sspm - 'Sspm' (Indicates Supervisor-mode Pointer Masking). 
diff --git a/llvm/test/MC/RISCV/smctr-ssctr-valid.s b/llvm/test/MC/RISCV/smctr-ssctr-valid.s index 8bbd5a426b8e..072231a9b546 100644 --- a/llvm/test/MC/RISCV/smctr-ssctr-valid.s +++ b/llvm/test/MC/RISCV/smctr-ssctr-valid.s @@ -1,22 +1,22 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-smctr -M no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+smctr -M no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-smctr -M no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+smctr -M no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s -# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-ssctr -M no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+ssctr -M no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-ssctr -M no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+ssctr -M no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST %s -# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-smctr < %s \ -# RUN: | llvm-objdump --mattr=+experimental-smctr -M no-aliases -d - \ +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+smctr < %s \ +# RUN: | llvm-objdump --mattr=+smctr -M no-aliases -d - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+experimental-smctr < %s \ -# RUN: | llvm-objdump --mattr=+experimental-smctr -M no-aliases -d - \ +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+smctr < %s \ +# RUN: | llvm-objdump --mattr=+smctr -M no-aliases -d - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-ssctr < %s \ -# RUN: | llvm-objdump --mattr=+experimental-ssctr -M no-aliases -d - \ +# RUN: llvm-mc -filetype=obj -triple 
riscv32 -mattr=+ssctr < %s \ +# RUN: | llvm-objdump --mattr=+ssctr -M no-aliases -d - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+experimental-ssctr < %s \ -# RUN: | llvm-objdump --mattr=+experimental-ssctr -M no-aliases -d - \ +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+ssctr < %s \ +# RUN: | llvm-objdump --mattr=+ssctr -M no-aliases -d - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s # RUN: not llvm-mc -triple riscv32 -M no-aliases -show-encoding < %s 2>&1 \ diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 319538eaea13..db58d95d2d82 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1099,6 +1099,7 @@ R"(All available -march extensions for RISC-V smcdeleg 1.0 smcntrpmf 1.0 smcsrind 1.0 + smctr 1.0 smdbltrp 1.0 smepmp 1.0 smmpm 1.0 @@ -1111,6 +1112,7 @@ R"(All available -march extensions for RISC-V sscofpmf 1.0 sscounterenw 1.0 sscsrind 1.0 + ssctr 1.0 ssdbltrp 1.0 ssnpm 1.0 sspm 1.0 @@ -1185,8 +1187,6 @@ Experimental extensions zvbc32e 0.7 zvkgs 0.7 zvqdotq 0.0 - smctr 1.0 - ssctr 1.0 svukte 0.3 xqccmp 0.3 xqcia 0.7 From b7ec10ca6c2a1d4825ce2e4058050c852c1b90c1 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 15 Aug 2025 17:18:41 -0700 Subject: [PATCH 043/214] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (#153880) This simply updates the pass's cognizance of these instructions, and for the most part the hazards where they might be encountered do not exist for gfx12. Nonetheless, encountering them has to be checked for as doing so would indicate a compiler error. 
Co-authored-by: Stephen Thomas --------- Co-authored-by: Stephen Thomas --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index c1cca063aac6..fa3ca27a5f47 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1357,6 +1357,16 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { // DsCnt corresponds to LGKMCnt here. return (Decoded.DsCnt == 0); } + case AMDGPU::S_WAIT_STORECNT: + case AMDGPU::S_WAIT_STORECNT_DSCNT: + case AMDGPU::S_WAIT_LOADCNT: + case AMDGPU::S_WAIT_LOADCNT_DSCNT: + case AMDGPU::S_WAIT_SAMPLECNT: + case AMDGPU::S_WAIT_BVHCNT: + case AMDGPU::S_WAIT_DSCNT: + case AMDGPU::S_WAIT_EXPCNT: + case AMDGPU::S_WAIT_KMCNT: + llvm_unreachable("unexpected wait count instruction"); default: // SOPP instructions cannot mitigate the hazard. if (TII->isSOPP(MI)) @@ -2254,6 +2264,15 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { case AMDGPU::S_WAITCNT_EXPCNT: case AMDGPU::S_WAITCNT_LGKMCNT: case AMDGPU::S_WAIT_IDLE: + case AMDGPU::S_WAIT_LOADCNT: + case AMDGPU::S_WAIT_LOADCNT_DSCNT: + case AMDGPU::S_WAIT_SAMPLECNT: + case AMDGPU::S_WAIT_BVHCNT: + case AMDGPU::S_WAIT_STORECNT: + case AMDGPU::S_WAIT_STORECNT_DSCNT: + case AMDGPU::S_WAIT_EXPCNT: + case AMDGPU::S_WAIT_DSCNT: + case AMDGPU::S_WAIT_KMCNT: return true; default: break; From 5178aeff7b96e86b066f8407b9d9732ec660dd2e Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 15 Aug 2025 17:48:40 -0700 Subject: [PATCH 044/214] Revert "[flang] Lower EOSHIFT into hlfir.eoshift." 
(#153907) Reverts llvm/llvm-project#153106 Buildbots failing: * https://lab.llvm.org/buildbot/#/builders/199/builds/5188 * https://lab.llvm.org/buildbot/#/builders/41/builds/8329 --- flang/lib/Lower/HlfirIntrinsics.cpp | 41 ----- flang/test/Lower/HLFIR/eoshift.f90 | 259 ---------------------------- 2 files changed, 300 deletions(-) delete mode 100644 flang/test/Lower/HLFIR/eoshift.f90 diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp index 3b0f2e35cd5b..6e1d06a25924 100644 --- a/flang/lib/Lower/HlfirIntrinsics.cpp +++ b/flang/lib/Lower/HlfirIntrinsics.cpp @@ -170,17 +170,6 @@ protected: mlir::Type stmtResultType) override; }; -class HlfirEOShiftLowering : public HlfirTransformationalIntrinsic { -public: - using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; - -protected: - mlir::Value - lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals, - const fir::IntrinsicArgumentLoweringRules *argLowering, - mlir::Type stmtResultType) override; -}; - class HlfirReshapeLowering : public HlfirTransformationalIntrinsic { public: using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic; @@ -441,33 +430,6 @@ mlir::Value HlfirCShiftLowering::lowerImpl( return createOp(resultType, operands); } -mlir::Value HlfirEOShiftLowering::lowerImpl( - const Fortran::lower::PreparedActualArguments &loweredActuals, - const fir::IntrinsicArgumentLoweringRules *argLowering, - mlir::Type stmtResultType) { - auto operands = getOperandVector(loweredActuals, argLowering); - assert(operands.size() == 4); - mlir::Value array = operands[0]; - mlir::Value shift = operands[1]; - mlir::Value boundary = operands[2]; - mlir::Value dim = operands[3]; - // If DIM is present, then dereference it if it is a ref. 
- if (dim) - dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim}); - - mlir::Type resultType = computeResultType(array, stmtResultType); - - // Scalar logical constant boundary might be represented using i1, i2, ... - // type. We need to cast it to fir.logical type of the ARRAY/result. - if (auto logicalTy = mlir::dyn_cast( - hlfir::getFortranElementType(resultType))) - if (boundary && fir::isa_trivial(boundary.getType()) && - boundary.getType() != logicalTy) - boundary = builder.createConvert(loc, logicalTy, boundary); - - return createOp(resultType, array, shift, boundary, dim); -} - mlir::Value HlfirReshapeLowering::lowerImpl( const Fortran::lower::PreparedActualArguments &loweredActuals, const fir::IntrinsicArgumentLoweringRules *argLowering, @@ -527,9 +489,6 @@ std::optional Fortran::lower::lowerHlfirIntrinsic( if (name == "cshift") return HlfirCShiftLowering{builder, loc}.lower(loweredActuals, argLowering, stmtResultType); - if (name == "eoshift") - return HlfirEOShiftLowering{builder, loc}.lower(loweredActuals, argLowering, - stmtResultType); if (name == "reshape") return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, argLowering, stmtResultType); diff --git a/flang/test/Lower/HLFIR/eoshift.f90 b/flang/test/Lower/HLFIR/eoshift.f90 deleted file mode 100644 index 3b2570ab5936..000000000000 --- a/flang/test/Lower/HLFIR/eoshift.f90 +++ /dev/null @@ -1,259 +0,0 @@ -! Test lowering of EOSHIFT intrinsic to HLFIR -! RUN: bbc -emit-hlfir -o - -I nowhere %s 2>&1 | FileCheck %s - -module eoshift_types - type t - end type t -end module eoshift_types - -! 1d shift by scalar -subroutine eoshift1(a, s) - integer :: a(:), s - a = EOSHIFT(a, 2) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift1( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! 
CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box>, i32) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! 1d shift by scalar with dim -subroutine eoshift2(a, s) - integer :: a(:), s - a = EOSHIFT(a, 2, dim=1) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift2( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] dim %[[VAL_6]] : (!fir.box>, i32, i32) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! 2d shift by scalar -subroutine eoshift3(a, s) - integer :: a(:,:), s - a = EOSHIFT(a, 2) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift3( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box>, i32) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! 
2d shift by scalar with dim -subroutine eoshift4(a, s) - integer :: a(:,:), s - a = EOSHIFT(a, 2, dim=2) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift4( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_6:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] dim %[[VAL_6]] : (!fir.box>, i32, i32) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! 2d shift by array -subroutine eoshift5(a, s) - integer :: a(:,:), s(:) - a = EOSHIFT(a, s) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift5( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! CHECK: %[[VAL_5:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_4]]#0 : (!fir.box>, !fir.box>) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_5]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_5]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! 2d shift by array expr -subroutine eoshift6(a, s) - integer :: a(:,:), s(:) - a = EOSHIFT(a, s + 1) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift6( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! 
CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index -! CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_4]]#0, %[[VAL_6]] : (!fir.box>, index) -> (index, index, index) -! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]]#1 : (index) -> !fir.shape<1> -! CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr -! CHECK: %[[VAL_14:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_9]] : (!fir.box>, !hlfir.expr) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_3]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_14]] : !hlfir.expr -! CHECK: hlfir.destroy %[[VAL_9]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! 1d character(10,2) shift by scalar -subroutine eoshift7(a, s) - character(10,2) :: a(:) - a = EOSHIFT(a, 2) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift7( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]] = arith.constant 10 : index -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_1]] -! CHECK: %[[VAL_6:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_4]]#0 %[[VAL_6]] : (!fir.box>>, i32) -> !hlfir.expr> -! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_4]]#0 : !hlfir.expr>, !fir.box>> -! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr> -! CHECK: return -! CHECK: } - -! 1d character(*) shift by scalar -subroutine eoshift8(a, s) - character(*) :: a(:) - a = EOSHIFT(a, 2) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift8( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box>> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] -! 
CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : (!fir.box>>, i32) -> !hlfir.expr> -! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : !hlfir.expr>, !fir.box>> -! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr> -! CHECK: return -! CHECK: } - -! 1d type(t) shift by scalar -subroutine eoshift9(a, s) - use eoshift_types - type(t) :: a(:) - a = EOSHIFT(a, 2, boundary=t()) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift9( -! CHECK-SAME: %[[ARG0:.*]]: !fir.box>> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift9Ea"} : (!fir.box>>, !fir.dscope) -> (!fir.box>>, !fir.box>>) -! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift9Es"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_4:.*]] = fir.address_of(@_QQro._QMeoshift_typesTt.0) : !fir.ref> -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QMeoshift_typesTt.0"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_1]]#0 %[[VAL_3]] boundary %[[VAL_5]]#0 : (!fir.box>>, i32, !fir.ref>) -> !hlfir.expr> -! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_1]]#0 : !hlfir.expr>, !fir.box>> -! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr> -! CHECK: return -! CHECK: } - -! 1d class(t) shift by scalar -subroutine eoshift10(a, s) - use eoshift_types - class(t), allocatable :: a(:) - a = EOSHIFT(a, 2, boundary=t()) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift10( -! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>>>> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "s"}) { -! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope -! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift10Ea"} : (!fir.ref>>>>, !fir.dscope) -> (!fir.ref>>>>, !fir.ref>>>>) -! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift10Es"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 -! CHECK: %[[VAL_4:.*]] = fir.address_of(@_QQro._QMeoshift_typesTt.1) : !fir.ref> -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro._QMeoshift_typesTt.1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref>>>> -! CHECK: %[[VAL_7:.*]] = hlfir.eoshift %[[VAL_6]] %[[VAL_3]] boundary %[[VAL_5]]#0 : (!fir.class>>>, i32, !fir.ref>) -> !hlfir.expr?> -! CHECK: hlfir.assign %[[VAL_7]] to %[[VAL_1]]#0 realloc : !hlfir.expr?>, !fir.ref>>>> -! CHECK: hlfir.destroy %[[VAL_7]] : !hlfir.expr?> -! CHECK: return -! CHECK: } - -! 1d shift by scalar with variable dim -subroutine eoshift11(a, s, d) - integer :: a(:), s, d - a = EOSHIFT(a, 2, dim=d) -end subroutine -! CHECK-LABEL: func.func @_QPeoshift11( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, -! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "s"}, -! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "d"}) { -! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Ea"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Ed"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_3]] {uniq_name = "_QFeoshift11Es"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_7:.*]] = arith.constant 2 : i32 -! 
CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref -! CHECK: %[[VAL_9:.*]] = hlfir.eoshift %[[VAL_4]]#0 %[[VAL_7]] dim %[[VAL_8]] : (!fir.box>, i32, i32) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_9]] to %[[VAL_4]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_9]] : !hlfir.expr -! CHECK: return -! CHECK: } - -subroutine eoshift12(array, shift, boundary, dim) - real :: array(:,:) - real, optional :: boundary - integer :: shift(:), dim - array = EOSHIFT(array, shift, boundary, dim) -end subroutine eoshift12 -! CHECK-LABEL: func.func @_QPeoshift12( -! CHECK-SAME: %[[ARG0:.*]]: !fir.box> {fir.bindc_name = "array"}, -! CHECK-SAME: %[[ARG1:.*]]: !fir.box> {fir.bindc_name = "shift"}, -! CHECK-SAME: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "boundary", fir.optional}, -! CHECK-SAME: %[[ARG3:.*]]: !fir.ref {fir.bindc_name = "dim"}) { -! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope -! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Earray"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) -! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFeoshift12Eboundary"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG3]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Edim"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFeoshift12Eshift"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) -! CHECK: %[[VAL_5:.*]] = fir.is_present %[[VAL_2]]#0 : (!fir.ref) -> i1 -! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_2]]#0 : (!fir.ref) -> !fir.box -! CHECK: %[[VAL_7:.*]] = fir.absent !fir.box -! CHECK: %[[VAL_8:.*]] = arith.select %[[VAL_5]], %[[VAL_6]], %[[VAL_7]] : !fir.box -! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref -! 
CHECK: %[[VAL_10:.*]] = hlfir.eoshift %[[VAL_1]]#0 %[[VAL_4]]#0 boundary %[[VAL_8]] dim %[[VAL_9]] : (!fir.box>, !fir.box>, !fir.box, i32) -> !hlfir.expr -! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_1]]#0 : !hlfir.expr, !fir.box> -! CHECK: hlfir.destroy %[[VAL_10]] : !hlfir.expr -! CHECK: return -! CHECK: } - -! Test scalar logical boundary. -! CHECK-LABEL: func.func @_QPeoshift13( -subroutine eoshift13(array) - logical(1) :: array(:) - array = EOSHIFT(array, -1, .true._1) -! CHECK: %[[VAL_5:.*]] = hlfir.eoshift %{{.*}} %{{.*}} boundary %{{.*}} : (!fir.box>>, i32, !fir.logical<1>) -> !hlfir.expr> - array = EOSHIFT(array.EQV..false., -1, .true.) -! CHECK: %[[VAL_24:.*]] = hlfir.eoshift %{{.*}} %{{.*}} boundary %{{.*}} : (!hlfir.expr>, i32, !fir.logical<4>) -> !hlfir.expr> -end subroutine eoshift13 From 4198649c1996d5a9579971482ffddf3509e7b0fe Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 15 Aug 2025 17:50:27 -0700 Subject: [PATCH 045/214] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. 
(#153881) Co-authored-by: Stephen Thomas --------- Co-authored-by: Stephen Thomas --- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index fa3ca27a5f47..49a681efc79c 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1749,7 +1749,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0x0fff); + .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0)); return true; } @@ -1799,7 +1799,7 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - I.getOperand(0).getImm() == 0x0fff)) + AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0)) return HazardExpired; // Track registers writes From cf5f311b26d4db7296b28d52742c87f6a2836120 Mon Sep 17 00:00:00 2001 From: Leandro Lacerda Date: Fri, 15 Aug 2025 21:51:52 -0300 Subject: [PATCH 046/214] [libc] Polish GPU benchmarking (#153900) This patch provides cleanups and improvements for the GPU benchmarking infrastructure. The key changes are: - Fix benchmark convergence bug: Round up the scaled iteration count (ceil) to ensure it grows properly. The previous truncation logic causes the iteration count to get stuck. - Resolve remaining compiler warning. - Remove unused `BenchmarkLogger` files: This is dead code that added maintenance and cognitive overhead without providing functionality. - Improve build hygiene: Clean up headers and CMake dependencies to strictly follow the 'include what you use' (IWYU) principle. 
--- libc/benchmarks/gpu/BenchmarkLogger.cpp | 97 ------------------- libc/benchmarks/gpu/BenchmarkLogger.h | 29 ------ libc/benchmarks/gpu/CMakeLists.txt | 16 +-- libc/benchmarks/gpu/LibcGpuBenchmark.cpp | 18 ++-- libc/benchmarks/gpu/LibcGpuBenchmark.h | 4 +- .../gpu/timing/amdgpu/CMakeLists.txt | 3 +- libc/benchmarks/gpu/timing/amdgpu/timing.h | 1 - .../gpu/timing/nvptx/CMakeLists.txt | 3 +- libc/benchmarks/gpu/timing/nvptx/timing.h | 4 +- 9 files changed, 22 insertions(+), 153 deletions(-) delete mode 100644 libc/benchmarks/gpu/BenchmarkLogger.cpp delete mode 100644 libc/benchmarks/gpu/BenchmarkLogger.h diff --git a/libc/benchmarks/gpu/BenchmarkLogger.cpp b/libc/benchmarks/gpu/BenchmarkLogger.cpp deleted file mode 100644 index d5996a74f6dd..000000000000 --- a/libc/benchmarks/gpu/BenchmarkLogger.cpp +++ /dev/null @@ -1,97 +0,0 @@ -#include "benchmarks/gpu/BenchmarkLogger.h" -#include "hdr/stdint_proxy.h" -#include "src/__support/CPP/string.h" -#include "src/__support/CPP/string_view.h" -#include "src/__support/OSUtil/io.h" // write_to_stderr -#include "src/__support/big_int.h" // is_big_int -#include "src/__support/macros/config.h" -#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 -#include "src/__support/uint128.h" - -namespace LIBC_NAMESPACE_DECL { -namespace benchmarks { - -// cpp::string_view specialization -template <> -BenchmarkLogger & - BenchmarkLogger::operator<< (cpp::string_view str) { - LIBC_NAMESPACE::write_to_stderr(str); - return *this; -} - -// cpp::string specialization -template <> -BenchmarkLogger &BenchmarkLogger::operator<< (cpp::string str) { - return *this << static_cast(str); -} - -// const char* specialization -template <> -BenchmarkLogger &BenchmarkLogger::operator<< (const char *str) { - return *this << cpp::string_view(str); -} - -// char* specialization -template <> BenchmarkLogger &BenchmarkLogger::operator<< (char *str) { - return *this << cpp::string_view(str); -} - -// char specialization -template <> 
BenchmarkLogger &BenchmarkLogger::operator<<(char ch) { - return *this << cpp::string_view(&ch, 1); -} - -// bool specialization -template <> BenchmarkLogger &BenchmarkLogger::operator<<(bool cond) { - return *this << (cond ? "true" : "false"); -} - -// void * specialization -template <> BenchmarkLogger &BenchmarkLogger::operator<<(void *addr) { - return *this << "0x" << cpp::to_string(reinterpret_cast(addr)); -} - -template BenchmarkLogger &BenchmarkLogger::operator<<(T t) { - if constexpr (is_big_int_v || - (cpp::is_integral_v && cpp::is_unsigned_v && - (sizeof(T) > sizeof(uint64_t)))) { - static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt"); - const IntegerToString buffer(t); - return *this << buffer.view(); - } else { - return *this << cpp::to_string(t); - } -} - -// is_integral specializations -// char is already specialized to handle character -template BenchmarkLogger &BenchmarkLogger::operator<< (short); -template BenchmarkLogger &BenchmarkLogger::operator<< (int); -template BenchmarkLogger &BenchmarkLogger::operator<< (long); -template BenchmarkLogger &BenchmarkLogger::operator<< (long long); -template BenchmarkLogger & - BenchmarkLogger::operator<< (unsigned char); -template BenchmarkLogger & - BenchmarkLogger::operator<< (unsigned short); -template BenchmarkLogger & - BenchmarkLogger::operator<< (unsigned int); -template BenchmarkLogger & - BenchmarkLogger::operator<< (unsigned long); -template BenchmarkLogger & - BenchmarkLogger::operator<< (unsigned long long); - -#ifdef LIBC_TYPES_HAS_INT128 -template BenchmarkLogger & - BenchmarkLogger::operator<< <__uint128_t>(__uint128_t); -#endif // LIBC_TYPES_HAS_INT128 -template BenchmarkLogger &BenchmarkLogger::operator<< >(UInt<128>); -template BenchmarkLogger &BenchmarkLogger::operator<< >(UInt<192>); -template BenchmarkLogger &BenchmarkLogger::operator<< >(UInt<256>); -template BenchmarkLogger &BenchmarkLogger::operator<< >(UInt<320>); - -// TODO: Add floating point formatting once it's supported by 
StringStream. - -BenchmarkLogger log; - -} // namespace benchmarks -} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/benchmarks/gpu/BenchmarkLogger.h b/libc/benchmarks/gpu/BenchmarkLogger.h deleted file mode 100644 index 2b22aba085f8..000000000000 --- a/libc/benchmarks/gpu/BenchmarkLogger.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- Utilities to log to standard output during tests --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H -#define LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H - -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { -namespace benchmarks { - -// A class to log to standard output in the context of hermetic tests. -struct BenchmarkLogger { - constexpr BenchmarkLogger() = default; - template BenchmarkLogger &operator<<(T); -}; - -// A global TestLogger instance to be used in tests. 
-extern BenchmarkLogger log; - -} // namespace benchmarks -} // namespace LIBC_NAMESPACE_DECL - -#endif /* LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H */ diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt index beedac78d482..6ca134b12a47 100644 --- a/libc/benchmarks/gpu/CMakeLists.txt +++ b/libc/benchmarks/gpu/CMakeLists.txt @@ -38,31 +38,25 @@ add_unittest_framework_library( SRCS LibcGpuBenchmark.cpp LibcGpuBenchmarkMain.cpp - BenchmarkLogger.cpp HDRS LibcGpuBenchmark.h - BenchmarkLogger.h DEPENDS + libc.benchmarks.gpu.timing.timing libc.hdr.stdint_proxy - libc.src.__support.big_int - libc.src.__support.c_string libc.src.__support.CPP.string libc.src.__support.CPP.string_view libc.src.__support.CPP.type_traits - libc.src.__support.CPP.limits libc.src.__support.CPP.algorithm libc.src.__support.CPP.atomic libc.src.__support.CPP.array - libc.src.__support.fixed_point.fx_rep - libc.src.__support.macros.properties.types - libc.src.__support.OSUtil.osutil - libc.src.__support.uint128 libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.nearest_integer_operations libc.src.__support.FPUtil.sqrt libc.src.__support.fixedvector - libc.src.time.clock - libc.benchmarks.gpu.timing.timing + libc.src.__support.GPU.utils + libc.src.__support.time.gpu.time_utils libc.src.stdio.printf + libc.src.time.clock ) add_subdirectory(src) diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp index ef816c51a87d..a4a0ff4ec46e 100644 --- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp +++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp @@ -2,16 +2,17 @@ #include "hdr/stdint_proxy.h" #include "src/__support/CPP/algorithm.h" -#include "src/__support/CPP/array.h" #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/string.h" #include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" #include "src/__support/FPUtil/sqrt.h" #include "src/__support/GPU/utils.h" #include 
"src/__support/fixedvector.h" #include "src/__support/macros/config.h" #include "src/__support/time/gpu/time_utils.h" #include "src/stdio/printf.h" +#include "src/time/clock.h" namespace LIBC_NAMESPACE_DECL { namespace benchmarks { @@ -134,11 +135,13 @@ void print_results(Benchmark *b) { cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE); LIBC_NAMESPACE::printf( - "%-24s |%15.0f |%9.0f |%8llu |%8llu |%11llu |%9u |\n", + "%-24s |%15.0f |%9.0f |%8llu |%8llu |%15llu |%9u |\n", b->get_test_name().data(), final_result.cycles, - final_result.standard_deviation, (unsigned long long)final_result.min, - (unsigned long long)final_result.max, - (unsigned long long)final_result.total_iterations, (unsigned)num_threads); + final_result.standard_deviation, + static_cast(final_result.min), + static_cast(final_result.max), + static_cast(final_result.total_iterations), + static_cast(num_threads)); } void print_header() { @@ -147,7 +150,7 @@ void print_header() { benchmarks[0]->get_suite_name().data()); LIBC_NAMESPACE::printf("%s", RESET); cpp::string titles = "Benchmark | Cycles (Mean) | Stddev | " - " Min | Max | Iterations | Threads |\n"; + " Min | Max | Iterations | Threads |\n"; LIBC_NAMESPACE::printf(titles.data()); cpp::string separator(titles.size(), '-'); @@ -226,7 +229,8 @@ BenchmarkResult benchmark(const BenchmarkOptions &options, change_ratio < options.epsilon) break; - iterations = static_cast(iterations * options.scaling_factor); + iterations = static_cast( + fputil::ceil(iterations * options.scaling_factor)); } const auto &estimator = rep.get_estimator(); diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h index 60f69edf8655..e36e93c7efc1 100644 --- a/libc/benchmarks/gpu/LibcGpuBenchmark.h +++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h @@ -1,18 +1,16 @@ #ifndef LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H #define LLVM_LIBC_BENCHMARKS_LIBC_GPU_BENCHMARK_H -#include "benchmarks/gpu/BenchmarkLogger.h" #include 
"benchmarks/gpu/timing/timing.h" + #include "hdr/stdint_proxy.h" #include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/array.h" -#include "src/__support/CPP/limits.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/sqrt.h" #include "src/__support/macros/config.h" -#include "src/time/clock.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt b/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt index d6a89d04dab9..f85152e69c34 100644 --- a/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt +++ b/libc/benchmarks/gpu/timing/amdgpu/CMakeLists.txt @@ -4,10 +4,11 @@ add_header_library( timing.h DEPENDS libc.hdr.stdint_proxy - libc.src.__support.common libc.src.__support.macros.config libc.src.__support.macros.attributes libc.src.__support.CPP.algorithm libc.src.__support.CPP.array + libc.src.__support.CPP.atomic libc.src.__support.CPP.type_traits + libc.src.__support.GPU.utils ) diff --git a/libc/benchmarks/gpu/timing/amdgpu/timing.h b/libc/benchmarks/gpu/timing/amdgpu/timing.h index de721a2d6ce6..b4a174f72981 100644 --- a/libc/benchmarks/gpu/timing/amdgpu/timing.h +++ b/libc/benchmarks/gpu/timing/amdgpu/timing.h @@ -15,7 +15,6 @@ #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/GPU/utils.h" -#include "src/__support/common.h" #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" diff --git a/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt b/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt index 801080e7a6e9..4615f53e3d24 100644 --- a/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt +++ b/libc/benchmarks/gpu/timing/nvptx/CMakeLists.txt @@ -4,10 +4,11 @@ add_header_library( timing.h DEPENDS libc.hdr.stdint_proxy - libc.src.__support.common libc.src.__support.macros.config libc.src.__support.macros.attributes 
libc.src.__support.CPP.algorithm libc.src.__support.CPP.array + libc.src.__support.CPP.atomic libc.src.__support.CPP.type_traits + libc.src.__support.GPU.utils ) diff --git a/libc/benchmarks/gpu/timing/nvptx/timing.h b/libc/benchmarks/gpu/timing/nvptx/timing.h index 133032ca0842..0c93a67129b8 100644 --- a/libc/benchmarks/gpu/timing/nvptx/timing.h +++ b/libc/benchmarks/gpu/timing/nvptx/timing.h @@ -13,9 +13,7 @@ #include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/array.h" #include "src/__support/CPP/atomic.h" -#include "src/__support/CPP/type_traits.h" #include "src/__support/GPU/utils.h" -#include "src/__support/common.h" #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" @@ -66,7 +64,7 @@ template uint64_t stop = gpu::processor_clock(); cpp::atomic_thread_fence(cpp::MemoryOrder::ACQ_REL); asm("" ::"r"(stop)); - volatile T output = result; + volatile auto output = result; // Return the time elapsed. return stop - start; From 3e5d8a1439afb784dee1a12a878fa0a533a6d4c6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 16 Aug 2025 09:28:06 +0900 Subject: [PATCH 047/214] =?UTF-8?q?Reapply=20"RuntimeLibcalls:=20Generate?= =?UTF-8?q?=20table=20of=20libcall=20name=20lengths=20(#153=E2=80=A6=20(#1?= =?UTF-8?q?53864)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 334e9bf2dd01fbbfe785624c0de477b725cde6f2. Check if llvm-nm exists before building the benchmark. 
--- llvm/benchmarks/CMakeLists.txt | 21 ++ llvm/benchmarks/RuntimeLibcalls.cpp | 121 ++++++++++ llvm/include/llvm/CodeGen/TargetLowering.h | 12 +- llvm/include/llvm/IR/RuntimeLibcalls.h | 64 ++++-- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +- llvm/lib/IR/RuntimeLibcalls.cpp | 59 ++--- llvm/lib/LTO/LTO.cpp | 2 +- llvm/lib/Object/IRSymtab.cpp | 47 ++-- .../WebAssemblyRuntimeLibcallSignatures.cpp | 4 +- .../Utils/DeclareRuntimeLibcalls.cpp | 2 +- llvm/test/TableGen/RuntimeLibcallEmitter.td | 39 ++++ llvm/unittests/IR/CMakeLists.txt | 1 + llvm/unittests/IR/RuntimeLibcallsTest.cpp | 63 ++++++ .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 208 +++++++++++++++++- 14 files changed, 545 insertions(+), 100 deletions(-) create mode 100644 llvm/benchmarks/RuntimeLibcalls.cpp create mode 100644 llvm/unittests/IR/RuntimeLibcallsTest.cpp diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt index 1078efa55f49..14cccd9c8ed5 100644 --- a/llvm/benchmarks/CMakeLists.txt +++ b/llvm/benchmarks/CMakeLists.txt @@ -11,3 +11,24 @@ add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED) +add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED) + + +if(TARGET llvm-nm) + # Extract the list of symbols in a random utility as sample data. 
+ set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt") + set(SYMBOL_TEST_DATA_SOURCE_BINARY $) + + add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE} + COMMAND $ --no-demangle --no-sort + --format=just-symbols + ${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE} + DEPENDS "$" "$") + + add_custom_target(generate-runtime-libcalls-sample-symbol-list + DEPENDS ${SYMBOL_TEST_DATA_FILE}) + + add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list) + target_compile_definitions(RuntimeLibcallsBench PRIVATE + -DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}") +endif() diff --git a/llvm/benchmarks/RuntimeLibcalls.cpp b/llvm/benchmarks/RuntimeLibcalls.cpp new file mode 100644 index 000000000000..9ac77bb74a3d --- /dev/null +++ b/llvm/benchmarks/RuntimeLibcalls.cpp @@ -0,0 +1,121 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/RuntimeLibcalls.h" +#include "benchmark/benchmark.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TargetParser/Triple.h" +#include +#include +using namespace llvm; + +static constexpr unsigned MaxFuncNameSize = 53; + +static std::vector getLibcallNameStringRefs() { + std::vector Names(RTLIB::NumLibcallImpls); + // Keep the strlens on the StringRef construction out of the benchmark loop. 
+ for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) + Names[LC] = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC); + + return Names; +} + +static std::vector getRandomFuncNames() { + std::mt19937_64 Rng; + std::uniform_int_distribution<> StringLengthDistribution(1, MaxFuncNameSize); + std::uniform_int_distribution<> CharDistribution(1, 255); + int NumTestFuncs = 1 << 10; + std::vector TestFuncNames(NumTestFuncs); + + for (std::string &TestFuncName : TestFuncNames) { + for (int I = 0, E = StringLengthDistribution(Rng); I != E; ++I) + TestFuncName += static_cast(CharDistribution(Rng)); + } + + return TestFuncNames; +} + +#ifdef SYMBOL_TEST_DATA_FILE +static std::vector readSymbolsFromFile(StringRef InputFile) { + auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile, /*IsText=*/true); + if (!BufOrError) { + reportFatalUsageError("failed to open \'" + Twine(InputFile) + + "\': " + BufOrError.getError().message()); + } + + // Hackily figure out if there's a prefix on the symbol names - llvm-nm + // appears to not have a flag to skip this. 
+ llvm::Triple HostTriple(LLVM_HOST_TRIPLE); + std::string DummyDatalayout = "e"; + DummyDatalayout += DataLayout::getManglingComponent(HostTriple); + + DataLayout DL(DummyDatalayout); + char GlobalPrefix = DL.getGlobalPrefix(); + + std::vector Lines; + for (line_iterator LineIt(**BufOrError, /*SkipBlanks=*/true); + !LineIt.is_at_eof(); ++LineIt) { + StringRef SymbolName = *LineIt; + SymbolName.consume_front(StringRef(&GlobalPrefix, 1)); + + Lines.push_back(SymbolName.str()); + } + return Lines; +} +#endif + +static void BM_LookupRuntimeLibcallByNameKnownCalls(benchmark::State &State) { + std::vector Names = getLibcallNameStringRefs(); + + for (auto _ : State) { + for (StringRef Name : Names) { + benchmark::DoNotOptimize( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name).empty()); + } + } +} + +static void BM_LookupRuntimeLibcallByNameRandomCalls(benchmark::State &State) { + std::vector TestFuncNames = getRandomFuncNames(); + + for (auto _ : State) { + for (const std::string &Name : TestFuncNames) { + benchmark::DoNotOptimize( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name)) + .empty()); + } + } +} + +#ifdef SYMBOL_TEST_DATA_FILE +// This isn't fully representative, it doesn't include any anonymous functions. 
+// nm -n --no-demangle --format=just-symbols sample-binary > sample.txt +static void BM_LookupRuntimeLibcallByNameSampleData(benchmark::State &State) { + std::vector TestFuncNames = + readSymbolsFromFile(SYMBOL_TEST_DATA_FILE); + for (auto _ : State) { + for (const std::string &Name : TestFuncNames) { + benchmark::DoNotOptimize( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(StringRef(Name)) + .empty()); + } + } +} +#endif + +BENCHMARK(BM_LookupRuntimeLibcallByNameKnownCalls); +BENCHMARK(BM_LookupRuntimeLibcallByNameRandomCalls); + +#ifdef SYMBOL_TEST_DATA_FILE +BENCHMARK(BM_LookupRuntimeLibcallByNameSampleData); +#endif + +BENCHMARK_MAIN(); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index ec3104799bfb..272d7dd5f45e 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3557,15 +3557,19 @@ public: /// Get the libcall routine name for the specified libcall. const char *getLibcallName(RTLIB::Libcall Call) const { - return Libcalls.getLibcallName(Call); + // FIXME: Return StringRef + return Libcalls.getLibcallName(Call).data(); } /// Get the libcall routine name for the specified libcall implementation - const char *getLibcallImplName(RTLIB::LibcallImpl Call) const { - return Libcalls.getLibcallImplName(Call); + static StringRef getLibcallImplName(RTLIB::LibcallImpl Call) { + return RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Call); } - const char *getMemcpyName() const { return Libcalls.getMemcpyName(); } + const char *getMemcpyName() const { + // FIXME: Return StringRef + return Libcalls.getMemcpyName().data(); + } /// Get the comparison predicate that's to be used to test the result of the /// comparison libcall against zero. 
This should only be used with diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 2d1d07c5fd81..308be543de2b 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -77,17 +77,17 @@ struct RuntimeLibcallsInfo { /// Get the libcall routine name for the specified libcall. // FIXME: This should be removed. Only LibcallImpl should have a name. - const char *getLibcallName(RTLIB::Libcall Call) const { + StringRef getLibcallName(RTLIB::Libcall Call) const { return getLibcallImplName(LibcallImpls[Call]); } /// Get the libcall routine name for the specified libcall implementation. - // FIXME: Change to return StringRef - static const char *getLibcallImplName(RTLIB::LibcallImpl CallImpl) { + static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl) { if (CallImpl == RTLIB::Unsupported) - return nullptr; - return RuntimeLibcallImplNameTable[RuntimeLibcallNameOffsetTable[CallImpl]] - .data(); + return StringRef(); + return StringRef(RuntimeLibcallImplNameTable.getCString( + RuntimeLibcallNameOffsetTable[CallImpl]), + RuntimeLibcallNameSizeTable[CallImpl]); } /// Return the lowering's selection of implementation call for \p Call @@ -119,9 +119,10 @@ struct RuntimeLibcallsInfo { /// Return a function name compatible with RTLIB::MEMCPY, or nullptr if fully /// unsupported. - const char *getMemcpyName() const { - if (const char *Memcpy = getLibcallName(RTLIB::MEMCPY)) - return Memcpy; + StringRef getMemcpyName() const { + RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); + if (Memcpy != RTLIB::Unsupported) + return getLibcallImplName(Memcpy); // Fallback to memmove if memcpy isn't available. return getLibcallName(RTLIB::MEMMOVE); @@ -132,11 +133,41 @@ struct RuntimeLibcallsInfo { return ImplToLibcall[Impl]; } + /// Check if a function name is a recognized runtime call of any kind. 
This + /// does not consider if this call is available for any current compilation, + /// just that it is a known call somewhere. This returns the set of all + /// LibcallImpls which match the name; multiple implementations with the same + /// name may exist but differ in interpretation based on the target context. + /// + /// Generated by tablegen. + LLVM_ABI static inline iota_range + lookupLibcallImplName(StringRef Name){ + // Inlining the early exit on the string name appears to be worthwhile when + // querying a real set of symbols +#define GET_LOOKUP_LIBCALL_IMPL_NAME_BODY +#include "llvm/IR/RuntimeLibcalls.inc" +#undef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY + } + /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. - LLVM_ABI RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const; + LLVM_ABI RTLIB::LibcallImpl + getSupportedLibcallImpl(StringRef FuncName) const { + for (RTLIB::LibcallImpl Impl : lookupLibcallImplName(FuncName)) { + // FIXME: This should not depend on looking up ImplToLibcall, only the + // list of libcalls for the module. + RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; + if (Recognized != RTLIB::Unsupported) + return Recognized; + } + + return RTLIB::Unsupported; + } private: + LLVM_ABI static iota_range + lookupLibcallImplNameImpl(StringRef Name); + /// Stores the implementation choice for each each libcall. RTLIB::LibcallImpl LibcallImpls[RTLIB::UNKNOWN_LIBCALL + 1] = { RTLIB::Unsupported}; @@ -153,17 +184,16 @@ private: LLVM_ABI static const char RuntimeLibcallImplNameTableStorage[]; LLVM_ABI static const StringTable RuntimeLibcallImplNameTable; LLVM_ABI static const uint16_t RuntimeLibcallNameOffsetTable[]; + LLVM_ABI static const uint8_t RuntimeLibcallNameSizeTable[]; /// Map from a concrete LibcallImpl implementation to its RTLIB::Libcall kind. 
LLVM_ABI static const RTLIB::Libcall ImplToLibcall[RTLIB::NumLibcallImpls]; - /// Check if a function name is a recognized runtime call of any kind. This - /// does not consider if this call is available for any current compilation, - /// just that it is a known call somewhere. This returns the set of all - /// LibcallImpls which match the name; multiple implementations with the same - /// name may exist but differ in interpretation based on the target context. - LLVM_ABI static iterator_range::const_iterator> - getRecognizedLibcallImpls(StringRef FuncName); + /// Utility function for tablegenerated lookup function. Return a range of + /// enum values that apply for the function name at \p NameOffsetEntry with + /// the value \p StrOffset. + static inline iota_range + libcallImplNameHit(uint16_t NameOffsetEntry, uint16_t StrOffset); static bool darwinHasSinCosStret(const Triple &TT) { if (!TT.isOSDarwin()) diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 9fa96e737296..96c9cde622b4 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -145,7 +145,7 @@ static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn, // FIXME: When RuntimeLibcalls is an analysis, check if the function is really // supported, and go through RTLIB::Libcall. - const char *NewFnName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(NewFn); + StringRef NewFnName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(NewFn); // If we haven't already looked up this function, check to see if the // program already contains a function with this name. 
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index ac845c499878..88cb192c0878 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -9,6 +9,7 @@ #include "llvm/IR/RuntimeLibcalls.h" #include "llvm/ADT/StringTable.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" #define DEBUG_TYPE "runtime-libcalls-info" @@ -18,9 +19,11 @@ using namespace RTLIB; #define GET_INIT_RUNTIME_LIBCALL_NAMES #define GET_SET_TARGET_RUNTIME_LIBCALL_SETS +#define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME #include "llvm/IR/RuntimeLibcalls.inc" #undef GET_INIT_RUNTIME_LIBCALL_NAMES #undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS +#undef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. @@ -58,49 +61,23 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, } } -RTLIB::LibcallImpl -RuntimeLibcallsInfo::getSupportedLibcallImpl(StringRef FuncName) const { - const ArrayRef RuntimeLibcallNameOffsets( - RuntimeLibcallNameOffsetTable); - - iterator_range::const_iterator> Range = - getRecognizedLibcallImpls(FuncName); - - for (auto I = Range.begin(); I != Range.end(); ++I) { - RTLIB::LibcallImpl Impl = - static_cast(I - RuntimeLibcallNameOffsets.begin()); - - // FIXME: This should not depend on looking up ImplToLibcall, only the list - // of libcalls for the module. 
- RTLIB::LibcallImpl Recognized = LibcallImpls[ImplToLibcall[Impl]]; - if (Recognized != RTLIB::Unsupported) - return Recognized; +LLVM_ATTRIBUTE_ALWAYS_INLINE +iota_range +RuntimeLibcallsInfo::libcallImplNameHit(uint16_t NameOffsetEntry, + uint16_t StrOffset) { + int NumAliases = 1; + for (uint16_t Entry : ArrayRef(RuntimeLibcallNameOffsetTable) + .drop_front(NameOffsetEntry + 1)) { + if (Entry != StrOffset) + break; + ++NumAliases; } - return RTLIB::Unsupported; -} - -iterator_range::const_iterator> -RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) { - StringTable::Iterator It = lower_bound(RuntimeLibcallImplNameTable, FuncName); - if (It == RuntimeLibcallImplNameTable.end() || *It != FuncName) - return iterator_range(ArrayRef()); - - uint16_t IndexVal = It.offset().value(); - const ArrayRef TableRef(RuntimeLibcallNameOffsetTable); - - ArrayRef::const_iterator E = TableRef.end(); - ArrayRef::const_iterator EntriesBegin = - std::lower_bound(TableRef.begin(), E, IndexVal); - ArrayRef::const_iterator EntriesEnd = EntriesBegin; - - while (EntriesEnd != E && *EntriesEnd == IndexVal) - ++EntriesEnd; - - assert(EntriesBegin != E && - "libcall found in name table but not offset table"); - - return make_range(EntriesBegin, EntriesEnd); + RTLIB::LibcallImpl ImplStart = static_cast( + &RuntimeLibcallNameOffsetTable[NameOffsetEntry] - + &RuntimeLibcallNameOffsetTable[0]); + return enum_seq(ImplStart, + static_cast(ImplStart + NumAliases)); } bool RuntimeLibcallsInfo::isAAPCS_ABI(const Triple &TT, StringRef ABIName) { diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 0323b4d433b8..35d24c17bbd9 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1422,7 +1422,7 @@ SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { for (RTLIB::LibcallImpl Impl : LibcallImpls) { if (Impl != RTLIB::Unsupported) - LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl)); + LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl).data()); } 
return LibcallSymbols; diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp index 0f194953787e..0043f02107fb 100644 --- a/llvm/lib/Object/IRSymtab.cpp +++ b/llvm/lib/Object/IRSymtab.cpp @@ -46,7 +46,7 @@ static cl::opt DisableBitcodeVersionUpgrade( "disable-bitcode-version-upgrade", cl::Hidden, cl::desc("Disable automatic bitcode upgrade for version mismatch")); -static const char *PreservedSymbols[] = { +static constexpr StringLiteral PreservedSymbols[] = { // There are global variables, so put it here instead of in // RuntimeLibcalls.td. // TODO: Are there similar such variables? @@ -54,6 +54,10 @@ static const char *PreservedSymbols[] = { "__stack_chk_guard", }; +static bool isPreservedGlobalVarName(StringRef Name) { + return PreservedSymbols[0] == Name || PreservedSymbols[1] == Name; +} + namespace { const char *getExpectedProducerName() { @@ -81,12 +85,16 @@ struct Builder { // The StringTableBuilder does not create a copy of any strings added to it, // so this provides somewhere to store any strings that we create. Builder(SmallVector &Symtab, StringTableBuilder &StrtabBuilder, - BumpPtrAllocator &Alloc) - : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} + BumpPtrAllocator &Alloc, const Triple &TT) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT), + Libcalls(TT) {} DenseMap ComdatMap; Mangler Mang; - Triple TT; + const Triple &TT; + + // FIXME: This shouldn't be here. 
+ RTLIB::RuntimeLibcallsInfo Libcalls; std::vector Comdats; std::vector Mods; @@ -98,6 +106,10 @@ struct Builder { std::vector DependentLibraries; + bool isPreservedLibFuncName(StringRef Name) { + return Libcalls.getSupportedLibcallImpl(Name) != RTLIB::Unsupported; + } + void setStr(storage::Str &S, StringRef Value) { S.Offset = StrtabBuilder.add(Value); S.Size = Value.size(); @@ -213,19 +225,6 @@ Expected Builder::getComdatIndex(const Comdat *C, const Module *M) { return P.first->second; } -static StringSet<> buildPreservedSymbolsSet(const Triple &TT) { - StringSet<> PreservedSymbolSet; - PreservedSymbolSet.insert(std::begin(PreservedSymbols), - std::end(PreservedSymbols)); - // FIXME: Do we need to pass in ABI fields from TargetOptions? - RTLIB::RuntimeLibcallsInfo Libcalls(TT); - for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) { - if (Impl != RTLIB::Unsupported) - PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl)); - } - return PreservedSymbolSet; -} - Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Msym) { @@ -279,13 +278,11 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, return Error::success(); } - setStr(Sym.IRName, GV->getName()); + StringRef GVName = GV->getName(); + setStr(Sym.IRName, GVName); - static const StringSet<> PreservedSymbolsSet = - buildPreservedSymbolsSet(GV->getParent()->getTargetTriple()); - bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName()); - - if (Used.count(GV) || IsPreservedSymbol) + if (Used.count(GV) || isPreservedLibFuncName(GVName) || + isPreservedGlobalVarName(GVName)) Sym.Flags |= 1 << storage::Symbol::FB_used; if (GV->isThreadLocal()) Sym.Flags |= 1 << storage::Symbol::FB_tls; @@ -352,7 +349,6 @@ Error Builder::build(ArrayRef IRMods) { setStr(Hdr.Producer, kExpectedProducerName); setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str()); setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); - TT = 
IRMods[0]->getTargetTriple(); for (auto *M : IRMods) if (Error Err = addModule(M)) @@ -378,7 +374,8 @@ Error Builder::build(ArrayRef IRMods) { Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc) { - return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); + const Triple &TT = Mods[0]->getTargetTriple(); + return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods); } // Upgrade a vector of bitcode modules created by an old version of LLVM by diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 4548a7520b3b..45b0e7dc1226 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -533,8 +533,8 @@ struct StaticLibcallNameMap { // different libcalls. RTLIB::RuntimeLibcallsInfo RTCI(TT); for (RTLIB::Libcall LC : RTLIB::libcalls()) { - const char *NameLibcall = RTCI.getLibcallName(LC); - if (NameLibcall != nullptr && + StringRef NameLibcall = RTCI.getLibcallName(LC); + if (!NameLibcall.empty() && getRuntimeLibcallSignatures().Table[LC] != unsupported) { assert(!Map.contains(NameLibcall) && "duplicate libcall names in name map"); diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp index 540039b7d2cb..0642d51cd2c2 100644 --- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp +++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp @@ -30,7 +30,7 @@ PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M, FunctionType *FuncTy = FunctionType::get(Type::getVoidTy(Ctx), {}, /*IsVarArgs=*/true); - const char *FuncName = RTLCI.getLibcallImplName(Impl); + StringRef FuncName = RTLCI.getLibcallImplName(Impl); M.getOrInsertFunction(FuncName, FuncTy); } diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td 
b/llvm/test/TableGen/RuntimeLibcallEmitter.td index a2d946f3aa84..7c62402227f7 100644 --- a/llvm/test/TableGen/RuntimeLibcallEmitter.td +++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td @@ -137,6 +137,19 @@ def BlahLibrary : SystemRuntimeLibrary 9) +// CHECK-NEXT: return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); +// CHECK-NEXT: return lookupLibcallImplNameImpl(Name); +// CHECK-NEXT: #endif + +// CHECK: #ifdef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME +// CHECK-NEXT: static inline uint64_t hash(StringRef Str) { +// CHECK-NEXT: return static_cast(xxh3_64bits(Str)); +// CHECK-NEXT: } + +// CHECK: iota_range RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) { +// CHECK: static constexpr uint16_t HashTableNameToEnum[16] = { +// CHECK: 2, // 0x000000705301b8, ___memset +// CHECK: 0, +// CHECK: 6, // 0x0000001417a2af, calloc +// CHECK: 0, +// CHECK: }; + +// CHECK: unsigned Idx = (hash(Name) % 8) * 2; +// CHECK: for (int I = 0; I != 2; ++I) { +// CHECK: return libcallImplNameHit(Entry, StrOffset); + +// CHECK: return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); +// CHECK-NEXT: } // CHECK: void llvm::RTLIB::RuntimeLibcallsInfo::setTargetRuntimeLibcallSets(const llvm::Triple &TT, FloatABI::ABIType FloatABI, EABI EABIVersion, StringRef ABIName) { // CHECK-NEXT: struct LibcallImplPair { diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index b66eae93f933..8b7bd3997ea2 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_unittest(IRTests PatternMatch.cpp ShuffleVectorInstTest.cpp StructuralHashTest.cpp + RuntimeLibcallsTest.cpp TimePassesTest.cpp TypesTest.cpp UseTest.cpp diff --git a/llvm/unittests/IR/RuntimeLibcallsTest.cpp b/llvm/unittests/IR/RuntimeLibcallsTest.cpp new file mode 100644 index 000000000000..012316801859 --- /dev/null +++ b/llvm/unittests/IR/RuntimeLibcallsTest.cpp @@ -0,0 +1,63 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/RuntimeLibcalls.h" +#include "llvm/ADT/STLExtras.h" +#include "gtest/gtest.h" +using namespace llvm; + +namespace { + +TEST(RuntimeLibcallsTest, LibcallImplByName) { + EXPECT_TRUE(RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("").empty()); + EXPECT_TRUE( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("unknown").empty()); + EXPECT_TRUE( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("Unsupported").empty()); + EXPECT_TRUE( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("unsupported").empty()); + + for (RTLIB::LibcallImpl LC : RTLIB::libcall_impls()) { + StringRef Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LC); + EXPECT_TRUE(is_contained( + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName(Name), LC)); + } + + // Test first libcall name + EXPECT_EQ( + RTLIB::arm64ec__Unwind_Resume, + *RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("#_Unwind_Resume") + .begin()); + // Test longest libcall names + EXPECT_EQ(RTLIB::__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, + *RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName( + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes") + .begin()); + + { + auto SquirtleSquad = + RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("sqrtl"); + ASSERT_EQ(size(SquirtleSquad), 3); + auto I = SquirtleSquad.begin(); + EXPECT_EQ(*I++, RTLIB::sqrt_f128); + EXPECT_EQ(*I++, RTLIB::sqrt_f80); + EXPECT_EQ(*I++, RTLIB::sqrt_ppcf128); + } + + // Last libcall + { + auto Truncs = RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("truncl"); + ASSERT_EQ(size(Truncs), 3); + auto I = Truncs.begin(); + EXPECT_EQ(*I++, 
RTLIB::trunc_f128); + EXPECT_EQ(*I++, RTLIB::trunc_f80); + EXPECT_EQ(*I++, RTLIB::trunc_ppcf128); + } +} + +} // namespace diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp index 0fc230c4714f..c305e6323ca9 100644 --- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp +++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp @@ -6,10 +6,15 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "runtime-libcall-emitter" + +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/xxhash.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" @@ -215,6 +220,9 @@ private: private: void emitGetRuntimeLibcallEnum(raw_ostream &OS) const; + void emitNameMatchHashTable(raw_ostream &OS, + StringToOffsetTable &OffsetTable) const; + void emitGetInitRuntimeLibcallNames(raw_ostream &OS) const; void emitSystemRuntimeLibrarySetCalls(raw_ostream &OS) const; @@ -255,12 +263,9 @@ public: RuntimeLibcallImplDefList.emplace_back(LibCallImplDef, Def2RuntimeLibcall, LibCallImplEnumVal++); - RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); - + const RuntimeLibcallImpl &LibCallImpl = RuntimeLibcallImplDefList.back(); Def2RuntimeLibcallImpl[LibCallImplDef] = &LibCallImpl; - // const RuntimeLibcallImpl &LibCallImpl = - // RuntimeLibcallImplDefList.back(); if (LibCallImpl.isDefault()) { const RuntimeLibcall *Provides = LibCallImpl.getProvides(); if (!Provides) @@ -282,6 +287,13 @@ public: void run(raw_ostream &OS); }; +/// Helper struct for the name hash table. +struct LookupEntry { + StringRef FuncName; + uint64_t Hash = 0; + unsigned TableValue = 0; +}; + } // End anonymous namespace. 
void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { @@ -295,8 +307,6 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { OS << " " << Name << " = " << LibCall.getEnumVal() << ",\n"; } - // TODO: Emit libcall names as string offset table. - OS << " UNKNOWN_LIBCALL = " << RuntimeLibcallDefList.size() << "\n};\n\n" "enum LibcallImpl : unsigned short {\n" @@ -315,8 +325,179 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const { "#endif\n\n"; } +// StringMap uses xxh3_64bits, truncated to uint32_t. +static uint64_t hash(StringRef Str) { + return static_cast(xxh3_64bits(Str)); +} + +static void emitHashFunction(raw_ostream &OS) { + OS << "static inline uint64_t hash(StringRef Str) {\n" + " return static_cast(xxh3_64bits(Str));\n" + "}\n\n"; +} + +/// Return the table size, maximum number of collisions for the set of hashes +static std::pair +computePerfectHashParameters(ArrayRef Hashes) { + const int SizeOverhead = 10; + const int NumHashes = Hashes.size(); + + // Index derived from hash -> number of collisions. + DenseMap Table; + + for (int MaxCollisions = 1;; ++MaxCollisions) { + for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) { + Table.clear(); + + bool NeedResize = false; + for (uint64_t H : Hashes) { + uint64_t Idx = H % static_cast(N); + if (++Table[Idx] > MaxCollisions) { + // Need to resize the final table if we increased the collision count. 
+ NeedResize = true; + break; + } + } + + if (!NeedResize) + return {N, MaxCollisions}; + } + } +} + +static std::vector +constructPerfectHashTable(ArrayRef Keywords, + ArrayRef Hashes, int Size, int Collisions, + StringToOffsetTable &OffsetTable) { + DenseSet Seen; + std::vector Lookup(Size * Collisions); + + for (const RuntimeLibcallImpl &LibCallImpl : Keywords) { + StringRef ImplName = LibCallImpl.getLibcallFuncName(); + + // We do not want to add repeated entries for cases with the same name, only + // an entry for the first, with the name collision enum values immediately + // following. + if (!Seen.insert(ImplName).second) + continue; + + uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1]; + + uint64_t Idx = (HashValue % static_cast(Size)) * + static_cast(Collisions); + + bool Found = false; + for (int J = 0; J < Collisions; ++J) { + LookupEntry &Entry = Lookup[Idx + J]; + if (Entry.TableValue == 0) { + Entry.FuncName = ImplName; + Entry.TableValue = LibCallImpl.getEnumVal(); + Entry.Hash = HashValue; + Found = true; + break; + } + } + + if (!Found) + reportFatalInternalError("failure to hash " + ImplName); + } + + return Lookup; +} + +/// Generate hash table based lookup by name. 
+void RuntimeLibcallEmitter::emitNameMatchHashTable( + raw_ostream &OS, StringToOffsetTable &OffsetTable) const { + std::vector Hashes(RuntimeLibcallImplDefList.size()); + + size_t MaxFuncNameSize = 0; + size_t Index = 0; + for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + StringRef ImplName = LibCallImpl.getLibcallFuncName(); + MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size()); + Hashes[Index++] = hash(ImplName); + } + + LLVM_DEBUG({ + for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) { + StringRef ImplName = LibCallImpl.getLibcallFuncName(); + if (ImplName.size() == MaxFuncNameSize) { + dbgs() << "Maximum runtime libcall name size: " << ImplName << '(' + << MaxFuncNameSize << ")\n"; + } + } + }); + + // Early exiting on the symbol name provides a significant speedup in the miss + // case on the set of symbols in a clang binary. Emit this as an inlinable + // precondition in the header. + // + // The empty check is also used to get sensible behavior on anonymous + // functions. + // + // TODO: It may make more sense to split the search by string size more. There + // are a few outliers, most call names are small. 
+ OS << "#ifdef GET_LOOKUP_LIBCALL_IMPL_NAME_BODY\n" + " size_t Size = Name.size();\n" + " if (Size == 0 || Size > " + << MaxFuncNameSize + << ")\n" + " return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported);\n" + " return lookupLibcallImplNameImpl(Name);\n" + "#endif\n"; + + auto [Size, Collisions] = computePerfectHashParameters(Hashes); + std::vector Lookup = constructPerfectHashTable( + RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable); + + LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = " + << Size << ", maximum collisions = " << Collisions << '\n'); + + OS << "#ifdef DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME\n"; + emitHashFunction(OS); + + OS << "iota_range RTLIB::RuntimeLibcallsInfo::" + "lookupLibcallImplNameImpl(StringRef Name) {\n"; + + // Emit RTLIB::LibcallImpl values + OS << " static constexpr uint16_t HashTableNameToEnum[" << Lookup.size() + << "] = {\n"; + + for (auto [FuncName, Hash, TableVal] : Lookup) { + OS << " " << TableVal << ','; + if (TableVal != 0) + OS << " // " << format_hex(Hash, 16) << ", " << FuncName; + + OS << '\n'; + } + + OS << " };\n\n"; + + OS << " unsigned Idx = (hash(Name) % " << Size << ") * " << Collisions + << ";\n\n" + " for (int I = 0; I != " + << Collisions << R"(; ++I) { + const uint16_t Entry = HashTableNameToEnum[Idx + I]; + const uint16_t StrOffset = RuntimeLibcallNameOffsetTable[Entry]; + const uint8_t StrSize = RuntimeLibcallNameSizeTable[Entry]; + StringRef Str( + &RTLIB::RuntimeLibcallsInfo::RuntimeLibcallImplNameTableStorage[StrOffset], + StrSize); + if (Str == Name) + return libcallImplNameHit(Entry, StrOffset); + } + + return enum_seq(RTLIB::Unsupported, RTLIB::Unsupported); +} +)"; + + OS << "#endif\n\n"; +} + void RuntimeLibcallEmitter::emitGetInitRuntimeLibcallNames( raw_ostream &OS) const { + OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_NAMES\n"; + // Emit the implementation names StringToOffsetTable Table(/*AppendZero=*/true, "RTLIB::RuntimeLibcallsInfo::"); @@ -337,6 +518,15 
@@ const uint16_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameOffsetTable[] = { } OS << "};\n"; + OS << R"( +const uint8_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameSizeTable[] = { +)"; + + OS << " 0,\n"; + for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) + OS << " " << LibCallImpl.getLibcallFuncName().size() << ",\n"; + OS << "};\n\n"; + // Emit the reverse mapping from implementation libraries to RTLIB::Libcall OS << "const RTLIB::Libcall llvm::RTLIB::RuntimeLibcallsInfo::" "ImplToLibcall[RTLIB::NumLibcallImpls] = {\n" @@ -351,6 +541,10 @@ const uint16_t RTLIB::RuntimeLibcallsInfo::RuntimeLibcallNameOffsetTable[] = { OS << '\n'; } OS << "};\n\n"; + + OS << "#endif\n\n"; + + emitNameMatchHashTable(OS, Table); } void RuntimeLibcallEmitter::emitSystemRuntimeLibrarySetCalls( @@ -531,9 +725,7 @@ void RuntimeLibcallEmitter::run(raw_ostream &OS) { emitSourceFileHeader("Runtime LibCalls Source Fragment", OS, Records); emitGetRuntimeLibcallEnum(OS); - OS << "#ifdef GET_INIT_RUNTIME_LIBCALL_NAMES\n"; emitGetInitRuntimeLibcallNames(OS); - OS << "#endif\n\n"; OS << "#ifdef GET_SET_TARGET_RUNTIME_LIBCALL_SETS\n"; emitSystemRuntimeLibrarySetCalls(OS); From e342dcd589a95b69fbd5066c90cfaabac29a16f8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 15 Aug 2025 18:23:24 -0700 Subject: [PATCH 048/214] github: Add llvm:mc label for generic MC interface (#153737) As a member of github.com/orgs/llvm/teams/pr-subscribers-llvm-mc , I was not notified about PR #149935. This commit introduces the `llvm:mc` label to cover the generic MC interface, excluding target-specific MCTargetDesc files. - Rename the `mc` label to `llvm:mc` for consistency with other LLVM subdirectory labels. - Exclude `llvm/test/MC` from the label scope, as it contains many target-specific directories. 
Admin: please change the name of https://github.com/orgs/llvm/teams/pr-subscribers-llvm-mc to "pr-subscribers-llvm:mc", similar to pr-subscribers-llvm:ir --- .github/new-prs-labeler.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index 8e0fa8d42d73..dab3db2616f5 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -90,9 +90,6 @@ LTO: - llvm/lib/Transforms/*/FunctionImport* - llvm/tools/gold/** -mc: - - llvm/*/MC/** - clang:driver: - clang/*/Driver/** @@ -621,6 +618,12 @@ llvm:adt: llvm:support: - llvm/**/Support/** +# Skip llvm/test/MC and llvm/unittests/MC, which include target-specific directories. +llvm:mc: + - llvm/include/llvm/MC/** + - llvm/lib/MC/** + - llvm/tools/llvm-mc/** + llvm:transforms: - llvm/lib/Transforms/** - llvm/include/llvm/Transforms/** From 37729d8ceb91edaece07eea6ab45fe8d0012132c Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Fri, 15 Aug 2025 21:33:44 -0400 Subject: [PATCH 049/214] [HLSL] Refactoring DXILABI.h to not depend on scope printer (#153840) This patch refactors DXILABI to remove the dependency on scope printer.
Closes: #153827 --------- Co-authored-by: Joao Saffran <{ID}+{username}@users.noreply.github.com> --- llvm/include/llvm/Support/DXILABI.h | 3 --- llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp | 5 ++-- .../Frontend/HLSL/RootSignatureMetadata.cpp | 8 +++--- llvm/lib/Support/DXILABI.cpp | 25 +++++++++---------- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/Support/DXILABI.h b/llvm/include/llvm/Support/DXILABI.h index 2dcdd73415be..b25b3632f6c3 100644 --- a/llvm/include/llvm/Support/DXILABI.h +++ b/llvm/include/llvm/Support/DXILABI.h @@ -18,7 +18,6 @@ #define LLVM_SUPPORT_DXILABI_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/ScopedPrinter.h" #include namespace llvm { @@ -101,8 +100,6 @@ enum class SamplerFeedbackType : uint32_t { const unsigned MinWaveSize = 4; const unsigned MaxWaveSize = 128; -LLVM_ABI ArrayRef> getResourceClasses(); - LLVM_ABI StringRef getResourceClassName(ResourceClass RC); } // namespace dxil diff --git a/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp b/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp index 050cc46e8c9b..ac2c974fb11a 100644 --- a/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp +++ b/llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/HLSL/HLSLRootSignature.h" +#include "llvm/Support/DXILABI.h" #include "llvm/Support/ScopedPrinter.h" namespace llvm { @@ -93,9 +94,7 @@ static raw_ostream &operator<<(raw_ostream &OS, } static raw_ostream &operator<<(raw_ostream &OS, const ClauseType &Type) { - OS << enumToStringRef(dxil::ResourceClass(llvm::to_underlying(Type)), - dxil::getResourceClasses()); - + OS << dxil::getResourceClassName(Type); return OS; } diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp index 157bfc665b20..f822d918fae4 100644 --- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp +++ 
b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp @@ -15,6 +15,7 @@ #include "llvm/Frontend/HLSL/RootSignatureValidations.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/Support/DXILABI.h" #include "llvm/Support/ScopedPrinter.h" using namespace llvm; @@ -120,8 +121,7 @@ MDNode *MetadataBuilder::BuildRootConstants(const RootConstants &Constants) { MDNode *MetadataBuilder::BuildRootDescriptor(const RootDescriptor &Descriptor) { IRBuilder<> Builder(Ctx); StringRef ResName = - enumToStringRef(dxil::ResourceClass(to_underlying(Descriptor.Type)), - dxil::getResourceClasses()); + dxil::getResourceClassName(dxil::ResourceClass(Descriptor.Type)); assert(!ResName.empty() && "Provided an invalid Resource Class"); SmallString<7> Name({"Root", ResName}); Metadata *Operands[] = { @@ -161,9 +161,7 @@ MDNode *MetadataBuilder::BuildDescriptorTable(const DescriptorTable &Table) { MDNode *MetadataBuilder::BuildDescriptorTableClause( const DescriptorTableClause &Clause) { IRBuilder<> Builder(Ctx); - StringRef ResName = - enumToStringRef(dxil::ResourceClass(to_underlying(Clause.Type)), - dxil::getResourceClasses()); + StringRef ResName = dxil::getResourceClassName(Clause.Type); assert(!ResName.empty() && "Provided an invalid Resource Class"); Metadata *Operands[] = { MDString::get(Ctx, ResName), diff --git a/llvm/lib/Support/DXILABI.cpp b/llvm/lib/Support/DXILABI.cpp index 261fe1ef9827..082e32061bd4 100644 --- a/llvm/lib/Support/DXILABI.cpp +++ b/llvm/lib/Support/DXILABI.cpp @@ -15,20 +15,19 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DXILABI.h" -#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -static const EnumEntry ResourceClassNames[] = { - {"SRV", llvm::dxil::ResourceClass::SRV}, - {"UAV", llvm::dxil::ResourceClass::UAV}, - {"CBV", llvm::dxil::ResourceClass::CBuffer}, - {"Sampler", llvm::dxil::ResourceClass::Sampler}, -}; - 
-ArrayRef> dxil::getResourceClasses() { - return ArrayRef(ResourceClassNames); -} - StringRef dxil::getResourceClassName(dxil::ResourceClass RC) { - return enumToStringRef(RC, getResourceClasses()); + switch (RC) { + case dxil::ResourceClass::SRV: + return "SRV"; + case dxil::ResourceClass::UAV: + return "UAV"; + case dxil::ResourceClass::CBuffer: + return "CBV"; + case dxil::ResourceClass::Sampler: + return "Sampler"; + } + llvm_unreachable("Invalid ResourceClass enum value"); } From 76d993bd25ff462d915f69772454e7b1ca42fdb8 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Aug 2025 05:13:43 +0300 Subject: [PATCH 050/214] [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) Auto-generated decoder fails to add the $sgp10 operand because it has no encoding bits. Work around this by adding the missing operand after decoding is complete. Fixes #153829. --- llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 3 +++ llvm/test/MC/Hexagon/system-inst.s | 3 +++ 2 files changed, 6 insertions(+) diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 22cff7c80fa0..bcddb540d35d 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -526,6 +526,9 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB, MI.insert(MI.begin() + 1, MCOperand::createExpr(MCConstantExpr::create(-1, getContext()))); break; + case Hexagon::Y4_crswap10: + MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0)); + break; default: break; } diff --git a/llvm/test/MC/Hexagon/system-inst.s b/llvm/test/MC/Hexagon/system-inst.s index 7bc153359853..07f7ca0acb2d 100644 --- a/llvm/test/MC/Hexagon/system-inst.s +++ b/llvm/test/MC/Hexagon/system-inst.s @@ -89,6 +89,9 @@ crswap(r12,sgp0) #CHECK: 652dc000 { crswap(r13,sgp1) } crswap(r13,sgp1) +#CHECK: 6d8ec000 { crswap(r15:14,s1:0) 
} +crswap(r15:14,sgp1:0) + #CHECK: 660fc00e { r14 = getimask(r15) } r14=getimask(r15) From 0561ede6c6048289dba207ceae392e70f1d3f1d9 Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Sat, 16 Aug 2025 06:17:30 +0300 Subject: [PATCH 051/214] [libc++][jthread] LWG3788: `jthread::operator=(jthread&&)` postconditions are unimplementable under self-assignment (#153758) Already implemented in LLVM18: [695138c](https://github.com/llvm/llvm-project/commit/695138ca8405779c2b7756cc31d887aa54f56bb8) For details see: https://github.com/llvm/llvm-project/issues/105045#issuecomment-3190674947 Closes #105045 --- libcxx/docs/Status/Cxx23Issues.csv | 2 +- .../test/std/thread/thread.jthread/assign.move.pass.cpp | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv index 189f8452e067..0103d6319f16 100644 --- a/libcxx/docs/Status/Cxx23Issues.csv +++ b/libcxx/docs/Status/Cxx23Issues.csv @@ -230,7 +230,7 @@ "`LWG3782 `__","Should ```` declare ``::lerp``?","2022-11 (Kona)","|Complete|","17","" "`LWG3784 `__","std.compat should not provide ``::byte`` and its friends","2022-11 (Kona)","|Complete|","19","" "`LWG3785 `__","``ranges::to`` is over-constrained on the destination type being a range","2022-11 (Kona)","","","" -"`LWG3788 `__","``jthread::operator=(jthread&&)`` postconditions are unimplementable under self-assignment","2022-11 (Kona)","","","" +"`LWG3788 `__","``jthread::operator=(jthread&&)`` postconditions are unimplementable under self-assignment","2022-11 (Kona)","|Complete|","18","" "`LWG3792 `__","``__cpp_lib_constexpr_algorithms`` should also be defined in ````","2022-11 (Kona)","|Complete|","16","" "`LWG3795 `__","Self-move-assignment of ``std::future`` and ``std::shared_future`` have unimplementable postconditions","2022-11 (Kona)","","","" "`LWG3796 `__","``movable-box`` as member should use ``default-initialization`` instead of ``copy-initialization``","2022-11 (Kona)","","","" 
diff --git a/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp b/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp index b714cc58cbd3..fd5a1705c56a 100644 --- a/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp +++ b/libcxx/test/std/thread/thread.jthread/assign.move.pass.cpp @@ -112,5 +112,14 @@ int main(int, char**) { assert(j1.get_id() == j2Id); } + // LWG3788: self-assignment + { + std::jthread j = support::make_test_jthread([] {}); + auto oldId = j.get_id(); + j = std::move(j); + + assert(j.get_id() == oldId); + } + return 0; } From a44bd1568c24e998472239e5ead6784bffdd3956 Mon Sep 17 00:00:00 2001 From: Kelvin Li Date: Fri, 15 Aug 2025 23:18:18 -0400 Subject: [PATCH 052/214] [clang-repl][AIX][zOS] Disable clang/test/Interpreter tests on AIX and zOS (NFC) (#153587) --- clang/test/Interpreter/assignment-with-implicit-ctor.cpp | 1 - clang/test/Interpreter/bad_percent_command.cpp | 1 - clang/test/Interpreter/code-undo.cpp | 1 - clang/test/Interpreter/const.cpp | 1 - clang/test/Interpreter/cxx20-modules.cppm | 1 - clang/test/Interpreter/dynamic-library-bad-args.cpp | 1 - clang/test/Interpreter/execute-stmts.cpp | 1 - clang/test/Interpreter/execute-weak.cpp | 2 +- clang/test/Interpreter/execute.c | 1 - clang/test/Interpreter/execute.cpp | 2 -- clang/test/Interpreter/fail.cpp | 1 - clang/test/Interpreter/global-dtor.cpp | 3 +-- clang/test/Interpreter/help.cpp | 1 - clang/test/Interpreter/incremental-mode.cpp | 2 -- clang/test/Interpreter/inline-asm.cpp | 1 - clang/test/Interpreter/inline-virtual.cpp | 1 - clang/test/Interpreter/lambda.cpp | 3 +-- clang/test/Interpreter/lit.local.cfg | 6 +++++- clang/test/Interpreter/multiline.cpp | 1 - clang/test/Interpreter/pretty-print.c | 1 - clang/test/Interpreter/pretty-print.cpp | 2 +- clang/test/Interpreter/simple-exception.cpp | 1 - 22 files changed, 9 insertions(+), 26 deletions(-) diff --git a/clang/test/Interpreter/assignment-with-implicit-ctor.cpp
b/clang/test/Interpreter/assignment-with-implicit-ctor.cpp index 24cea8ec1a4b..cef568c78663 100644 --- a/clang/test/Interpreter/assignment-with-implicit-ctor.cpp +++ b/clang/test/Interpreter/assignment-with-implicit-ctor.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // // RUN: cat %s | clang-repl | FileCheck %s // RUN: cat %s | clang-repl -Xcc -O2 | FileCheck %s diff --git a/clang/test/Interpreter/bad_percent_command.cpp b/clang/test/Interpreter/bad_percent_command.cpp index 95bebeab58d3..207570cac187 100644 --- a/clang/test/Interpreter/bad_percent_command.cpp +++ b/clang/test/Interpreter/bad_percent_command.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl 2>&1 | FileCheck %s %foobar // CHECK: Invalid % command "%foobar", use "%help" to list commands diff --git a/clang/test/Interpreter/code-undo.cpp b/clang/test/Interpreter/code-undo.cpp index 83ade0ec9158..4516910ca3b4 100644 --- a/clang/test/Interpreter/code-undo.cpp +++ b/clang/test/Interpreter/code-undo.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl | FileCheck %s extern "C" int printf(const char *, ...); int x1 = 0; diff --git a/clang/test/Interpreter/const.cpp b/clang/test/Interpreter/const.cpp index 52be75e09ade..cadd446b7504 100644 --- a/clang/test/Interpreter/const.cpp +++ b/clang/test/Interpreter/const.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: system-aix, system-zos // see https://github.com/llvm/llvm-project/issues/68092 // XFAIL: host={{.*}}-windows-msvc diff --git a/clang/test/Interpreter/cxx20-modules.cppm b/clang/test/Interpreter/cxx20-modules.cppm index 4e56e2fc1528..97744e3b25f7 100644 --- a/clang/test/Interpreter/cxx20-modules.cppm +++ b/clang/test/Interpreter/cxx20-modules.cppm @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit, x86_64-linux -// UNSUPPORTED: system-aix // // RUN: rm -rf %t // RUN: mkdir -p %t diff --git a/clang/test/Interpreter/dynamic-library-bad-args.cpp 
b/clang/test/Interpreter/dynamic-library-bad-args.cpp index 7684a8b746c9..f48a2657beac 100644 --- a/clang/test/Interpreter/dynamic-library-bad-args.cpp +++ b/clang/test/Interpreter/dynamic-library-bad-args.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl 2>&1 | FileCheck %s %lib // CHECK: %lib expects 1 argument: the path to a dynamic library diff --git a/clang/test/Interpreter/execute-stmts.cpp b/clang/test/Interpreter/execute-stmts.cpp index 433c6811777d..cc27fa615f71 100644 --- a/clang/test/Interpreter/execute-stmts.cpp +++ b/clang/test/Interpreter/execute-stmts.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify | FileCheck %s // RUN: %clang_cc1 -verify -fincremental-extensions -emit-llvm -o - %s \ // RUN: | FileCheck --check-prefix=CODEGEN-CHECK %s diff --git a/clang/test/Interpreter/execute-weak.cpp b/clang/test/Interpreter/execute-weak.cpp index 85fa5d276f5f..f469451f5e5e 100644 --- a/clang/test/Interpreter/execute-weak.cpp +++ b/clang/test/Interpreter/execute-weak.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: system-aix, system-windows +// UNSUPPORTED: system-windows // RUN: cat %s | clang-repl | FileCheck %s extern "C" int printf(const char *, ...); diff --git a/clang/test/Interpreter/execute.c b/clang/test/Interpreter/execute.c index 44a3a32c9301..ca8f83cf6e37 100644 --- a/clang/test/Interpreter/execute.c +++ b/clang/test/Interpreter/execute.c @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl -Xcc -xc -Xcc -Xclang -Xcc -verify | FileCheck %s // RUN: cat %s | clang-repl -Xcc -xc -Xcc -O2 -Xcc -Xclang -Xcc -verify| FileCheck %s diff --git a/clang/test/Interpreter/execute.cpp b/clang/test/Interpreter/execute.cpp index 534a54ed94fb..82cd70a93980 100644 --- a/clang/test/Interpreter/execute.cpp +++ b/clang/test/Interpreter/execute.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: system-aix - // clang-format off // RUN: 
clang-repl "int i = 10;" 'extern "C" int printf(const char*,...);' \ // RUN: 'auto r1 = printf("i = %d\n", i);' | FileCheck --check-prefix=CHECK-DRIVER %s diff --git a/clang/test/Interpreter/fail.cpp b/clang/test/Interpreter/fail.cpp index 4963df8c54a4..d92debc25354 100644 --- a/clang/test/Interpreter/fail.cpp +++ b/clang/test/Interpreter/fail.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // clang-repl can be called from the prompt in non-interactive mode as a // calculator in shell scripts, for example. In that case if there is an error // we should set the exit code as failure. diff --git a/clang/test/Interpreter/global-dtor.cpp b/clang/test/Interpreter/global-dtor.cpp index 1f241d9f1931..9cb454b06c17 100644 --- a/clang/test/Interpreter/global-dtor.cpp +++ b/clang/test/Interpreter/global-dtor.cpp @@ -1,5 +1,4 @@ // clang-format off -// UNSUPPORTED: system-aix // // Tests that a global destructor is ran on platforms with gnu exception support. // @@ -10,4 +9,4 @@ extern "C" int printf(const char *, ...); struct D { float f = 1.0; D *m = nullptr; D(){} ~D() { printf("D[f=%f, m=0x%llx]\n", f, reinterpret_cast(m)); }} d; // CHECK: D[f=1.000000, m=0x0] -%quit \ No newline at end of file +%quit diff --git a/clang/test/Interpreter/help.cpp b/clang/test/Interpreter/help.cpp index 5573fb4284c6..70f114f4644e 100644 --- a/clang/test/Interpreter/help.cpp +++ b/clang/test/Interpreter/help.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl | FileCheck %s %help // CHECK: %help list clang-repl %commands diff --git a/clang/test/Interpreter/incremental-mode.cpp b/clang/test/Interpreter/incremental-mode.cpp index 71ff794872b2..d63cee0dd6d1 100644 --- a/clang/test/Interpreter/incremental-mode.cpp +++ b/clang/test/Interpreter/incremental-mode.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: system-aix -// // RUN: clang-repl -Xcc -E // RUN: clang-repl -Xcc -emit-llvm // RUN: clang-repl -Xcc -xc diff --git 
a/clang/test/Interpreter/inline-asm.cpp b/clang/test/Interpreter/inline-asm.cpp index f94f14df72f8..6d071b1ef207 100644 --- a/clang/test/Interpreter/inline-asm.cpp +++ b/clang/test/Interpreter/inline-asm.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit, x86_64-linux -// UNSUPPORTED: system-aix // // RUN: rm -rf %t // RUN: mkdir -p %t diff --git a/clang/test/Interpreter/inline-virtual.cpp b/clang/test/Interpreter/inline-virtual.cpp index c9e85683d3cd..3790c110af09 100644 --- a/clang/test/Interpreter/inline-virtual.cpp +++ b/clang/test/Interpreter/inline-virtual.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // // This test is flaky with ASan: https://github.com/llvm/llvm-project/issues/135401 // UNSUPPORTED: asan diff --git a/clang/test/Interpreter/lambda.cpp b/clang/test/Interpreter/lambda.cpp index db8c9db9b344..e6941087e947 100644 --- a/clang/test/Interpreter/lambda.cpp +++ b/clang/test/Interpreter/lambda.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl | FileCheck %s // At -O2, somehow "x = 42" appears first when piped into FileCheck, // see https://github.com/llvm/llvm-project/issues/143547. 
@@ -27,4 +26,4 @@ auto capture = [&]() { return x * 2; }; printf("x = %d\n", x); // CHECK: x = 42 -%quit \ No newline at end of file +%quit diff --git a/clang/test/Interpreter/lit.local.cfg b/clang/test/Interpreter/lit.local.cfg index ac6d2205e9fc..37af5129d4ee 100644 --- a/clang/test/Interpreter/lit.local.cfg +++ b/clang/test/Interpreter/lit.local.cfg @@ -1,2 +1,6 @@ -if "host-supports-jit" not in config.available_features: +# clang-repl is not supported on AIX and zOS +unsupported_platforms = [ "system-aix", "system-zos" ] + +if "host-supports-jit" not in config.available_features or \ + any(up in config.available_features for up in unsupported_platforms): config.unsupported = True diff --git a/clang/test/Interpreter/multiline.cpp b/clang/test/Interpreter/multiline.cpp index 0f5ef48417f1..a9f1455fc94a 100644 --- a/clang/test/Interpreter/multiline.cpp +++ b/clang/test/Interpreter/multiline.cpp @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify | FileCheck %s // expected-no-diagnostics diff --git a/clang/test/Interpreter/pretty-print.c b/clang/test/Interpreter/pretty-print.c index e1408c035a16..588df70e33e8 100644 --- a/clang/test/Interpreter/pretty-print.c +++ b/clang/test/Interpreter/pretty-print.c @@ -1,5 +1,4 @@ // REQUIRES: host-supports-jit -// UNSUPPORTED: system-aix // RUN: cat %s | clang-repl -Xcc -xc | FileCheck %s // RUN: cat %s | clang-repl -Xcc -std=c++11 | FileCheck %s diff --git a/clang/test/Interpreter/pretty-print.cpp b/clang/test/Interpreter/pretty-print.cpp index e1036ab87df9..bad71cdd48f0 100644 --- a/clang/test/Interpreter/pretty-print.cpp +++ b/clang/test/Interpreter/pretty-print.cpp @@ -1,7 +1,7 @@ // RUN: clang-repl "int i = 10;" 'extern "C" int printf(const char*,...);' \ // RUN: 'auto r1 = printf("i = %d\n", i);' | FileCheck --check-prefix=CHECK-DRIVER %s // The test is flaky with asan https://github.com/llvm/llvm-project/pull/148701. 
-// UNSUPPORTED: system-aix, asan +// UNSUPPORTED: asan // CHECK-DRIVER: i = 10 // RUN: cat %s | clang-repl -Xcc -std=c++11 -Xcc -fno-delayed-template-parsing | FileCheck %s extern "C" int printf(const char*,...); diff --git a/clang/test/Interpreter/simple-exception.cpp b/clang/test/Interpreter/simple-exception.cpp index 8f7b515c142b..2d43f807d7b9 100644 --- a/clang/test/Interpreter/simple-exception.cpp +++ b/clang/test/Interpreter/simple-exception.cpp @@ -1,5 +1,4 @@ // clang-format off -// UNSUPPORTED: system-aix // XFAIL for arm, or running on Windows. // XFAIL: target=arm-{{.*}}, target=armv{{.*}}, system-windows, system-cygwin // RUN: cat %s | clang-repl | FileCheck %s From 638bd11c13b976a9a76448da596a65dbbb036a14 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Fri, 15 Aug 2025 20:28:30 -0700 Subject: [PATCH 053/214] [msan] Handle SSE/AVX pshuf intrinsic by applying to shadow (#153895) llvm.x86.sse.pshuf.w(<1 x i64>, i8) and llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) are currently handled strictly, which is suboptimal. llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) and llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) are currently heuristically handled using maybeHandleSimpleNomemIntrinsic, which is incorrect. Since the second argument is the shuffle order, we instrument all these intrinsics using `handleIntrinsicByApplyingToShadow(..., /*trailingVerbatimArgs=*/1)` (https://github.com/llvm/llvm-project/pull/114490). 
--- .../Instrumentation/MemorySanitizer.cpp | 22 ++++++++++ .../X86/avx2-intrinsics-x86.ll | 5 ++- .../X86/avx512bw-intrinsics-upgrade.ll | 11 ++--- .../X86/avx512bw-intrinsics.ll | 16 ++++---- .../MemorySanitizer/X86/mmx-intrinsics.ll | 41 ++++++++----------- .../i386/avx2-intrinsics-i386.ll | 5 ++- .../MemorySanitizer/i386/mmx-intrinsics.ll | 41 ++++++++----------- 7 files changed, 79 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 3ecace5cfe6e..6b394f533868 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3237,6 +3237,8 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// TODO: "horizontal"/"pairwise" intrinsics are often incorrectly matched by /// by this handler. See horizontalReduce(). + /// + /// TODO: permutation intrinsics are also often incorrectly matched. [[maybe_unused]] bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I, unsigned int trailingFlags) { @@ -5719,6 +5721,26 @@ struct MemorySanitizerVisitor : public InstVisitor { handleAVXVpermi2var(I); break; + // Packed Shuffle + // llvm.x86.sse.pshuf.w(<1 x i64>, i8) + // llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) + // llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) + // llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) + // llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) + // + // The following intrinsics are auto-upgraded: + // llvm.x86.sse2.pshuf.d(<4 x i32>, i8) + // llvm.x86.sse2.gpshufh.w(<8 x i16>, i8) + // llvm.x86.sse2.pshufl.w(<8 x i16>, i8) + case Intrinsic::x86_avx2_pshuf_b: + case Intrinsic::x86_sse_pshuf_w: + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_ssse3_pshuf_b: + case Intrinsic::x86_avx512_pshuf_b_512: + handleIntrinsicByApplyingToShadow(I, I.getIntrinsicID(), + /*trailingVerbatimArgs=*/1); + break; + case Intrinsic::x86_avx512_mask_cvtps2dq_512: { 
handleAVX512VectorConvertFPToInt(I); break; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index cc07958bd9f2..29269ff33377 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -767,8 +767,9 @@ define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[TMP1]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll index 02df9c49a010..abbbb040edf1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -22,7 +22,6 @@ ; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512 ; - llvm.x86.avx512.permvar.hi.512 ; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512 -; - llvm.x86.avx512.pshuf.b.512 ; - llvm.x86.avx512.psllv.w.512, llvm.x86.avx512.psrav.w.512, 
llvm.x86.avx512.psrlv.w.512 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -1968,8 +1967,9 @@ define <64 x i8> @test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]]) ; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP3]] ; @@ -1984,8 +1984,9 @@ define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP13]] +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]]) ; CHECK-NEXT: [[TMP6:%.*]] = 
bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1> ; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll index 78c272c7b2c5..00337da67af1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -17,7 +17,6 @@ ; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512 ; - llvm.x86.avx512.permvar.hi.512 ; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512 -; - llvm.x86.avx512.pshuf.b.512 ; - llvm.x86.avx512.psllv.w.512 ; - llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512 @@ -1714,8 +1713,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) # ; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]]) ; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[RES]] ; @@ -1730,8 +1730,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> % ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP10]] +; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]] @@ -1755,8 +1756,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> ; CHECK-NEXT: 
[[TMP5:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index 99eafc13b2bf..3d98f60a8242 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -4,8 +4,6 @@ ; Handled strictly: ; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2 ; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2 -; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 -; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 ; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5 ; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5 ; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5 @@ -2792,19 +2790,17 @@ define i64 @test21(<1 x i64> %a) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP9:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3) +; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP6]] 
to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> @@ -2826,19 +2822,17 @@ define i32 @test21_2(<1 x i64> %a) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP9:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3) +; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP12]] to <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = 
extractelement <2 x i32> [[TMP14]], i32 0 +; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP15]] ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> @@ -3249,7 +3243,8 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP16]], <1 x i64> [[TMP17]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], [[TMP20]] ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8> diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll index 9960b80f2856..cd79bcb2233f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll @@ -807,8 +807,9 @@ define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[TMP1]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: 
[[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index 74822de4962b..8052b5e34526 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -4,8 +4,6 @@ ; Handled strictly: ; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2 ; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2 -; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 -; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 ; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5 ; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5 ; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5 @@ -2863,19 +2861,17 @@ define i64 @test21(<1 x i64> %a) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3) +; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> 
@llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP14]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 -; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x i64> [[TMP15]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP12]] ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> @@ -2898,19 +2894,17 @@ define i32 @test21_2(<1 x i64> %a) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3) +; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP14]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP9]] to <2 x i32> ; 
CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 -; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 +; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] ; entry: %0 = bitcast <1 x i64> %a to <4 x i16> @@ -3333,7 +3327,8 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP16]], <1 x i64> [[TMP17]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], [[TMP20]] ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8> From 627f8018fe161480cfa08b29c3c8059f33fa5c3f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 15 Aug 2025 21:22:37 -0700 Subject: [PATCH 054/214] [ADT] Rename NumNonEmpty to NumEntries in SmallPtrSet (NFC) (#153757) Without this patch, we use NumNonEmpty, which keeps track of the number of valid entries plus tombstones even though we have a separate variable to keep track of the number of tombstones. This patch simplifies the metadata. Specifically, it changes the name and semantics of the variable to NumEntries to keep track of the number of valid entries. The difference in semantics requires some code changes aside from mechanical replacements: - size() just returns NumEntries. - erase_imp() and remove_if() need to decrement NumEntries in the large mode. 
- insert_imp_big() increments NumEntries for successful insertions, regardless of whether a tombstone is being replaced with a valid entry. It also computes the number of non-tombstone empty slots as: CurArraySize - NumEntries - NumTombstones - Grow() no longer needs NumNonEmpty -= NumTombstones. Overall, the resulting code should look more intuitive and more consistent with DenseMapSet. --- llvm/include/llvm/ADT/SmallPtrSet.h | 42 ++++++++++++++-------------- llvm/lib/Support/SmallPtrSet.cpp | 43 ++++++++++++++--------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 73ec7c68f65c..2829cbf41b66 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -62,10 +62,10 @@ protected: /// CurArraySize - The allocated size of CurArray, always a power of two. unsigned CurArraySize; - /// Number of elements in CurArray that contain a value or are a tombstone. + /// Number of elements in CurArray that contain a value. /// If small, all these elements are at the beginning of CurArray and the rest /// is uninitialized. - unsigned NumNonEmpty; + unsigned NumEntries; /// Number of tombstones in CurArray. unsigned NumTombstones; /// Whether the set is in small representation. 
@@ -79,7 +79,7 @@ protected: SmallPtrSetImplBase &&that); explicit SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize) - : CurArray(SmallStorage), CurArraySize(SmallSize), NumNonEmpty(0), + : CurArray(SmallStorage), CurArraySize(SmallSize), NumEntries(0), NumTombstones(0), IsSmall(true) { assert(llvm::has_single_bit(SmallSize) && "Initial size must be a power of two!"); @@ -96,7 +96,7 @@ public: SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete; [[nodiscard]] bool empty() const { return size() == 0; } - size_type size() const { return NumNonEmpty - NumTombstones; } + size_type size() const { return NumEntries; } size_type capacity() const { return CurArraySize; } void clear() { @@ -110,25 +110,25 @@ public: memset(CurArray, -1, CurArraySize * sizeof(void *)); } - NumNonEmpty = 0; + NumEntries = 0; NumTombstones = 0; } - void reserve(size_type NumEntries) { + void reserve(size_type NewNumEntries) { incrementEpoch(); // Do nothing if we're given zero as a reservation size. - if (NumEntries == 0) + if (NewNumEntries == 0) return; - // No need to expand if we're small and NumEntries will fit in the space. - if (isSmall() && NumEntries <= CurArraySize) + // No need to expand if we're small and NewNumEntries will fit in the space. + if (isSmall() && NewNumEntries <= CurArraySize) return; // insert_imp_big will reallocate if stores is more than 75% full, on the // /final/ insertion. - if (!isSmall() && ((NumEntries - 1) * 4) < (CurArraySize * 3)) + if (!isSmall() && ((NewNumEntries - 1) * 4) < (CurArraySize * 3)) return; // We must Grow -- find the size where we'd be 75% full, then round up to // the next power of two. - size_type NewSize = NumEntries + (NumEntries / 3); + size_type NewSize = NewNumEntries + (NewNumEntries / 3); NewSize = llvm::bit_ceil(NewSize); // Like insert_imp_big, always allocate at least 128 elements. 
NewSize = std::max(128u, NewSize); @@ -145,15 +145,15 @@ protected: } const void **EndPointer() const { - return isSmall() ? CurArray + NumNonEmpty : CurArray + CurArraySize; + return isSmall() ? CurArray + NumEntries : CurArray + CurArraySize; } iterator_range small_buckets() { - return make_range(CurArray, CurArray + NumNonEmpty); + return make_range(CurArray, CurArray + NumEntries); } iterator_range small_buckets() const { - return {CurArray, CurArray + NumNonEmpty}; + return {CurArray, CurArray + NumEntries}; } iterator_range buckets() { @@ -172,10 +172,10 @@ protected: } // Nope, there isn't. If we stay small, just 'pushback' now. - if (NumNonEmpty < CurArraySize) { - CurArray[NumNonEmpty++] = Ptr; + if (NumEntries < CurArraySize) { + CurArray[NumEntries++] = Ptr; incrementEpoch(); - return std::make_pair(CurArray + (NumNonEmpty - 1), true); + return std::make_pair(CurArray + (NumEntries - 1), true); } // Otherwise, hit the big set case, which will call grow. } @@ -190,7 +190,7 @@ protected: if (isSmall()) { for (const void *&Bucket : small_buckets()) { if (Bucket == Ptr) { - Bucket = CurArray[--NumNonEmpty]; + Bucket = CurArray[--NumEntries]; incrementEpoch(); return true; } @@ -204,6 +204,7 @@ protected: *const_cast(Bucket) = getTombstoneMarker(); NumTombstones++; + --NumEntries; // Treat this consistently from an API perspective, even if we don't // actually invalidate iterators here. 
incrementEpoch(); @@ -430,12 +431,12 @@ public: bool remove_if(UnaryPredicate P) { bool Removed = false; if (isSmall()) { - const void **APtr = CurArray, **E = CurArray + NumNonEmpty; + const void **APtr = CurArray, **E = CurArray + NumEntries; while (APtr != E) { PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast(*APtr)); if (P(Ptr)) { *APtr = *--E; - --NumNonEmpty; + --NumEntries; incrementEpoch(); Removed = true; } else { @@ -452,6 +453,7 @@ public: if (P(Ptr)) { Bucket = getTombstoneMarker(); ++NumTombstones; + --NumEntries; incrementEpoch(); Removed = true; } diff --git a/llvm/lib/Support/SmallPtrSet.cpp b/llvm/lib/Support/SmallPtrSet.cpp index 0c226970906d..46cf53f1ecf8 100644 --- a/llvm/lib/Support/SmallPtrSet.cpp +++ b/llvm/lib/Support/SmallPtrSet.cpp @@ -28,7 +28,7 @@ void SmallPtrSetImplBase::shrink_and_clear() { // Reduce the number of buckets. unsigned Size = size(); CurArraySize = Size > 16 ? 1 << (Log2_32_Ceil(Size) + 1) : 32; - NumNonEmpty = NumTombstones = 0; + NumEntries = NumTombstones = 0; // Install the new array. Clear all the buckets to empty. CurArray = (const void**)safe_malloc(sizeof(void*) * CurArraySize); @@ -41,7 +41,8 @@ SmallPtrSetImplBase::insert_imp_big(const void *Ptr) { if (LLVM_UNLIKELY(size() * 4 >= CurArraySize * 3)) { // If more than 3/4 of the array is full, grow. Grow(CurArraySize < 64 ? 128 : CurArraySize * 2); - } else if (LLVM_UNLIKELY(CurArraySize - NumNonEmpty < CurArraySize / 8)) { + } else if (LLVM_UNLIKELY(CurArraySize - NumEntries - NumTombstones < + CurArraySize / 8)) { // If fewer of 1/8 of the array is empty (meaning that many are filled with // tombstones), rehash. Grow(CurArraySize); @@ -55,8 +56,7 @@ SmallPtrSetImplBase::insert_imp_big(const void *Ptr) { // Otherwise, insert it! if (*Bucket == getTombstoneMarker()) --NumTombstones; - else - ++NumNonEmpty; // Track density. 
+ ++NumEntries; *Bucket = Ptr; incrementEpoch(); return std::make_pair(Bucket, true); @@ -130,7 +130,6 @@ void SmallPtrSetImplBase::Grow(unsigned NewSize) { if (!WasSmall) free(OldBuckets.begin()); - NumNonEmpty -= NumTombstones; NumTombstones = 0; IsSmall = false; } @@ -193,7 +192,7 @@ void SmallPtrSetImplBase::copyHelper(const SmallPtrSetImplBase &RHS) { // Copy over the contents from the other set std::copy(RHS.CurArray, RHS.EndPointer(), CurArray); - NumNonEmpty = RHS.NumNonEmpty; + NumEntries = RHS.NumEntries; NumTombstones = RHS.NumTombstones; } @@ -215,7 +214,7 @@ void SmallPtrSetImplBase::moveHelper(const void **SmallStorage, if (RHS.isSmall()) { // Copy a small RHS rather than moving. CurArray = SmallStorage; - std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, CurArray); + std::copy(RHS.CurArray, RHS.CurArray + RHS.NumEntries, CurArray); } else { CurArray = RHS.CurArray; RHS.CurArray = RHSSmallStorage; @@ -223,13 +222,13 @@ void SmallPtrSetImplBase::moveHelper(const void **SmallStorage, // Copy the rest of the trivial members. CurArraySize = RHS.CurArraySize; - NumNonEmpty = RHS.NumNonEmpty; + NumEntries = RHS.NumEntries; NumTombstones = RHS.NumTombstones; IsSmall = RHS.IsSmall; // Make the RHS small and empty. RHS.CurArraySize = SmallSize; - RHS.NumNonEmpty = 0; + RHS.NumEntries = 0; RHS.NumTombstones = 0; RHS.IsSmall = true; } @@ -243,7 +242,7 @@ void SmallPtrSetImplBase::swap(const void **SmallStorage, if (!this->isSmall() && !RHS.isSmall()) { std::swap(this->CurArray, RHS.CurArray); std::swap(this->CurArraySize, RHS.CurArraySize); - std::swap(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap(this->NumEntries, RHS.NumEntries); std::swap(this->NumTombstones, RHS.NumTombstones); return; } @@ -253,9 +252,9 @@ void SmallPtrSetImplBase::swap(const void **SmallStorage, // If only RHS is small, copy the small elements into LHS and move the pointer // from LHS to RHS. 
if (!this->isSmall() && RHS.isSmall()) { - std::copy(RHS.CurArray, RHS.CurArray + RHS.NumNonEmpty, SmallStorage); + std::copy(RHS.CurArray, RHS.CurArray + RHS.NumEntries, SmallStorage); std::swap(RHS.CurArraySize, this->CurArraySize); - std::swap(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap(this->NumEntries, RHS.NumEntries); std::swap(this->NumTombstones, RHS.NumTombstones); RHS.CurArray = this->CurArray; RHS.IsSmall = false; @@ -267,10 +266,10 @@ void SmallPtrSetImplBase::swap(const void **SmallStorage, // If only LHS is small, copy the small elements into RHS and move the pointer // from RHS to LHS. if (this->isSmall() && !RHS.isSmall()) { - std::copy(this->CurArray, this->CurArray + this->NumNonEmpty, + std::copy(this->CurArray, this->CurArray + this->NumEntries, RHSSmallStorage); std::swap(RHS.CurArraySize, this->CurArraySize); - std::swap(RHS.NumNonEmpty, this->NumNonEmpty); + std::swap(RHS.NumEntries, this->NumEntries); std::swap(RHS.NumTombstones, this->NumTombstones); this->CurArray = RHS.CurArray; this->IsSmall = false; @@ -281,16 +280,16 @@ void SmallPtrSetImplBase::swap(const void **SmallStorage, // Both a small, just swap the small elements. 
assert(this->isSmall() && RHS.isSmall()); - unsigned MinNonEmpty = std::min(this->NumNonEmpty, RHS.NumNonEmpty); - std::swap_ranges(this->CurArray, this->CurArray + MinNonEmpty, RHS.CurArray); - if (this->NumNonEmpty > MinNonEmpty) { - std::copy(this->CurArray + MinNonEmpty, this->CurArray + this->NumNonEmpty, - RHS.CurArray + MinNonEmpty); + unsigned MinEntries = std::min(this->NumEntries, RHS.NumEntries); + std::swap_ranges(this->CurArray, this->CurArray + MinEntries, RHS.CurArray); + if (this->NumEntries > MinEntries) { + std::copy(this->CurArray + MinEntries, this->CurArray + this->NumEntries, + RHS.CurArray + MinEntries); } else { - std::copy(RHS.CurArray + MinNonEmpty, RHS.CurArray + RHS.NumNonEmpty, - this->CurArray + MinNonEmpty); + std::copy(RHS.CurArray + MinEntries, RHS.CurArray + RHS.NumEntries, + this->CurArray + MinEntries); } assert(this->CurArraySize == RHS.CurArraySize); - std::swap(this->NumNonEmpty, RHS.NumNonEmpty); + std::swap(this->NumEntries, RHS.NumEntries); std::swap(this->NumTombstones, RHS.NumTombstones); } From 1d73b2c10d3bb1a2caf91f44847b9759ed252b21 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Sat, 16 Aug 2025 02:04:31 -0300 Subject: [PATCH 055/214] [clang] don't create type source info for vardecl created for structured bindings (#153923) These are implicit vardecls whose types were never written in source code. Don't create a TypeLoc and give it a fake source location. The fake as-written type also didn't match the actual type, which after fixing this gives some unrelated test churn on a CFG dump, since statement printing prefers type source info if that's available. Fixes https://github.com/llvm/llvm-project/issues/153649 This is a regression introduced in https://github.com/llvm/llvm-project/pull/147835 This regression was never released, so no release notes are added. 
--- .../checkers/modernize/type-traits-GH153649.cpp | 15 +++++++++++++++ clang/lib/Sema/SemaDeclCXX.cpp | 11 +++++++---- clang/test/Analysis/anonymous-decls.cpp | 4 ++-- 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-GH153649.cpp diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-GH153649.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-GH153649.cpp new file mode 100644 index 000000000000..142eb5847ae1 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-GH153649.cpp @@ -0,0 +1,15 @@ +// RUN: %check_clang_tidy -std=c++20 %s modernize-type-traits %t + +namespace std { +template struct tuple_size { + static const int value = 1; +}; +template struct tuple_element { + using type = int; +}; +} + +struct A {}; +template int get(const A&); + +auto [a] = A(); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index dd66a5f15a97..30930d9cf48c 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1373,10 +1373,13 @@ static bool checkTupleLikeDecomposition(Sema &S, S.BuildReferenceType(T, E.get()->isLValue(), Loc, B->getDeclName()); if (RefType.isNull()) return true; - auto *RefVD = VarDecl::Create( - S.Context, Src->getDeclContext(), Loc, Loc, - B->getDeclName().getAsIdentifierInfo(), RefType, - S.Context.getTrivialTypeSourceInfo(T, Loc), Src->getStorageClass()); + + // Don't give this VarDecl a TypeSourceInfo, since this is a synthesized + // entity and this type was never written in source code. 
+ auto *RefVD = + VarDecl::Create(S.Context, Src->getDeclContext(), Loc, Loc, + B->getDeclName().getAsIdentifierInfo(), RefType, + /*TInfo=*/nullptr, Src->getStorageClass()); RefVD->setLexicalDeclContext(Src->getLexicalDeclContext()); RefVD->setTSCSpec(Src->getTSCSpec()); RefVD->setImplicit(); diff --git a/clang/test/Analysis/anonymous-decls.cpp b/clang/test/Analysis/anonymous-decls.cpp index 3f972a33aa62..76e5155b61b6 100644 --- a/clang/test/Analysis/anonymous-decls.cpp +++ b/clang/test/Analysis/anonymous-decls.cpp @@ -78,12 +78,12 @@ int main() { // CHECK-NEXT: 8: decomposition-a-b // CHECK-NEXT: 9: [B3.7]([B3.8]) // CHECK-NEXT: 10: [B3.9] -// CHECK-NEXT: 11: std::tuple_element<0UL, std::pair>::type a = get<0UL>(decomposition-a-b); +// CHECK-NEXT: 11: std::tuple_element<0UL, std::pair>::type &&a = get<0UL>(decomposition-a-b); // CHECK-NEXT: 12: get<1UL> // CHECK-NEXT: 13: [B3.12] (ImplicitCastExpr, FunctionToPointerDecay, tuple_element<1L, pair >::type (*)(pair &)) // CHECK-NEXT: 14: decomposition-a-b // CHECK-NEXT: 15: [B3.13]([B3.14]) // CHECK-NEXT: 16: [B3.15] -// CHECK-NEXT: 17: std::tuple_element<1UL, std::pair>::type b = get<1UL>(decomposition-a-b); +// CHECK-NEXT: 17: std::tuple_element<1UL, std::pair>::type &&b = get<1UL>(decomposition-a-b); // CHECK-NEXT: Preds (1): B1 // CHECK-NEXT: Succs (1): B2 From f0967fca04c880e9aabd5be043a85127faabb4c6 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 16 Aug 2025 09:45:08 +0200 Subject: [PATCH 056/214] [mlir][LLVM] `FuncToLLVM`: Add 1:N type conversion support (#153823) Add support for 1:N type conversions to the `FuncToLLVM` lowering patterns. This commit does not change the lowering of any types (such as `MemRefType`). It just sets up the infrastructure, such that 1:N type conversions can be used during `FuncToLLVM`. Note: When the converted result types of a `func.func` have more than 1 type, then the results are wrapped in an `llvm.struct`. 
That's because `llvm.func` does not support multiple result values. This "wrapping" was already implemented for cases where the original `func.func` has multiple results. With 1:N conversions, even a single result can now expand to multiple converted results, triggering the same wrapping mechanism. The test cases are exercised with both the old and the new no-rollback conversion driver. --- .../Conversion/LLVMCommon/TypeConverter.h | 31 +++-- mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 113 +++++++++++------ .../Conversion/LLVMCommon/TypeConverter.cpp | 117 ++++++++++-------- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 24 ++-- .../MemRefToLLVM/type-conversion.mlir | 97 +++++++++++++-- mlir/test/lib/Dialect/LLVM/TestPatterns.cpp | 30 +++++ 6 files changed, 284 insertions(+), 128 deletions(-) diff --git a/mlir/include/mlir/Conversion/LLVMCommon/TypeConverter.h b/mlir/include/mlir/Conversion/LLVMCommon/TypeConverter.h index 38b5e492a8ed..2096bcb9896a 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/TypeConverter.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/TypeConverter.h @@ -74,8 +74,14 @@ public: /// LLVM-compatible type. In particular, if more than one value is returned, /// create an LLVM dialect structure type with elements that correspond to /// each of the types converted with `convertCallingConventionType`. - Type packFunctionResults(TypeRange types, - bool useBarePointerCallConv = false) const; + /// + /// Populate the converted (unpacked) types into `groupedTypes`, if provided. + /// `groupedTypes` contains one nested vector per input type. In case of a 1:N + /// conversion, a nested vector may contain 0 or more than 1 converted type. + Type + packFunctionResults(TypeRange types, bool useBarePointerCallConv = false, + SmallVector> *groupedTypes = nullptr, + int64_t *numConvertedTypes = nullptr) const; /// Convert a non-empty list of types of values produced by an operation into /// an LLVM-compatible type. 
In particular, if more than one value is @@ -88,15 +94,9 @@ public: /// UnrankedMemRefType, are converted following the specific rules for the /// calling convention. Calling convention independent types are converted /// following the default LLVM type conversions. - Type convertCallingConventionType(Type type, - bool useBarePointerCallConv = false) const; - - /// Promote the bare pointers in 'values' that resulted from memrefs to - /// descriptors. 'stdTypes' holds the types of 'values' before the conversion - /// to the LLVM-IR dialect (i.e., MemRefType, or any other builtin type). - void promoteBarePtrsToDescriptors(ConversionPatternRewriter &rewriter, - Location loc, ArrayRef stdTypes, - SmallVectorImpl &values) const; + LogicalResult + convertCallingConventionType(Type type, SmallVectorImpl &result, + bool useBarePointerCallConv = false) const; /// Returns the MLIR context. MLIRContext &getContext() const; @@ -109,9 +109,14 @@ public: /// Promote the LLVM representation of all operands including promoting MemRef /// descriptors to stack and use pointers to struct to avoid the complexity /// of the platform-specific C/C++ ABI lowering related to struct argument - /// passing. + /// passing. (The ArrayRef variant is for 1:N.) 
SmallVector promoteOperands(Location loc, ValueRange opOperands, - ValueRange operands, OpBuilder &builder, + ArrayRef adaptorOperands, + OpBuilder &builder, + bool useBarePtrCallConv = false) const; + SmallVector promoteOperands(Location loc, ValueRange opOperands, + ValueRange adaptorOperands, + OpBuilder &builder, bool useBarePtrCallConv = false) const; /// Promote the LLVM struct representation of one MemRef descriptor to stack diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index a4a6ae250640..42c76ed475b4 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -527,19 +527,21 @@ struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; using Super = CallOpInterfaceLowering; using Base = ConvertOpToLLVMPattern; + using Adaptor = typename ConvertOpToLLVMPattern::OneToNOpAdaptor; - LogicalResult matchAndRewriteImpl(CallOpType callOp, - typename CallOpType::Adaptor adaptor, + LogicalResult matchAndRewriteImpl(CallOpType callOp, Adaptor adaptor, ConversionPatternRewriter &rewriter, bool useBarePtrCallConv = false) const { // Pack the result types into a struct. 
Type packedResult = nullptr; + SmallVector> groupedResultTypes; unsigned numResults = callOp.getNumResults(); auto resultTypes = llvm::to_vector<4>(callOp.getResultTypes()); - + int64_t numConvertedTypes = 0; if (numResults != 0) { if (!(packedResult = this->getTypeConverter()->packFunctionResults( - resultTypes, useBarePtrCallConv))) + resultTypes, useBarePtrCallConv, &groupedResultTypes, + &numConvertedTypes))) return failure(); } @@ -565,34 +567,64 @@ struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern { static_cast(promoted.size()), 0}; newOp.getProperties().op_bundle_sizes = rewriter.getDenseI32ArrayAttr({}); - SmallVector results; - if (numResults < 2) { - // If < 2 results, packing did not do anything and we can just return. - results.append(newOp.result_begin(), newOp.result_end()); - } else { - // Otherwise, it had been converted to an operation producing a structure. - // Extract individual results from the structure and return them as list. - results.reserve(numResults); - for (unsigned i = 0; i < numResults; ++i) { - results.push_back(LLVM::ExtractValueOp::create( - rewriter, callOp.getLoc(), newOp->getResult(0), i)); + // Helper function that extracts an individual result from the return value + // of the new call op. llvm.call ops support only 0 or 1 result. In case of + // 2 or more results, the results are packed into a structure. + // + // The new call op may have more than 1 result because: + // a. The original call op has more than 1 result. + // b. An original op result type-converted to more than 1 result. + auto getUnpackedResult = [&](unsigned i) -> Value { + assert(numConvertedTypes > 0 && "convert op has no results"); + if (numConvertedTypes == 1) { + assert(i == 0 && "out of bounds: converted op has only one result"); + return newOp->getResult(0); + } + // Results have been converted to a structure. Extract individual results + // from the structure. 
+ return LLVM::ExtractValueOp::create(rewriter, callOp.getLoc(), + newOp->getResult(0), i); + }; + + // Group the results into a vector of vectors, such that it is clear which + // original op result is replaced with which range of values. (In case of a + // 1:N conversion, there can be multiple replacements for a single result.) + SmallVector> results; + results.reserve(numResults); + unsigned counter = 0; + for (unsigned i = 0; i < numResults; ++i) { + SmallVector &group = results.emplace_back(); + for (unsigned j = 0, e = groupedResultTypes[i].size(); j < e; ++j) + group.push_back(getUnpackedResult(counter++)); + } + + // Special handling for MemRef types. + for (unsigned i = 0; i < numResults; ++i) { + Type origType = resultTypes[i]; + auto memrefType = dyn_cast(origType); + auto unrankedMemrefType = dyn_cast(origType); + if (useBarePtrCallConv && memrefType) { + // For the bare-ptr calling convention, promote memref results to + // descriptors. + assert(results[i].size() == 1 && "expected one converted result"); + results[i].front() = MemRefDescriptor::fromStaticShape( + rewriter, callOp.getLoc(), *this->getTypeConverter(), memrefType, + results[i].front()); + } + if (unrankedMemrefType) { + assert(!useBarePtrCallConv && "unranked memref is not supported in the " + "bare-ptr calling convention"); + assert(results[i].size() == 1 && "expected one converted result"); + Value desc = this->copyUnrankedDescriptor( + rewriter, callOp.getLoc(), unrankedMemrefType, results[i].front(), + /*toDynamic=*/false); + if (!desc) + return failure(); + results[i].front() = desc; } } - if (useBarePtrCallConv) { - // For the bare-ptr calling convention, promote memref results to - // descriptors. 
- assert(results.size() == resultTypes.size() && - "The number of arguments and types doesn't match"); - this->getTypeConverter()->promoteBarePtrsToDescriptors( - rewriter, callOp.getLoc(), resultTypes, results); - } else if (failed(this->copyUnrankedDescriptors(rewriter, callOp.getLoc(), - resultTypes, results, - /*toDynamic=*/false))) { - return failure(); - } - - rewriter.replaceOp(callOp, results); + rewriter.replaceOpWithMultiple(callOp, results); return success(); } }; @@ -606,7 +638,7 @@ public: symbolTables(symbolTables) {} LogicalResult - matchAndRewrite(func::CallOp callOp, OpAdaptor adaptor, + matchAndRewrite(func::CallOp callOp, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { bool useBarePtrCallConv = false; if (getTypeConverter()->getOptions().useBarePtrCallConv) { @@ -636,7 +668,7 @@ struct CallIndirectOpLowering using Super::Super; LogicalResult - matchAndRewrite(func::CallIndirectOp callIndirectOp, OpAdaptor adaptor, + matchAndRewrite(func::CallIndirectOp callIndirectOp, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { return matchAndRewriteImpl(callIndirectOp, adaptor, rewriter); } @@ -679,47 +711,50 @@ struct ReturnOpLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(func::ReturnOp op, OpAdaptor adaptor, + matchAndRewrite(func::ReturnOp op, OneToNOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { Location loc = op.getLoc(); - unsigned numArguments = op.getNumOperands(); SmallVector updatedOperands; auto funcOp = op->getParentOfType(); bool useBarePtrCallConv = shouldUseBarePtrCallConv(funcOp, this->getTypeConverter()); - for (auto [oldOperand, newOperand] : + for (auto [oldOperand, newOperands] : llvm::zip_equal(op->getOperands(), adaptor.getOperands())) { Type oldTy = oldOperand.getType(); if (auto memRefType = dyn_cast(oldTy)) { + assert(newOperands.size() == 1 && "expected one converted 
result"); if (useBarePtrCallConv && getTypeConverter()->canConvertToBarePtr(memRefType)) { // For the bare-ptr calling convention, extract the aligned pointer to // be returned from the memref descriptor. - MemRefDescriptor memrefDesc(newOperand); + MemRefDescriptor memrefDesc(newOperands.front()); updatedOperands.push_back(memrefDesc.allocatedPtr(rewriter, loc)); continue; } } else if (auto unrankedMemRefType = dyn_cast(oldTy)) { + assert(newOperands.size() == 1 && "expected one converted result"); if (useBarePtrCallConv) { // Unranked memref is not supported in the bare pointer calling // convention. return failure(); } - Value updatedDesc = copyUnrankedDescriptor( - rewriter, loc, unrankedMemRefType, newOperand, /*toDynamic=*/true); + Value updatedDesc = + copyUnrankedDescriptor(rewriter, loc, unrankedMemRefType, + newOperands.front(), /*toDynamic=*/true); if (!updatedDesc) return failure(); updatedOperands.push_back(updatedDesc); continue; } - updatedOperands.push_back(newOperand); + + llvm::append_range(updatedOperands, newOperands); } // If ReturnOp has 0 or 1 operand, create it and return immediately. - if (numArguments <= 1) { + if (updatedOperands.size() <= 1) { rewriter.replaceOpWithNewOp( op, TypeRange(), updatedOperands, op->getAttrs()); return success(); diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index 1a9bf569086d..cb9dea108cc4 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -365,6 +365,7 @@ Type LLVMTypeConverter::convertFunctionSignatureImpl( useBarePtrCallConv = useBarePtrCallConv || options.useBarePtrCallConv; auto funcArgConverter = useBarePtrCallConv ? barePtrFuncArgTypeConverter : structFuncArgTypeConverter; + // Convert argument types one by one and check for errors. 
for (auto [idx, type] : llvm::enumerate(funcTy.getInputs())) { SmallVector converted; @@ -658,27 +659,19 @@ FailureOr LLVMTypeConverter::convertVectorType(VectorType type) const { /// UnrankedMemRefType, are converted following the specific rules for the /// calling convention. Calling convention independent types are converted /// following the default LLVM type conversions. -Type LLVMTypeConverter::convertCallingConventionType( - Type type, bool useBarePtrCallConv) const { - if (useBarePtrCallConv) - if (auto memrefTy = dyn_cast(type)) - return convertMemRefToBarePtr(memrefTy); +LogicalResult LLVMTypeConverter::convertCallingConventionType( + Type type, SmallVectorImpl &result, bool useBarePtrCallConv) const { + if (useBarePtrCallConv) { + if (auto memrefTy = dyn_cast(type)) { + Type converted = convertMemRefToBarePtr(memrefTy); + if (!converted) + return failure(); + result.push_back(converted); + return success(); + } + } - return convertType(type); -} - -/// Promote the bare pointers in 'values' that resulted from memrefs to -/// descriptors. 'stdTypes' holds they types of 'values' before the conversion -/// to the LLVM-IR dialect (i.e., MemRefType, or any other builtin type). -void LLVMTypeConverter::promoteBarePtrsToDescriptors( - ConversionPatternRewriter &rewriter, Location loc, ArrayRef stdTypes, - SmallVectorImpl &values) const { - assert(stdTypes.size() == values.size() && - "The number of types and values doesn't match"); - for (unsigned i = 0, end = values.size(); i < end; ++i) - if (auto memrefTy = dyn_cast(stdTypes[i])) - values[i] = MemRefDescriptor::fromStaticShape(rewriter, loc, *this, - memrefTy, values[i]); + return convertType(type, result); } /// Convert a non-empty list of types of values produced by an operation into an @@ -706,23 +699,35 @@ Type LLVMTypeConverter::packOperationResults(TypeRange types) const { /// LLVM-compatible type. 
In particular, if more than one value is returned, /// create an LLVM dialect structure type with elements that correspond to each /// of the types converted with `convertCallingConventionType`. -Type LLVMTypeConverter::packFunctionResults(TypeRange types, - bool useBarePtrCallConv) const { +Type LLVMTypeConverter::packFunctionResults( + TypeRange types, bool useBarePtrCallConv, + SmallVector> *groupedTypes, + int64_t *numConvertedTypes) const { assert(!types.empty() && "expected non-empty list of type"); + assert((!groupedTypes || groupedTypes->empty()) && + "expected groupedTypes to be empty"); useBarePtrCallConv |= options.useBarePtrCallConv; - if (types.size() == 1) - return convertCallingConventionType(types.front(), useBarePtrCallConv); - SmallVector resultTypes; resultTypes.reserve(types.size()); + size_t sizeBefore = 0; for (auto t : types) { - auto converted = convertCallingConventionType(t, useBarePtrCallConv); - if (!converted || !LLVM::isCompatibleType(converted)) + if (failed( + convertCallingConventionType(t, resultTypes, useBarePtrCallConv))) return {}; - resultTypes.push_back(converted); + if (groupedTypes) { + SmallVector &group = groupedTypes->emplace_back(); + llvm::append_range(group, ArrayRef(resultTypes).drop_front(sizeBefore)); + } + sizeBefore = resultTypes.size(); } + if (numConvertedTypes) + *numConvertedTypes = resultTypes.size(); + if (resultTypes.size() == 1) + return resultTypes.front(); + if (resultTypes.empty()) + return {}; return LLVM::LLVMStructType::getLiteral(&getContext(), resultTypes); } @@ -740,40 +745,50 @@ Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, return allocated; } -SmallVector -LLVMTypeConverter::promoteOperands(Location loc, ValueRange opOperands, - ValueRange operands, OpBuilder &builder, - bool useBarePtrCallConv) const { - SmallVector promotedOperands; - promotedOperands.reserve(operands.size()); - useBarePtrCallConv |= options.useBarePtrCallConv; - for (auto it : 
llvm::zip(opOperands, operands)) { - auto operand = std::get<0>(it); - auto llvmOperand = std::get<1>(it); +SmallVector LLVMTypeConverter::promoteOperands( + Location loc, ValueRange opOperands, ValueRange adaptorOperands, + OpBuilder &builder, bool useBarePtrCallConv) const { + SmallVector ranges; + for (size_t i = 0, e = adaptorOperands.size(); i < e; i++) + ranges.push_back(adaptorOperands.slice(i, 1)); + return promoteOperands(loc, opOperands, ranges, builder, useBarePtrCallConv); +} +SmallVector LLVMTypeConverter::promoteOperands( + Location loc, ValueRange opOperands, ArrayRef adaptorOperands, + OpBuilder &builder, bool useBarePtrCallConv) const { + SmallVector promotedOperands; + promotedOperands.reserve(adaptorOperands.size()); + useBarePtrCallConv |= options.useBarePtrCallConv; + for (auto [operand, llvmOperand] : + llvm::zip_equal(opOperands, adaptorOperands)) { if (useBarePtrCallConv) { // For the bare-ptr calling convention, we only have to extract the // aligned pointer of a memref. 
if (isa(operand.getType())) { - MemRefDescriptor desc(llvmOperand); - llvmOperand = desc.alignedPtr(builder, loc); + assert(llvmOperand.size() == 1 && "Expected a single operand"); + MemRefDescriptor desc(llvmOperand.front()); + promotedOperands.push_back(desc.alignedPtr(builder, loc)); + continue; } else if (isa(operand.getType())) { llvm_unreachable("Unranked memrefs are not supported"); } } else { if (isa(operand.getType())) { - UnrankedMemRefDescriptor::unpack(builder, loc, llvmOperand, + assert(llvmOperand.size() == 1 && "Expected a single operand"); + UnrankedMemRefDescriptor::unpack(builder, loc, llvmOperand.front(), promotedOperands); continue; } if (auto memrefType = dyn_cast(operand.getType())) { - MemRefDescriptor::unpack(builder, loc, llvmOperand, memrefType, + assert(llvmOperand.size() == 1 && "Expected a single operand"); + MemRefDescriptor::unpack(builder, loc, llvmOperand.front(), memrefType, promotedOperands); continue; } } - promotedOperands.push_back(llvmOperand); + llvm::append_range(promotedOperands, llvmOperand); } return promotedOperands; } @@ -802,11 +817,7 @@ mlir::structFuncArgTypeConverter(const LLVMTypeConverter &converter, Type type, result.append(converted.begin(), converted.end()); return success(); } - auto converted = converter.convertType(type); - if (!converted) - return failure(); - result.push_back(converted); - return success(); + return converter.convertType(type, result); } /// Callback to convert function argument types. 
It converts MemRef function @@ -814,11 +825,7 @@ mlir::structFuncArgTypeConverter(const LLVMTypeConverter &converter, Type type, LogicalResult mlir::barePtrFuncArgTypeConverter(const LLVMTypeConverter &converter, Type type, SmallVectorImpl &result) { - auto llvmTy = converter.convertCallingConventionType( - type, /*useBarePointerCallConv=*/true); - if (!llvmTy) - return failure(); - - result.push_back(llvmTy); - return success(); + return converter.convertCallingConventionType( + type, result, + /*useBarePointerCallConv=*/true); } diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index f7f538179952..c6c5ab356f25 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -1106,12 +1106,10 @@ struct NVGPUGenerateWarpgroupDescriptorLowering // // [0,14) start_address dsc = insertBit(dsc, basePtr14bit, startBaseAddrBit); - LDBG() << "Generating warpgroup.descriptor: " - << "leading_off:" << leadDimVal << "\t" - << "stride_off :" << strideDimVal << "\t" - << "base_offset:" << offsetVal << "\t" - << "layout_type:" << swizzle << " (" - << nvgpu::stringifyTensorMapSwizzleKind(swizzleKind) + LDBG() << "Generating warpgroup.descriptor: " << "leading_off:" + << leadDimVal << "\t" << "stride_off :" << strideDimVal << "\t" + << "base_offset:" << offsetVal << "\t" << "layout_type:" << swizzle + << " (" << nvgpu::stringifyTensorMapSwizzleKind(swizzleKind) << ")\n start_addr : " << baseAddr; rewriter.replaceOp(op, dsc); @@ -1401,14 +1399,12 @@ struct NVGPUWarpgroupMmaOpLowering /// This function generates a WgmmaMmaAsyncOp using provided GMMA matrix /// descriptors and arranges them based on induction variables: i, j, and k. Value generateWgmma(int i, int j, int k, Value matrixC) { - LDBG() << "\t wgmma." 
- << "m" << wgmmaM << "n" << wgmmaN << "k" << wgmmaK << "(A[" - << (iterationM * wgmmaM) << ":" << (iterationM * wgmmaM) + wgmmaM - << "][" << (iterationK * wgmmaK) << ":" - << (iterationK * wgmmaK + wgmmaK) << "] * " - << " B[" << (iterationK * wgmmaK) << ":" - << (iterationK * wgmmaK + wgmmaK) << "][" << 0 << ":" << wgmmaN - << "])"; + LDBG() << "\t wgmma." << "m" << wgmmaM << "n" << wgmmaN << "k" << wgmmaK + << "(A[" << (iterationM * wgmmaM) << ":" + << (iterationM * wgmmaM) + wgmmaM << "][" << (iterationK * wgmmaK) + << ":" << (iterationK * wgmmaK + wgmmaK) << "] * " << " B[" + << (iterationK * wgmmaK) << ":" << (iterationK * wgmmaK + wgmmaK) + << "][" << 0 << ":" << wgmmaN << "])"; Value descriptorA = iterateDescriptorA(adaptor.getDescriptorA(), i, j, k); Value descriptorB = iterateDescriptorB(adaptor.getDescriptorB(), i, j, k); diff --git a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir index 0288aa11313c..c1751f282b00 100644 --- a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir @@ -1,12 +1,13 @@ -// RUN: mlir-opt %s -test-llvm-legalize-patterns -split-input-file +// RUN: mlir-opt %s -test-llvm-legalize-patterns -split-input-file | FileCheck %s +// RUN: mlir-opt %s -test-llvm-legalize-patterns="allow-pattern-rollback=0" -split-input-file | FileCheck %s // Test the argument materializer for ranked MemRef types. 
// CHECK-LABEL: func @construct_ranked_memref_descriptor( -// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.mlir.poison : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-COUNT-7: llvm.insertvalue // CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<5x4xf32> -func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) { +func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) attributes {is_legal} { %0 = "test.direct_replacement"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64) -> (memref<5x4xf32>) "test.legal_op"(%0) : (memref<5x4xf32>) -> () return @@ -21,7 +22,7 @@ func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr // CHECK-LABEL: func @invalid_ranked_memref_descriptor( // CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %{{.*}} : i1 to memref<5x4xf32> // CHECK: "test.legal_op"(%[[cast]]) -func.func @invalid_ranked_memref_descriptor(%arg0: i1) { +func.func @invalid_ranked_memref_descriptor(%arg0: i1) attributes {is_legal} { %0 = "test.direct_replacement"(%arg0) : (i1) -> (memref<5x4xf32>) "test.legal_op"(%0) : (memref<5x4xf32>) -> () return @@ -32,10 +33,10 @@ func.func @invalid_ranked_memref_descriptor(%arg0: i1) { // Test the argument materializer for unranked MemRef types. 
// CHECK-LABEL: func @construct_unranked_memref_descriptor( -// CHECK: llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK: llvm.mlir.poison : !llvm.struct<(i64, ptr)> // CHECK-COUNT-2: llvm.insertvalue // CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(i64, ptr)> to memref<*xf32> -func.func @construct_unranked_memref_descriptor(%arg0: i64, %arg1: !llvm.ptr) { +func.func @construct_unranked_memref_descriptor(%arg0: i64, %arg1: !llvm.ptr) attributes {is_legal} { %0 = "test.direct_replacement"(%arg0, %arg1) : (i64, !llvm.ptr) -> (memref<*xf32>) "test.legal_op"(%0) : (memref<*xf32>) -> () return @@ -50,8 +51,90 @@ func.func @construct_unranked_memref_descriptor(%arg0: i64, %arg1: !llvm.ptr) { // CHECK-LABEL: func @invalid_unranked_memref_descriptor( // CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %{{.*}} : i1 to memref<*xf32> // CHECK: "test.legal_op"(%[[cast]]) -func.func @invalid_unranked_memref_descriptor(%arg0: i1) { +func.func @invalid_unranked_memref_descriptor(%arg0: i1) attributes {is_legal} { %0 = "test.direct_replacement"(%arg0) : (i1) -> (memref<*xf32>) "test.legal_op"(%0) : (memref<*xf32>) -> () return } + +// ----- + +// CHECK-LABEL: llvm.func @simple_func_conversion( +// CHECK-SAME: %[[arg0:.*]]: i64) -> i64 +// CHECK: llvm.return %[[arg0]] : i64 +func.func @simple_func_conversion(%arg0: i64) -> i64 { + return %arg0 : i64 +} + +// ----- + +// CHECK-LABEL: llvm.func @one_to_n_argument_conversion( +// CHECK-SAME: %[[arg0:.*]]: i18, %[[arg1:.*]]: i18) +// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[arg0]], %[[arg1]] : i18, i18 to i17 +// CHECK: "test.legal_op"(%[[cast]]) : (i17) -> () +func.func @one_to_n_argument_conversion(%arg0: i17) { + "test.legal_op"(%arg0) : (i17) -> () + return +} + +// CHECK: llvm.func @caller(%[[arg0:.*]]: i18, %[[arg1:.*]]: i18) +// CHECK: llvm.call @one_to_n_argument_conversion(%[[arg0]], %[[arg1]]) : (i18, i18) -> () +func.func @caller(%arg0: i17) { + func.call 
@one_to_n_argument_conversion(%arg0) : (i17) -> () + return +} + +// ----- + +// CHECK-LABEL: llvm.func @one_to_n_return_conversion( +// CHECK-SAME: %[[arg0:.*]]: i18, %[[arg1:.*]]: i18) -> !llvm.struct<(i18, i18)> +// CHECK: %[[p1:.*]] = llvm.mlir.poison : !llvm.struct<(i18, i18)> +// CHECK: %[[p2:.*]] = llvm.insertvalue %[[arg0]], %[[p1]][0] : !llvm.struct<(i18, i18)> +// CHECK: %[[p3:.*]] = llvm.insertvalue %[[arg1]], %[[p2]][1] : !llvm.struct<(i18, i18)> +// CHECK: llvm.return %[[p3]] +func.func @one_to_n_return_conversion(%arg0: i17) -> i17 { + return %arg0 : i17 +} + +// CHECK: llvm.func @caller(%[[arg0:.*]]: i18, %[[arg1:.*]]: i18) +// CHECK: %[[res:.*]] = llvm.call @one_to_n_return_conversion(%[[arg0]], %[[arg1]]) : (i18, i18) -> !llvm.struct<(i18, i18)> +// CHECK: %[[e0:.*]] = llvm.extractvalue %[[res]][0] : !llvm.struct<(i18, i18)> +// CHECK: %[[e1:.*]] = llvm.extractvalue %[[res]][1] : !llvm.struct<(i18, i18)> +// CHECK: %[[i0:.*]] = llvm.mlir.poison : !llvm.struct<(i18, i18)> +// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e0]], %[[i0]][0] : !llvm.struct<(i18, i18)> +// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e1]], %[[i1]][1] : !llvm.struct<(i18, i18)> +// CHECK: llvm.return %[[i2]] +func.func @caller(%arg0: i17) -> (i17) { + %res = func.call @one_to_n_return_conversion(%arg0) : (i17) -> (i17) + return %res : i17 +} + +// ----- + +// CHECK-LABEL: llvm.func @multi_return( +// CHECK-SAME: %[[arg0:.*]]: i18, %[[arg1:.*]]: i18, %[[arg2:.*]]: i1) -> !llvm.struct<(i18, i18, i1)> +// CHECK: %[[p1:.*]] = llvm.mlir.poison : !llvm.struct<(i18, i18, i1)> +// CHECK: %[[p2:.*]] = llvm.insertvalue %[[arg0]], %[[p1]][0] : !llvm.struct<(i18, i18, i1)> +// CHECK: %[[p3:.*]] = llvm.insertvalue %[[arg1]], %[[p2]][1] : !llvm.struct<(i18, i18, i1)> +// CHECK: %[[p4:.*]] = llvm.insertvalue %[[arg2]], %[[p3]][2] : !llvm.struct<(i18, i18, i1)> +// CHECK: llvm.return %[[p4]] +func.func @multi_return(%arg0: i17, %arg1: i1) -> (i17, i1) { + return %arg0, %arg1 : i17, i1 +} + +// 
CHECK: llvm.func @caller(%[[arg0:.*]]: i1, %[[arg1:.*]]: i18, %[[arg2:.*]]: i18) +// CHECK: %[[res:.*]] = llvm.call @multi_return(%[[arg1]], %[[arg2]], %[[arg0]]) : (i18, i18, i1) -> !llvm.struct<(i18, i18, i1)> +// CHECK: %[[e0:.*]] = llvm.extractvalue %[[res]][0] : !llvm.struct<(i18, i18, i1)> +// CHECK: %[[e1:.*]] = llvm.extractvalue %[[res]][1] : !llvm.struct<(i18, i18, i1)> +// CHECK: %[[e2:.*]] = llvm.extractvalue %[[res]][2] : !llvm.struct<(i18, i18, i1)> +// CHECK: %[[i0:.*]] = llvm.mlir.poison : !llvm.struct<(i18, i18, i1, i18, i18)> +// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e0]], %[[i0]][0] +// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e1]], %[[i1]][1] +// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e2]], %[[i2]][2] +// CHECK: %[[i4:.*]] = llvm.insertvalue %[[e0]], %[[i3]][3] +// CHECK: %[[i5:.*]] = llvm.insertvalue %[[e1]], %[[i4]][4] +// CHECK: llvm.return %[[i5]] +func.func @caller(%arg0: i1, %arg1: i17) -> (i17, i1, i17) { + %res:2 = func.call @multi_return(%arg1, %arg0) : (i17, i1) -> (i17, i1) + return %res#0, %res#1, %res#0 : i17, i1, i17 +} diff --git a/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp index ab02866970b1..fe9aa0f2a990 100644 --- a/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Pass/Pass.h" @@ -34,6 +36,10 @@ struct TestLLVMLegalizePatternsPass : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestLLVMLegalizePatternsPass) + TestLLVMLegalizePatternsPass() = default; + TestLLVMLegalizePatternsPass(const TestLLVMLegalizePatternsPass &other) + : PassWrapper(other) {} + StringRef 
getArgument() const final { return "test-llvm-legalize-patterns"; } StringRef getDescription() const final { return "Run LLVM dialect legalization patterns"; @@ -45,22 +51,46 @@ struct TestLLVMLegalizePatternsPass void runOnOperation() override { MLIRContext *ctx = &getContext(); + + // Set up type converter. LLVMTypeConverter converter(ctx); + converter.addConversion( + [&](IntegerType type, SmallVectorImpl &result) { + if (type.isInteger(17)) { + // Convert i17 -> (i18, i18). + result.append(2, Builder(ctx).getIntegerType(18)); + return success(); + } + + result.push_back(type); + return success(); + }); + + // Populate patterns. mlir::RewritePatternSet patterns(ctx); patterns.add(ctx, converter); + populateFuncToLLVMConversionPatterns(converter, patterns); // Define the conversion target used for the test. ConversionTarget target(*ctx); target.addLegalOp(OperationName("test.legal_op", ctx)); + target.addLegalDialect(); + target.addDynamicallyLegalOp( + [&](func::FuncOp funcOp) { return funcOp->hasAttr("is_legal"); }); // Handle a partial conversion. DenseSet unlegalizedOps; ConversionConfig config; config.unlegalizedOps = &unlegalizedOps; + config.allowPatternRollback = allowPatternRollback; if (failed(applyPartialConversion(getOperation(), target, std::move(patterns), config))) getOperation()->emitError() << "applyPartialConversion failed"; } + + Option allowPatternRollback{*this, "allow-pattern-rollback", + llvm::cl::desc("Allow pattern rollback"), + llvm::cl::init(true)}; }; } // namespace From f8f23e838a3b31830e7b918316d9e0782b05061f Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 16 Aug 2025 12:51:40 +0200 Subject: [PATCH 057/214] [mlir][LLVM] `ControlFlowToLLVM`: Add 1:N type conversion support (#153937) Add support for 1:N type conversions to the `ControlFlowToLLVM` lowering patterns. Not applicable to `cf.switch` and `cf.assert`. 
--------- Co-authored-by: Tobias Gysi --- .../ControlFlowToLLVM/ControlFlowToLLVM.cpp | 41 ++++++++++++++----- .../MemRefToLLVM/type-conversion.mlir | 17 ++++++++ mlir/test/lib/Dialect/LLVM/TestPatterns.cpp | 2 + 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp index ff6d36917639..e1bbeb996d73 100644 --- a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp +++ b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp @@ -125,22 +125,33 @@ static FailureOr getConvertedBlock(ConversionPatternRewriter &rewriter, return rewriter.applySignatureConversion(block, *conversion, converter); } +/// Flatten the given value ranges into a single vector of values. +static SmallVector flattenValues(ArrayRef values) { + SmallVector result; + for (const ValueRange &vals : values) + llvm::append_range(result, vals); + return result; +} + /// Convert the destination block signature (if necessary) and lower the branch /// op to llvm.br. struct BranchOpLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + using Adaptor = + typename ConvertOpToLLVMPattern::OneToNOpAdaptor; LogicalResult - matchAndRewrite(cf::BranchOp op, typename cf::BranchOp::Adaptor adaptor, + matchAndRewrite(cf::BranchOp op, Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { + SmallVector flattenedAdaptor = flattenValues(adaptor.getOperands()); FailureOr convertedBlock = getConvertedBlock(rewriter, getTypeConverter(), op, op.getSuccessor(), - TypeRange(adaptor.getOperands())); + TypeRange(flattenedAdaptor)); if (failed(convertedBlock)) return failure(); DictionaryAttr attrs = op->getAttrDictionary(); Operation *newOp = rewriter.replaceOpWithNewOp( - op, adaptor.getOperands(), *convertedBlock); + op, flattenedAdaptor, *convertedBlock); // TODO: We should not just forward all attributes like that. 
But there are // existing Flang tests that depend on this behavior. newOp->setAttrs(attrs); @@ -152,29 +163,37 @@ struct BranchOpLowering : public ConvertOpToLLVMPattern { /// branch op to llvm.cond_br. struct CondBranchOpLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + using Adaptor = + typename ConvertOpToLLVMPattern::OneToNOpAdaptor; LogicalResult - matchAndRewrite(cf::CondBranchOp op, - typename cf::CondBranchOp::Adaptor adaptor, + matchAndRewrite(cf::CondBranchOp op, Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { + SmallVector flattenedAdaptorTrue = + flattenValues(adaptor.getTrueDestOperands()); + SmallVector flattenedAdaptorFalse = + flattenValues(adaptor.getFalseDestOperands()); + if (!llvm::hasSingleElement(adaptor.getCondition())) + return rewriter.notifyMatchFailure(op, + "expected single element condition"); FailureOr convertedTrueBlock = getConvertedBlock(rewriter, getTypeConverter(), op, op.getTrueDest(), - TypeRange(adaptor.getTrueDestOperands())); + TypeRange(flattenedAdaptorTrue)); if (failed(convertedTrueBlock)) return failure(); FailureOr convertedFalseBlock = getConvertedBlock(rewriter, getTypeConverter(), op, op.getFalseDest(), - TypeRange(adaptor.getFalseDestOperands())); + TypeRange(flattenedAdaptorFalse)); if (failed(convertedFalseBlock)) return failure(); - DictionaryAttr attrs = op->getAttrDictionary(); + DictionaryAttr attrs = op->getDiscardableAttrDictionary(); auto newOp = rewriter.replaceOpWithNewOp( - op, adaptor.getCondition(), adaptor.getTrueDestOperands(), - adaptor.getFalseDestOperands(), op.getBranchWeightsAttr(), + op, llvm::getSingleElement(adaptor.getCondition()), + flattenedAdaptorTrue, flattenedAdaptorFalse, op.getBranchWeightsAttr(), *convertedTrueBlock, *convertedFalseBlock); // TODO: We should not just forward all attributes like that. But there are // existing Flang tests that depend on this behavior. 
- newOp->setAttrs(attrs); + newOp->setDiscardableAttrs(attrs); return success(); } }; diff --git a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir index c1751f282b00..6c6756f5097b 100644 --- a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir @@ -138,3 +138,20 @@ func.func @caller(%arg0: i1, %arg1: i17) -> (i17, i1, i17) { %res:2 = func.call @multi_return(%arg1, %arg0) : (i17, i1) -> (i17, i1) return %res#0, %res#1, %res#0 : i17, i1, i17 } + +// ----- + +// CHECK-LABEL: llvm.func @branch( +// CHECK-SAME: %[[arg0:.*]]: i1, %[[arg1:.*]]: i18, %[[arg2:.*]]: i18) +// CHECK: llvm.br ^[[bb1:.*]](%[[arg1]], %[[arg2]], %[[arg0]] : i18, i18, i1) +// CHECK: ^[[bb1]](%[[arg3:.*]]: i18, %[[arg4:.*]]: i18, %[[arg5:.*]]: i1): +// CHECK: llvm.cond_br %[[arg5]], ^[[bb1]](%[[arg1]], %[[arg2]], %[[arg5]] : i18, i18, i1), ^[[bb2:.*]](%[[arg3]], %[[arg4]] : i18, i18) +// CHECK: ^bb2(%{{.*}}: i18, %{{.*}}: i18): +// CHECK: llvm.return +func.func @branch(%arg0: i1, %arg1: i17) { + cf.br ^bb1(%arg1, %arg0: i17, i1) +^bb1(%arg2: i17, %arg3: i1): + cf.cond_br %arg3, ^bb1(%arg1, %arg3 : i17, i1), ^bb2(%arg2 : i17) +^bb2(%arg4: i17): + return +} diff --git a/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp index fe9aa0f2a990..9d30ae43cccc 100644 --- a/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Func/IR/FuncOps.h" @@ -70,6 +71,7 @@ struct TestLLVMLegalizePatternsPass mlir::RewritePatternSet patterns(ctx); patterns.add(ctx, converter); 
populateFuncToLLVMConversionPatterns(converter, patterns); + cf::populateControlFlowToLLVMConversionPatterns(converter, patterns); // Define the conversion target used for the test. ConversionTarget target(*ctx); From 2692ff8213b40d5237a4b7db22f5993bb8a1cfbe Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 16 Aug 2025 13:06:58 +0200 Subject: [PATCH 058/214] [mlir][LLVM] Fix build (#153947) Fix build after #153937. --- mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp index e1bbeb996d73..798d8b04eed7 100644 --- a/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp +++ b/mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp @@ -146,7 +146,7 @@ struct BranchOpLowering : public ConvertOpToLLVMPattern { SmallVector flattenedAdaptor = flattenValues(adaptor.getOperands()); FailureOr convertedBlock = getConvertedBlock(rewriter, getTypeConverter(), op, op.getSuccessor(), - TypeRange(flattenedAdaptor)); + TypeRange(ValueRange(flattenedAdaptor))); if (failed(convertedBlock)) return failure(); DictionaryAttr attrs = op->getAttrDictionary(); @@ -178,12 +178,12 @@ struct CondBranchOpLowering : public ConvertOpToLLVMPattern { "expected single element condition"); FailureOr convertedTrueBlock = getConvertedBlock(rewriter, getTypeConverter(), op, op.getTrueDest(), - TypeRange(flattenedAdaptorTrue)); + TypeRange(ValueRange(flattenedAdaptorTrue))); if (failed(convertedTrueBlock)) return failure(); FailureOr convertedFalseBlock = getConvertedBlock(rewriter, getTypeConverter(), op, op.getFalseDest(), - TypeRange(flattenedAdaptorFalse)); + TypeRange(ValueRange(flattenedAdaptorFalse))); if (failed(convertedFalseBlock)) return failure(); DictionaryAttr attrs = op->getDiscardableAttrDictionary(); From a293573c4e3e43f9f6279f075c3262ea5dc17086 Mon Sep 17 00:00:00 
2001 From: Mingjie Xu Date: Sat, 16 Aug 2025 19:59:10 +0800 Subject: [PATCH 059/214] [SSAUpdater] Only iterate blocks modified by CheckIfPHIMatches() in RecordMatchingPHIs() (#153596) In https://github.com/llvm/llvm-project/pull/100281, we use `TaggedBlocks` to record blocks modified by `CheckIfPHIMatches()`, so we do not need to clear every block in `BlockList` if `CheckIfPHIMatches()` fails to match. If `CheckIfPHIMatches()` succeeds, we can reuse `TaggedBlocks` to record matching PHIs only for the modified blocks, which avoids checking every block in `BlockList` to see if `PHITag` is set. --- .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index 746926e5bee3..52fe3a6f4baf 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -366,7 +366,7 @@ public: continue; // Look for an existing PHI. - FindExistingPHI(Info->BB, BlockList); + FindExistingPHI(Info->BB); if (Info->AvailableVal) continue; @@ -412,11 +412,11 @@ public: /// FindExistingPHI - Look through the PHI nodes in a block to see if any of /// them match what is needed. - void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) { + void FindExistingPHI(BlkT *BB) { SmallVector TaggedBlocks; for (auto &SomePHI : BB->phis()) { if (CheckIfPHIMatches(&SomePHI, TaggedBlocks)) { - RecordMatchingPHIs(BlockList); + RecordMatchingPHIs(TaggedBlocks); break; } } @@ -424,7 +424,7 @@ public: /// CheckIfPHIMatches - Check if a PHI node matches the placement and values /// in the BBMap. - bool CheckIfPHIMatches(PhiT *PHI, SmallVectorImpl &TaggedBlocks) { + bool CheckIfPHIMatches(PhiT *PHI, BlockListTy &TaggedBlocks) { // Match failed: clear all the PHITag values. Only need to clear visited // blocks. 
auto Cleanup = make_scope_exit([&]() { @@ -484,15 +484,15 @@ public: /// RecordMatchingPHIs - For each PHI node that matches, record it in both /// the BBMap and the AvailableVals mapping. - void RecordMatchingPHIs(BlockListTy *BlockList) { - for (typename BlockListTy::iterator I = BlockList->begin(), - E = BlockList->end(); I != E; ++I) - if (PhiT *PHI = (*I)->PHITag) { - BlkT *BB = PHI->getParent(); - ValT PHIVal = Traits::GetPHIValue(PHI); - (*AvailableVals)[BB] = PHIVal; - BBMap[BB]->AvailableVal = PHIVal; - } + void RecordMatchingPHIs(BlockListTy &TaggedBlocks) { + for (BBInfo *Block : TaggedBlocks) { + PhiT *PHI = Block->PHITag; + assert(PHI && "PHITag didn't set?"); + BlkT *BB = PHI->getParent(); + ValT PHIVal = Traits::GetPHIValue(PHI); + (*AvailableVals)[BB] = PHIVal; + BBMap[BB]->AvailableVal = PHIVal; + } } }; From 6fc1deb8b7497c36c946fa468f53d797ad54f24c Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Sat, 16 Aug 2025 09:25:15 -0400 Subject: [PATCH 060/214] [mlir][python] handle more undefined symbols not covered by nanobind (#153861) Introduced (but omitted from this CMake) in https://github.com/llvm/llvm-project/pull/151246. --- mlir/cmake/modules/AddMLIRPython.cmake | 7 ++++++- mlir/lib/Bindings/Python/IRCore.cpp | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mlir/cmake/modules/AddMLIRPython.cmake b/mlir/cmake/modules/AddMLIRPython.cmake index c14e614ed7d9..2b883558d33c 100644 --- a/mlir/cmake/modules/AddMLIRPython.cmake +++ b/mlir/cmake/modules/AddMLIRPython.cmake @@ -704,7 +704,12 @@ function(add_mlir_python_extension libname extname) # NanobindAdaptors.h uses PyClassMethod_New to build `pure_subclass`es but nanobind # doesn't declare this API as undefined in its linker flags. So we need to declare it as such # for downstream users that do not do something like `-undefined dynamic_lookup`. - target_link_options(${libname} PUBLIC "LINKER:-U,_PyClassMethod_New") + # Same for the rest. 
+ target_link_options(${libname} PUBLIC + "LINKER:-U,_PyClassMethod_New" + "LINKER:-U,_PyCode_Addr2Location" + "LINKER:-U,_PyFrame_GetLasti" + ) endif() endif() diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 390cdc5429be..4b3a06cbce85 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -2810,7 +2810,7 @@ private: // bpo-42262 added Py_XNewRef() #if !defined(Py_XNewRef) -PyObject *_Py_XNewRef(PyObject *obj) { +[[maybe_unused]] PyObject *_Py_XNewRef(PyObject *obj) { Py_XINCREF(obj); return obj; } @@ -2819,7 +2819,7 @@ PyObject *_Py_XNewRef(PyObject *obj) { // bpo-42262 added Py_NewRef() #if !defined(Py_NewRef) -PyObject *_Py_NewRef(PyObject *obj) { +[[maybe_unused]] PyObject *_Py_NewRef(PyObject *obj) { Py_INCREF(obj); return obj; } From 0ede7ace0df65d7436b3ed74694ad28a9edb1735 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 16 Aug 2025 06:47:18 -0700 Subject: [PATCH 061/214] [ADT] Use llvm::copy in SmallPtrSet.cpp (NFC) (#153930) This patch uses llvm::copy in combination with buckets() and small_buckets(). --- llvm/include/llvm/ADT/SmallPtrSet.h | 4 ++++ llvm/lib/Support/SmallPtrSet.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 2829cbf41b66..0d7fe308a32f 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -160,6 +160,10 @@ protected: return make_range(CurArray, EndPointer()); } + iterator_range buckets() const { + return make_range(CurArray, EndPointer()); + } + /// insert_imp - This returns true if the pointer was new to the set, false if /// it was already in the set. This is hidden from the client so that the /// derived class can check that the right type of pointer is passed in. 
diff --git a/llvm/lib/Support/SmallPtrSet.cpp b/llvm/lib/Support/SmallPtrSet.cpp index 46cf53f1ecf8..39fe1715d19b 100644 --- a/llvm/lib/Support/SmallPtrSet.cpp +++ b/llvm/lib/Support/SmallPtrSet.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemAlloc.h" #include @@ -190,7 +191,7 @@ void SmallPtrSetImplBase::copyHelper(const SmallPtrSetImplBase &RHS) { CurArraySize = RHS.CurArraySize; // Copy over the contents from the other set - std::copy(RHS.CurArray, RHS.EndPointer(), CurArray); + llvm::copy(RHS.buckets(), CurArray); NumEntries = RHS.NumEntries; NumTombstones = RHS.NumTombstones; @@ -214,7 +215,7 @@ void SmallPtrSetImplBase::moveHelper(const void **SmallStorage, if (RHS.isSmall()) { // Copy a small RHS rather than moving. CurArray = SmallStorage; - std::copy(RHS.CurArray, RHS.CurArray + RHS.NumEntries, CurArray); + llvm::copy(RHS.small_buckets(), CurArray); } else { CurArray = RHS.CurArray; RHS.CurArray = RHSSmallStorage; @@ -252,7 +253,7 @@ void SmallPtrSetImplBase::swap(const void **SmallStorage, // If only RHS is small, copy the small elements into LHS and move the pointer // from LHS to RHS. if (!this->isSmall() && RHS.isSmall()) { - std::copy(RHS.CurArray, RHS.CurArray + RHS.NumEntries, SmallStorage); + llvm::copy(RHS.small_buckets(), SmallStorage); std::swap(RHS.CurArraySize, this->CurArraySize); std::swap(this->NumEntries, RHS.NumEntries); std::swap(this->NumTombstones, RHS.NumTombstones); @@ -266,8 +267,7 @@ void SmallPtrSetImplBase::swap(const void **SmallStorage, // If only LHS is small, copy the small elements into RHS and move the pointer // from RHS to LHS. 
if (this->isSmall() && !RHS.isSmall()) { - std::copy(this->CurArray, this->CurArray + this->NumEntries, - RHSSmallStorage); + llvm::copy(this->small_buckets(), RHSSmallStorage); std::swap(RHS.CurArraySize, this->CurArraySize); std::swap(RHS.NumEntries, this->NumEntries); std::swap(RHS.NumTombstones, this->NumTombstones); From 84f4465135313f649da705e97ff4b6a8029b103e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 16 Aug 2025 06:47:25 -0700 Subject: [PATCH 062/214] [ExecutionEngine] Remove unnecessary casts (NFC) (#153931) getLoadAddressWithOffset() and getLoadAddress() already return uint64_t. --- llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 08d6c78bd1eb..d6268037dea8 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -654,11 +654,10 @@ bool RuntimeDyldELF::resolveLoongArch64ShortBranch( if (Loc == GlobalSymbolTable.end()) return false; const auto &SymInfo = Loc->second; - Address = - uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( - SymInfo.getOffset())); + Address = Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( + SymInfo.getOffset()); } else { - Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); + Address = Sections[Value.SectionID].getLoadAddress(); } uint64_t Offset = RelI->getOffset(); uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); From fc6f235101e42e3b5cd6dec6d3fd55ed5943bb7c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 16 Aug 2025 06:47:33 -0700 Subject: [PATCH 063/214] [llvm] Proofread LangRef.rst (#153932) --- llvm/docs/LangRef.rst | 102 +++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/llvm/docs/LangRef.rst 
b/llvm/docs/LangRef.rst index 1aebcc443996..a71eefd1eb68 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -413,7 +413,7 @@ added in the future: - On AArch64 the callee preserves all general purpose registers, except X0-X8 and X16-X18. Not allowed with ``nest``. - - On RISC-V the callee preserve x5-x31 except x6, x7 and x28 registers. + - On RISC-V the callee preserves x5-x31 except x6, x7 and x28 registers. The idea behind this convention is to support calls to runtime functions that have a hot path and a cold path. The hot path is usually a small piece @@ -575,7 +575,7 @@ DLL storage classes: and the function or variable name. On XCOFF targets, ``dllexport`` indicates that the symbol will be made visible to other modules using "exported" visibility and thus placed by the linker in the loader section symbol table. - Since this storage class exists for defining a dll interface, the compiler, + Since this storage class exists for defining a DLL interface, the compiler, assembler and linker know it is externally referenced and must refrain from deleting the symbol. @@ -1887,7 +1887,7 @@ Attribute Groups Attribute groups are groups of attributes that are referenced by objects within the IR. They are important for keeping ``.ll`` files readable, because a lot of -functions will use the same set of attributes. In the degenerative case of a +functions will use the same set of attributes. In the degenerate case of a ``.ll`` file that corresponds to a single ``.c`` file, the single attribute group will capture the important command line flags used to build that file. @@ -1946,8 +1946,8 @@ For example: ``::operator::delete``. Matching malloc/realloc/free calls within a family can be optimized, but mismatched ones will be left alone. ``allockind("KIND")`` - Describes the behavior of an allocation function. The KIND string contains comma - separated entries from the following options: + Describes the behavior of an allocation function. 
The KIND string contains + comma-separated entries from the following options: * "alloc": the function returns a new block of memory or null. * "realloc": the function returns a new block of memory or null. If the @@ -2047,7 +2047,7 @@ For example: even if this attribute says the frame pointer can be eliminated. The allowed string values are: - * ``"none"`` (default) - the frame pointer can be eliminated, and it's + * ``"none"`` (default) - the frame pointer can be eliminated, and its register can be used for other purposes. * ``"reserved"`` - the frame pointer register must either be updated to point to a valid frame record for the current function, or not be @@ -2201,7 +2201,7 @@ For example: A ``nofree`` function is explicitly allowed to free memory which it allocated or (if not ``nosync``) arrange for another thread to free - memory on it's behalf. As a result, perhaps surprisingly, a ``nofree`` + memory on its behalf. As a result, perhaps surprisingly, a ``nofree`` function can return a pointer to a previously deallocated :ref:`allocated object`. ``noimplicitfloat`` @@ -2232,14 +2232,14 @@ For example: may make calls to the function faster, at the cost of extra program startup time if the function is not called during program startup. ``noprofile`` - This function attribute prevents instrumentation based profiling, used for + This function attribute prevents instrumentation-based profiling, used for coverage or profile based optimization, from being added to a function. It also blocks inlining if the caller and callee have different values of this attribute. ``skipprofile`` - This function attribute prevents instrumentation based profiling, used for + This function attribute prevents instrumentation-based profiling, used for coverage or profile based optimization, from being added to a function. 
This - attribute does not restrict inlining, so instrumented instruction could end + attribute does not restrict inlining, so instrumented instructions could end up in this function. ``noredzone`` This attribute indicates that the code generator should not use a @@ -2339,7 +2339,7 @@ For example: * ``"prologue-short-redirect"`` - This style of patchable function is intended to support patching a function prologue to - redirect control away from the function in a thread safe + redirect control away from the function in a thread-safe manner. It guarantees that the first instruction of the function will be large enough to accommodate a short jump instruction, and will be sufficiently aligned to allow being @@ -2584,7 +2584,7 @@ For example: ``uwtable[(sync|async)]`` This attribute indicates that the ABI being targeted requires that an unwind table entry be produced for this function even if we can - show that no exceptions passes by it. This is normally the case for + show that no exceptions pass by it. This is normally the case for the ELF x86-64 abi, but it can be disabled for some compilation units. The optional parameter describes what kind of unwind tables to generate: ``sync`` for normal unwind tables, ``async`` for asynchronous @@ -2599,7 +2599,7 @@ For example: ``shadowcallstack`` This attribute indicates that the ShadowCallStack checks are enabled for the function. The instrumentation checks that the return address for the - function has not changed between the function prolog and epilog. It is + function has not changed between the function prologue and epilogue. It is currently x86_64-specific. .. _langref_mustprogress: @@ -2807,7 +2807,7 @@ operand bundle tag. These operand bundles represent an alternate "safe" continuation for the call site they're attached to, and can be used by a suitable runtime to deoptimize the compiled frame at the specified call site. There can be at most one ``"deopt"`` operand -bundle attached to a call site. 
Exact details of deoptimization is +bundle attached to a call site. Exact details of deoptimization are out of scope for the language reference, but it usually involves rewriting a compiled frame into a set of interpreted frames. @@ -2896,7 +2896,7 @@ generated code. For more details, see :ref:`GC Transitions The bundle contains an arbitrary list of Values which need to be passed to GC transition code. They will be lowered and passed as operands to -the appropriate GC_TRANSITION nodes in the selection DAG. It is assumed +the appropriate ``GC_TRANSITION`` nodes in the selection DAG. It is assumed that these arguments must be available before and after (but not necessarily during) the execution of the callee. @@ -3334,7 +3334,7 @@ by the minus sign character ('-'). The canonical forms are: This information is passed along to the backend so that it generates code for the proper architecture. It's possible to override this on the -command line with the ``-mtriple`` command line option. +command line with the ``-mtriple`` command-line option. .. _allocatedobjects: @@ -4289,7 +4289,7 @@ X86_amx Type :Overview: The x86_amx type represents a value held in an AMX tile register on an x86 -machine. The operations allowed on it are quite limited. Only few intrinsics +machine. The operations allowed on it are quite limited. Only a few intrinsics are allowed: stride load and store, zero and dot product. No instruction is allowed for this type. There are no arguments, arrays, pointers, vectors or constants of this type. @@ -5058,14 +5058,14 @@ Addresses of Basic Blocks The '``blockaddress``' constant computes the address of the specified basic block in the specified function. -It always has an ``ptr addrspace(P)`` type, where ``P`` is the address space +It always has a ``ptr addrspace(P)`` type, where ``P`` is the address space of the function containing ``%block`` (usually ``addrspace(0)``). Taking the address of the entry block is illegal. 
This value only has defined behavior when used as an operand to the ':ref:`indirectbr `' or for comparisons against null. Pointer -equality tests between labels addresses results in undefined behavior --- +equality tests between label addresses result in undefined behavior --- though, again, comparison against null is ok, and no label is equal to the null pointer. This may be passed around as an opaque pointer sized value as long as the bits are not inspected. This allows ``ptrtoint`` and arithmetic to be @@ -5098,7 +5098,7 @@ The target function may not have ``extern_weak`` linkage. to the function. - ``dso_local_equivalent`` can be implemented with a stub that tail-calls the function. Many targets support relocations that resolve at link time to either - a function or a stub for it, depending on if the function is defined within the + a function or a stub for it, depending on whether the function is defined within the linkage unit; LLVM will use this when available. (This is commonly called a "PLT stub".) On other targets, the stub may need to be emitted explicitly. @@ -5320,7 +5320,7 @@ the '``unwind``' keyword, the behavior is undefined. If multiple keywords appear, the '``sideeffect``' keyword must come first, the '``alignstack``' keyword second, the '``inteldialect``' keyword -third and the '``unwind``' keyword last. +third, and the '``unwind``' keyword last. Inline Asm Constraint String ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -5483,7 +5483,7 @@ followed by two letters (e.g. "``^wc``"), or "``{``" register-name "``}``" The one and two letter constraint codes are typically chosen to be the same as GCC's constraint codes. -A single constraint may include one or more than constraint code in it, leaving +A single constraint may include one or more constraint codes in it, leaving it up to LLVM to choose which one to use. This is included mainly for compatibility with the translation of GCC inline asm coming from clang. 
@@ -6028,7 +6028,7 @@ Inline Asm Metadata The call instructions that wrap inline asm nodes may have a "``!srcloc``" MDNode attached to it that contains a list of constant integers. If present, the code generator will use the integer as the -location cookie value when report errors through the ``LLVMContext`` +location cookie value when reporting errors through the ``LLVMContext`` error reporting mechanisms. This allows a front-end to correlate backend errors that occur with inline asm back to the source code that produced it. For example: @@ -6209,7 +6209,7 @@ Unlike instructions, global objects (functions and global variables) may have multiple metadata attachments with the same identifier. A transformation is required to drop any metadata attachment that it -does not know or know it can't preserve. Currently there is an +does not recognize or cannot preserve. Currently there is an exception for metadata attachment to globals for ``!func_sanitize``, ``!type``, ``!absolute_symbol`` and ``!associated`` which can't be unconditionally dropped unless the global is itself deleted. @@ -6460,7 +6460,7 @@ pointer and pointee is called association. The optional array is currently associated. The optional ``allocated`` is a ``DIExpression`` that describes whether the allocatable array is currently allocated. The optional ``rank`` is a ``DIExpression`` that describes the -rank (number of dimensions) of fortran assumed rank array (rank is +rank (number of dimensions) of Fortran assumed rank array (rank is known at runtime). The optional ``bitStride`` is an unsigned constant that describes the number of bits occupied by an element of the array; this is only needed if it differs from the element type's natural @@ -6776,7 +6776,7 @@ The current supported opcode vocabulary is limited: - ``DW_OP_plus_uconst, 93`` adds ``93`` to the working expression. 
- ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8`` here, respectively) of the variable fragment from the working expression. Note - that contrary to DW_OP_bit_piece, the offset is describing the location + that contrary to ``DW_OP_bit_piece``, the offset is describing the location within the described source variable. - ``DW_OP_LLVM_convert, 16, DW_ATE_signed`` specifies a bit size and encoding (``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the @@ -6844,9 +6844,9 @@ The current supported opcode vocabulary is limited: expression over two registers. - ``DW_OP_push_object_address`` pushes the address of the object which can then serve as a descriptor in subsequent calculation. This opcode can be used to - calculate bounds of fortran allocatable array which has array descriptors. + calculate bounds of a Fortran allocatable array which has array descriptors. - ``DW_OP_over`` duplicates the entry currently second in the stack at the top - of the stack. This opcode can be used to calculate bounds of fortran assumed + of the stack. This opcode can be used to calculate bounds of a Fortran assumed rank array which has rank known at run time and current dimension number is implicitly first element of the stack. - ``DW_OP_LLVM_implicit_pointer`` It specifies the dereferenced value. It can @@ -7447,7 +7447,7 @@ For example, in the code below, the call instruction may only target the ``callback`` metadata may be attached to a function declaration, or definition. (Call sites are excluded only due to the lack of a use case.) For ease of -exposition, we'll refer to the function annotated w/ metadata as a broker +exposition, we'll refer to the function annotated with metadata as a broker function. The metadata describes how the arguments of a call to the broker are in turn passed to the callback function specified by the metadata.
Thus, the ``callback`` metadata provides a partial description of a call site inside the @@ -7616,7 +7616,7 @@ loop is transformed to a different loop before an explicitly requested other transformations impossible. Mandatory loop canonicalizations such as loop rotation are still applied. -It is recommended to use this metadata in addition to any llvm.loop.* +It is recommended to use this metadata in addition to any ``llvm.loop.*`` transformation directive. Also, any loop should have at most one directive applied to it (and a sequence of transformations built using followup-attributes). Otherwise, which transformation will be applied @@ -7962,7 +7962,7 @@ the non-distributed fallback version will have. See '``llvm.loop.distribute.followup_all``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The attributes in this metadata is added to all followup loops of the +The attributes in this metadata are added to all followup loops of the loop distribution pass. See :ref:`Transformation Metadata ` for details. @@ -8041,8 +8041,8 @@ undefined. Note that if not all memory access instructions belong to an access group referred to by ``llvm.loop.parallel_accesses``, then the loop must not be considered trivially parallel. Additional -memory dependence analysis is required to make that determination. As a fail -safe mechanism, this causes loops that were originally parallel to be considered +memory dependence analysis is required to make that determination. As a +fail-safe mechanism, this causes loops that were originally parallel to be considered sequential (if optimization passes that are unaware of the parallel semantics insert new memory instructions into the loop body). @@ -8211,7 +8211,7 @@ compatibility, globals carrying this metadata should: - Be in ``@llvm.compiler.used``. - If the referenced global variable is in a comdat, be in the same comdat. -``!associated`` can not express many-to-one relationship. 
A global variable with +``!associated`` can not express a many-to-one relationship. A global variable with the metadata should generally not be referenced by a function: the function may be inlined into other functions, leading to more references to the metadata. Ideally we would want to keep metadata alive as long as any inline location is @@ -8272,7 +8272,7 @@ VP VP (value profile) metadata can be attached to instructions that have value profile information. Currently this is indirect calls (where it -records the hottest callees) and calls to memory intrinsics such as memcpy, +records the hottest callees) and calls to memory intrinsics, such as memcpy, memmove, and memset (where it records the hottest byte lengths). Each VP metadata node contains "VP" string, then a ``uint32_t`` value for the value @@ -8476,8 +8476,8 @@ Example: This is intended for use on targets with a notion of generic address spaces, which at runtime resolve to different physical memory -spaces. The interpretation of the address space values is target -specific. The behavior is undefined if the runtime memory address does +spaces. The interpretation of the address space values is target specific. +The behavior is undefined if the runtime memory address does resolve to an object defined in one of the indicated address spaces. @@ -8488,7 +8488,7 @@ Information about the module as a whole is difficult to convey to LLVM's subsystems. The LLVM IR isn't sufficient to transmit this information. The ``llvm.module.flags`` named metadata exists in order to facilitate this. These flags are in the form of key / value pairs --- much like a -dictionary --- making it easy for any subsystem who cares about a flag to +dictionary --- making it easy for any subsystem that cares about a flag to look it up. The ``llvm.module.flags`` metadata contains a list of metadata triplets. 
@@ -8748,7 +8748,7 @@ Automatic Linker Flags Named Metadata Some targets support embedding of flags to the linker inside individual object files. Typically this is used in conjunction with language extensions which -allow source files to contain linker command line options, and have these +allow source files to contain linker command-line options, and have these automatically be transmitted to the linker via object files. These flags are encoded in the IR using named metadata with the name @@ -11739,7 +11739,7 @@ size of the '' type. Note that this default alignment assumption is different from the alignment used for the load/store instructions when align isn't specified. -A ``atomicrmw`` instruction can also take an optional +An ``atomicrmw`` instruction can also take an optional ":ref:`syncscope `" argument. Semantics: @@ -12510,7 +12510,7 @@ Semantics: """""""""" The '``ptrtoint``' instruction converts ``value`` to integer type -``ty2`` by interpreting the all pointer representation bits as an integer +``ty2`` by interpreting all the pointer representation bits as an integer (equivalent to a ``bitcast``) and either truncating or zero extending that value to the size of the integer type. If ``value`` is smaller than ``ty2`` then a zero extension is done. If @@ -13542,7 +13542,7 @@ ensures that each ``catchpad`` has exactly one predecessor block, and it always terminates in a ``catchswitch``. The ``args`` correspond to whatever information the personality routine -requires to know if this is an appropriate handler for the exception. Control +requires to determine if this is an appropriate handler for the exception. Control will transfer to the ``catchpad`` if this is the first appropriate handler for the exception. @@ -13886,7 +13886,7 @@ Semantics: The '``llvm.va_copy``' intrinsic works just like the ``va_copy`` macro available in C. In a target-dependent way, it copies the source ``va_list`` element into the destination ``va_list`` element. 
This -intrinsic is necessary because the `` llvm.va_start`` intrinsic may be +intrinsic is necessary because the ``llvm.va_start`` intrinsic may be arbitrarily complex and require, for example, memory allocation. Accurate Garbage Collection Intrinsics @@ -14077,7 +14077,7 @@ types of the 'call parameters' arguments. The '#call args' operand is the number of arguments to the actual call. It must exactly match the number of arguments passed in the -'call parameters' variable length section. +'call parameters' variable-length section. The 'flags' operand is used to specify extra information about the statepoint. This is currently only used to mark certain statepoints @@ -14198,7 +14198,7 @@ so constructed. The third argument is an index which specify the (potentially) derived pointer being relocated. It is legal for this index to be the same as the second -argument if-and-only-if a base pointer is being relocated. +argument if and only if a base pointer is being relocated. Semantics: """""""""" @@ -14894,7 +14894,7 @@ Overview: """"""""" The '``llvm.instrprof.increment``' intrinsic can be emitted by a -frontend for use with instrumentation based profiling. These will be +frontend for use with instrumentation-based profiling. These will be lowered by the ``-instrprof`` pass to generate execution counts of a program at runtime. @@ -15097,7 +15097,7 @@ Overview: """"""""" The '``llvm.instrprof.value.profile``' intrinsic can be emitted by a -frontend for use with instrumentation based profiling. This will be +frontend for use with instrumentation-based profiling. This will be lowered by the ``-instrprof`` pass to find out the target values, instrumented expressions take in a program at runtime. 
@@ -18514,7 +18514,7 @@ Overview: """"""""" The '``llvm.umul.with.overflow``' family of intrinsic functions perform -a unsigned multiplication of the two arguments, and indicate whether an +an unsigned multiplication of the two arguments, and indicate whether an overflow occurred during the unsigned multiplication. Arguments: @@ -20681,7 +20681,7 @@ Semantics: The '``llvm.experimental.vector.histogram.*``' intrinsics are used to perform updates on potentially overlapping values in memory. The intrinsics represent -the follow sequence of operations: +the following sequence of operations: 1. Gather load from the ``ptrs`` operand, with element type matching that of the ``inc`` operand. @@ -28541,7 +28541,7 @@ environment. The rounding mode argument is only intended as information to the compiler. If the runtime floating-point environment is using the default rounding mode -then the results will be the same as the ``llvm.llrint intrinsic``. +then the results will be the same as the ``llvm.llrint`` intrinsic. '``llvm.experimental.constrained.nearbyint``' Intrinsic @@ -29002,7 +29002,7 @@ was only valid within a single iteration. .. code-block:: llvm - ; This examples shows two possible positions for noalias.decl and how they impact the semantics: + ; This example shows two possible positions for noalias.decl and how they impact the semantics: ; If it is outside the loop (Version 1), then %a and %b are noalias across *all* iterations. ; If it is inside the loop (Version 2), then %a and %b are noalias only within *one* iteration. declare void @decl_in_loop(ptr %a.base, ptr %b.base) { From 30007a5414931523687a555626431a57706d466e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 16 Aug 2025 23:09:23 +0900 Subject: [PATCH 064/214] AMDGPU: Fix crash in rewrite AGPR copy MFMA pass on dead valnos (#153915) Test isn't great, probably won't reliably reproduce the problem in the future. 
--- .../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 3 + .../AMDGPU/av-split-dead-valno-crash.ll | 211 ++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index bab83483f3de..20b5fd94aba9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -147,6 +147,9 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const { // TODO: Test multiple uses for (VNInfo *VNI : LI.vnis()) { + if (VNI->isPHIDef() || VNI->isUnused()) + continue; + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); // TODO: Handle SplitKit produced copy bundles for partially defined diff --git a/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll b/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll new file mode 100644 index 000000000000..89fe0ab526a8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -stress-regalloc=18 < %s | FileCheck %s + +define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2, i1 %cond.i.i.i2295, ptr addrspace(1) %ptr, ptr %ptr1) #0 { +; CHECK-LABEL: vgpr_mfma_pass_av_split_crash: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x8 +; CHECK-NEXT: s_load_dwordx2 s[10:11], s[4:5], 0x0 +; CHECK-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x10 +; CHECK-NEXT: v_mov_b32_e32 v1, 0x3e21eeb6 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xa17f65f6 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[16:17], -1, 0 +; CHECK-NEXT: s_xor_b64 s[18:19], s[16:17], -1 +; CHECK-NEXT: s_bitcmp1_b32 s0, 8 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; 
CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] +; CHECK-NEXT: s_xor_b64 s[20:21], s[2:3], -1 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0 +; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3] +; CHECK-NEXT: v_mov_b32_e32 v0, 0x9037ab78 +; CHECK-NEXT: v_mov_b32_e32 v3, 0xbe927e4f +; CHECK-NEXT: v_mov_b32_e32 v4, 0x19f4ec90 +; CHECK-NEXT: v_mov_b32_e32 v5, 0x3efa01a0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0x16c16967 +; CHECK-NEXT: v_mov_b32_e32 v7, 0xbf56c16c +; CHECK-NEXT: v_mov_b32_e32 v8, 0x69efb384 +; CHECK-NEXT: v_mov_b32_e32 v9, 0x3f4b2bb0 +; CHECK-NEXT: v_mov_b32_e32 v10, 0xa57d9582 +; CHECK-NEXT: v_mov_b32_e32 v11, 0xbf8c6ea4 +; CHECK-NEXT: v_mov_b32_e32 v12, 0xe82d3ff0 +; CHECK-NEXT: v_mov_b32_e32 v13, 0xbfa59976 +; CHECK-NEXT: v_mov_b32_e32 v14, 0x8427b883 +; CHECK-NEXT: v_mov_b32_e32 v15, 0x3fae1bb4 +; CHECK-NEXT: s_mov_b64 s[22:23], 0 +; CHECK-NEXT: v_mov_b32_e32 v16, 0x57b87036 +; CHECK-NEXT: v_mov_b32_e32 v17, 0x3fb3b136 +; CHECK-NEXT: s_and_b64 s[4:5], exec, s[16:17] +; CHECK-NEXT: v_mov_b32_e32 v18, 0x55555523 +; CHECK-NEXT: v_mov_b32_e32 v19, 0xbfd55555 +; CHECK-NEXT: s_and_b64 s[6:7], exec, s[18:19] +; CHECK-NEXT: v_mov_b32_e32 v20, 0 +; CHECK-NEXT: ; implicit-def: $agpr0_agpr1 +; CHECK-NEXT: ; implicit-def: $vgpr22_vgpr23 +; CHECK-NEXT: s_branch .LBB0_2 +; CHECK-NEXT: .LBB0_1: ; %Flow9 +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[24:25] +; CHECK-NEXT: s_cbranch_vccz .LBB0_17 +; CHECK-NEXT: .LBB0_2: ; %._crit_edge1942.i.i.i3548 +; CHECK-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NEXT: ; Child Loop BB0_6 Depth 2 +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_9 +; CHECK-NEXT: ; %bb.3: ; %.preheader1868.i.i.i3244 +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: s_mov_b64 vcc, s[4:5] +; CHECK-NEXT: s_cbranch_vccz .LBB0_10 +; CHECK-NEXT: ; %bb.4: ; %.preheader1855.i.i.i3329.preheader +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[14:15] 
+; CHECK-NEXT: flat_load_dwordx2 v[24:25], v[24:25] +; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[0:1] +; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[2:3] +; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[16:17] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[24:25] +; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27] +; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[4:5] +; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29] +; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[6:7] +; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27] +; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[8:9] +; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29] +; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[10:11] +; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27] +; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[12:13] +; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29] +; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[14:15] +; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27] +; CHECK-NEXT: v_fmac_f64_e32 v[30:31], 0, v[28:29] +; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[18:19] +; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[30:31] +; CHECK-NEXT: v_mov_b64_e32 v[30:31], 0 +; CHECK-NEXT: s_branch .LBB0_6 +; CHECK-NEXT: .LBB0_5: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2 +; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_11 +; CHECK-NEXT: .LBB0_6: ; %.preheader1855.i.i.i3329 +; CHECK-NEXT: ; Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[30:31] +; CHECK-NEXT: s_mov_b64 s[24:25], -1 +; CHECK-NEXT: s_mov_b64 s[8:9], -1 +; CHECK-NEXT: s_mov_b64 vcc, s[2:3] +; CHECK-NEXT: ; implicit-def: $vgpr30_vgpr31 +; CHECK-NEXT: s_cbranch_vccz .LBB0_5 +; CHECK-NEXT: ; %bb.7: ; %.lr.ph2070.i.i.i3291 +; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2 +; CHECK-NEXT: v_accvgpr_read_b32 v31, a1 +; CHECK-NEXT: v_accvgpr_read_b32 v30, a0 +; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19] +; CHECK-NEXT: s_mov_b64 vcc, s[6:7] +; CHECK-NEXT: s_cbranch_vccz 
.LBB0_5 +; CHECK-NEXT: ; %bb.8: ; %.preheader1856.preheader.i.i.i3325 +; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2 +; CHECK-NEXT: s_mov_b64 s[24:25], 0 +; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[26:27] +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: s_branch .LBB0_5 +; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[10:11] +; CHECK-NEXT: v_accvgpr_write_b32 a0, v24 +; CHECK-NEXT: s_mov_b64 s[22:23], 0 +; CHECK-NEXT: v_accvgpr_write_b32 a1, v25 +; CHECK-NEXT: s_mov_b64 s[8:9], s[20:21] +; CHECK-NEXT: s_branch .LBB0_15 +; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: s_mov_b64 s[8:9], -1 +; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0 +; CHECK-NEXT: s_branch .LBB0_15 +; CHECK-NEXT: .LBB0_11: ; %loop.exit.guard +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[24:25] +; CHECK-NEXT: s_cbranch_vccz .LBB0_13 +; CHECK-NEXT: ; %bb.12: ; %._crit_edge2105.i.i.i2330.loopexit +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: v_cmp_nlg_f64_e64 s[8:9], 0, v[28:29] +; CHECK-NEXT: v_accvgpr_write_b32 a0, v24 +; CHECK-NEXT: v_cndmask_b32_e64 v23, v23, 0, s[16:17] +; CHECK-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v27, v26 +; CHECK-NEXT: s_and_b64 s[8:9], exec, s[16:17] +; CHECK-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[16:17] +; CHECK-NEXT: global_store_dwordx2 v20, v[26:27], s[12:13] +; CHECK-NEXT: s_cselect_b32 s23, s23, 0 +; CHECK-NEXT: s_cselect_b32 s22, s22, 0 +; CHECK-NEXT: s_mov_b64 s[8:9], -1 +; CHECK-NEXT: s_branch .LBB0_14 +; CHECK-NEXT: .LBB0_13: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v24 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0 +; CHECK-NEXT: .LBB0_14: ; %Flow6 +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: v_accvgpr_write_b32 a1, v25 +; CHECK-NEXT: .LBB0_15: ; %Flow6 +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: s_mov_b64 s[24:25], -1 +; 
CHECK-NEXT: s_and_b64 vcc, exec, s[8:9] +; CHECK-NEXT: s_cbranch_vccz .LBB0_1 +; CHECK-NEXT: ; %bb.16: ; %._crit_edge2105.i.i.i2330 +; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: v_mov_b32_e32 v21, v20 +; CHECK-NEXT: s_mov_b64 s[24:25], 0 +; CHECK-NEXT: global_store_dwordx2 v20, v[20:21], s[12:13] +; CHECK-NEXT: s_branch .LBB0_1 +; CHECK-NEXT: .LBB0_17: ; %DummyReturnBlock +; CHECK-NEXT: s_endpgm +entry: + br label %._crit_edge1942.i.i.i3548 + +._crit_edge1942.i.i.i3548: ; preds = %._crit_edge2105.i.i.i2330, %entry + %.sroa.02591.0.i.i.i226323 = phi double [ poison, %entry ], [ %.sroa.02591.3.i.i.i2301, %._crit_edge2105.i.i.i2330 ] + %.sroa.3.0.i.i.i2270 = phi double [ poison, %entry ], [ %.sroa.3.3.i.i.i2308, %._crit_edge2105.i.i.i2330 ] + %.014942244.i.i.i2280 = phi double [ 0.000000e+00, %entry ], [ %.31497.i.i.i2317, %._crit_edge2105.i.i.i2330 ] + br i1 %cond.i.i.i2295, label %.preheader1868.i.i.i3244, label %._crit_edge2105.i.i.i2330 + +.preheader1868.i.i.i3244: ; preds = %._crit_edge1942.i.i.i3548 + %i = load double, ptr %ptr1, align 8 + %i3 = call double @llvm.fma.f64(double %i, double 0.000000e+00, double 0x3E21EEB69037AB78) + %i4 = call double @llvm.fma.f64(double 0.000000e+00, double %i3, double 0xBE927E4FA17F65F6) + %i5 = call double @llvm.fma.f64(double 0.000000e+00, double %i4, double 0x3EFA01A019F4EC90) + %i6 = call double @llvm.fma.f64(double 0.000000e+00, double %i5, double 0xBF56C16C16C16967) + %spec.select.i.i.i3288 = select i1 %arg2, double 0.000000e+00, double %.sroa.3.0.i.i.i2270 + br i1 %arg2, label %.preheader1855.i.i.i3329, label %._crit_edge2105.i.i.i2330 + +.lr.ph2070.i.i.i3291: ; preds = %.preheader1855.i.i.i3329 + br i1 %arg2, label %.preheader1855.i.i.i3329, label %.preheader1856.preheader.i.i.i3325 + +.preheader1856.preheader.i.i.i3325: ; preds = %.lr.ph2070.i.i.i3291 + %i11 = call double @llvm.fma.f64(double 0.000000e+00, double %i6, double 0x3F4B2BB069EFB384) + %i14 = call double @llvm.fma.f64(double 0.000000e+00, 
double %i11, double 0xBF8C6EA4A57D9582) + %i18 = call double @llvm.fma.f64(double 0.000000e+00, double %i14, double 0xBFA59976E82D3FF0) + %i21 = call double @llvm.fma.f64(double 0.000000e+00, double %i18, double 0x3FAE1BB48427B883) + %i23 = call double @llvm.fma.f64(double 0.000000e+00, double %i21, double 0x3FB3B13657B87036) + %i28 = call double @llvm.fma.f64(double 0.000000e+00, double %i23, double 0xBFD5555555555523) + br label %.preheader1855.i.i.i3329 + +.preheader1855.i.i.i3329: ; preds = %.preheader1856.preheader.i.i.i3325, %.lr.ph2070.i.i.i3291, %.preheader1868.i.i.i3244 + %.sroa.02591.4.i.i.i3335 = phi double [ %i28, %.preheader1856.preheader.i.i.i3325 ], [ %.sroa.02591.0.i.i.i226323, %.lr.ph2070.i.i.i3291 ], [ 0.000000e+00, %.preheader1868.i.i.i3244 ] + %.21496.ph.i.i.i3348 = select i1 %arg2, double %.014942244.i.i.i2280, double 0.000000e+00 + %i31 = fcmp one double %.sroa.02591.4.i.i.i3335, 0.000000e+00 + %i32 = select i1 %i31, <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) + store <2 x i32> %i32, ptr addrspace(1) %ptr, align 8 + br i1 %cond.i.i.i2295, label %.lr.ph2070.i.i.i3291, label %._crit_edge2105.i.i.i2330 + +._crit_edge2105.i.i.i2330: ; preds = %.preheader1855.i.i.i3329, %.preheader1868.i.i.i3244, %._crit_edge1942.i.i.i3548 + %.sroa.02591.3.i.i.i2301 = phi double [ %.sroa.02591.0.i.i.i226323, %.preheader1868.i.i.i3244 ], [ %arg1, %._crit_edge1942.i.i.i3548 ], [ %i, %.preheader1855.i.i.i3329 ] + %.sroa.3.3.i.i.i2308 = phi double [ 0.000000e+00, %.preheader1868.i.i.i3244 ], [ %.sroa.3.0.i.i.i2270, %._crit_edge1942.i.i.i3548 ], [ %spec.select.i.i.i3288, %.preheader1855.i.i.i3329 ] + %.31497.i.i.i2317 = phi double [ %.014942244.i.i.i2280, %.preheader1868.i.i.i3244 ], [ 0.000000e+00, %._crit_edge1942.i.i.i3548 ], [ %.21496.ph.i.i.i3348, %.preheader1855.i.i.i3329 ] + store double 0.000000e+00, ptr addrspace(1) %ptr, align 8 + br label %._crit_edge1942.i.i.i3548 +} + +declare double @llvm.fma.f64(double, double, double) #1 + +attributes #0 = { 
"amdgpu-waves-per-eu"="8,8" "target-cpu"="gfx942" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } From c971c25544437ac451e08654f481ac72cf072b2b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Sat, 16 Aug 2025 07:26:13 -0700 Subject: [PATCH 065/214] [licm] don't drop `MD_prof` when dropping other metadata (#152420) Part of Issue #147390 --- llvm/include/llvm/IR/Instruction.h | 7 ++-- llvm/lib/IR/Instruction.cpp | 13 ++++-- llvm/lib/Transforms/Scalar/LICM.cpp | 8 +++- llvm/test/Transforms/LICM/hoist-profdata.ll | 45 +++++++++++++++++++++ 4 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/LICM/hoist-profdata.ll diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 5d25804a684a..2eb4fd36c5b7 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -584,9 +584,10 @@ public: dropUBImplyingAttrsAndUnknownMetadata(ArrayRef KnownIDs = {}); /// Drop any attributes or metadata that can cause immediate undefined - /// behavior. Retain other attributes/metadata on a best-effort basis. - /// This should be used when speculating instructions. - LLVM_ABI void dropUBImplyingAttrsAndMetadata(); + /// behavior. Retain other attributes/metadata on a best-effort basis, as well + /// as those passed in `Keep`. This should be used when speculating + /// instructions. + LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef Keep = {}); /// Return true if this instruction has UB-implying attributes /// that can cause immediate undefined behavior. 
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 0b7923248aa7..5e87b5ff941a 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -552,14 +552,19 @@ void Instruction::dropUBImplyingAttrsAndUnknownMetadata( CB->removeRetAttrs(UBImplyingAttributes); } -void Instruction::dropUBImplyingAttrsAndMetadata() { +void Instruction::dropUBImplyingAttrsAndMetadata(ArrayRef Keep) { // !annotation metadata does not impact semantics. // !range, !nonnull and !align produce poison, so they are safe to speculate. // !noundef and various AA metadata must be dropped, as it generally produces // immediate undefined behavior. - unsigned KnownIDs[] = {LLVMContext::MD_annotation, LLVMContext::MD_range, - LLVMContext::MD_nonnull, LLVMContext::MD_align}; - dropUBImplyingAttrsAndUnknownMetadata(KnownIDs); + static const unsigned KnownIDs[] = { + LLVMContext::MD_annotation, LLVMContext::MD_range, + LLVMContext::MD_nonnull, LLVMContext::MD_align}; + SmallVector KeepIDs; + KeepIDs.reserve(Keep.size() + std::size(KnownIDs)); + append_range(KeepIDs, KnownIDs); + append_range(KeepIDs, Keep); + dropUBImplyingAttrsAndUnknownMetadata(KeepIDs); } bool Instruction::hasUBImplyingAttrs() const { diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 4c035a2464c8..8d61779a428e 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1699,8 +1699,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. 
- !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) - I.dropUBImplyingAttrsAndMetadata(); + !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) { + if (ProfcheckDisableMetadataFixes) + I.dropUBImplyingAttrsAndMetadata(); + else + I.dropUBImplyingAttrsAndMetadata({LLVMContext::MD_prof}); + } if (isa(I)) // Move the new node to the end of the phi list in the destination block. diff --git a/llvm/test/Transforms/LICM/hoist-profdata.ll b/llvm/test/Transforms/LICM/hoist-profdata.ll new file mode 100644 index 000000000000..18fa1b9f92e8 --- /dev/null +++ b/llvm/test/Transforms/LICM/hoist-profdata.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; Test that hoisting conditional branches copies the debug and profiling info +; metadata from the branch being hoisted. +; RUN: opt -S -passes=licm %s -o - | FileCheck %s + +declare i32 @foo() + +; to_hoist should get hoisted, and that should not result +; in a loss of profiling info +define i32 @hoist_select(i1 %cond, i32 %a, i32 %b) nounwind { +; CHECK-LABEL: define i32 @hoist_select +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TO_HOIST:%.*]] = select i1 [[COND]], i32 [[A]], i32 [[B]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br label [[L0:%.*]] +; CHECK: L0: +; CHECK-NEXT: [[G:%.*]] = call i32 @foo() +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[G]], [[TO_HOIST]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[SUM]], 0 +; CHECK-NEXT: br i1 [[C]], label [[L0]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[L0]] ] +; CHECK-NEXT: ret i32 [[SUM_LCSSA]] +; +entry: + br label %L0 +L0: + %g = call i32 @foo() + %to_hoist = select i1 %cond, i32 %a, i32 %b, !prof !0 + %sum = add i32 %g, %to_hoist + %c = icmp eq i32 %sum, 0 + br i1 %c, label %L0, label %exit, !prof !1 + +exit: + ret i32 %sum +} + +!0 = !{!"branch_weights", 
i32 2, i32 5} +!1 = !{!"branch_weights", i32 101, i32 189} +;. +; CHECK: attributes #[[ATTR0]] = { nounwind } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 2, i32 5} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 101, i32 189} +;. From 0b1b567d9f84e67124c58d69b5aa375357d68c9e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 16 Aug 2025 23:28:39 +0900 Subject: [PATCH 066/214] RuntimeLibcalls: Temporarily disable benchmark depending on llvm-nm Breaks some builds --- llvm/benchmarks/CMakeLists.txt | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt index 14cccd9c8ed5..e32aba5ebff4 100644 --- a/llvm/benchmarks/CMakeLists.txt +++ b/llvm/benchmarks/CMakeLists.txt @@ -14,21 +14,3 @@ add_benchmark(SandboxIRBench SandboxIRBench.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(RuntimeLibcallsBench RuntimeLibcalls.cpp PARTIAL_SOURCES_INTENDED) -if(TARGET llvm-nm) - # Extract the list of symbols in a random utility as sample data. 
- set(SYMBOL_TEST_DATA_FILE "sample_symbol_list.txt") - set(SYMBOL_TEST_DATA_SOURCE_BINARY $) - - add_custom_command(OUTPUT ${SYMBOL_TEST_DATA_FILE} - COMMAND $ --no-demangle --no-sort - --format=just-symbols - ${SYMBOL_TEST_DATA_SOURCE_BINARY} > ${SYMBOL_TEST_DATA_FILE} - DEPENDS "$" "$") - - add_custom_target(generate-runtime-libcalls-sample-symbol-list - DEPENDS ${SYMBOL_TEST_DATA_FILE}) - - add_dependencies(RuntimeLibcallsBench generate-runtime-libcalls-sample-symbol-list) - target_compile_definitions(RuntimeLibcallsBench PRIVATE - -DSYMBOL_TEST_DATA_FILE="${CMAKE_CURRENT_BINARY_DIR}/${SYMBOL_TEST_DATA_FILE}") -endif() From 373206d5e0591896477f988e4194f59eee7981e1 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sat, 16 Aug 2025 17:22:14 +0200 Subject: [PATCH 067/214] [clang][bytecode] Prefer ParmVarDecls as function parameters (#153952) We might create a local temporary variable for a ParmVarDecl, in which case a DeclRefExpr for that ParmVarDecl should _still_ result in us choosing the parameter, not that local. --- clang/lib/AST/ByteCode/Compiler.cpp | 30 ++++++++++++++------------- clang/test/AST/ByteCode/functions.cpp | 19 +++++++++++++++++ 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 8e651cf06062..6c6c8d41d3b9 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -6745,6 +6745,22 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { // value. bool IsReference = D->getType()->isReferenceType(); + // Function parameters. + // Note that it's important to check them first since we might have a local + // variable created for a ParmVarDecl as well. 
+ if (const auto *PVD = dyn_cast(D)) { + if (Ctx.getLangOpts().CPlusPlus && !Ctx.getLangOpts().CPlusPlus11 && + !D->getType()->isIntegralOrEnumerationType()) { + return this->emitInvalidDeclRef(cast(E), + /*InitializerFailed=*/false, E); + } + if (auto It = this->Params.find(PVD); It != this->Params.end()) { + if (IsReference || !It->second.IsPtr) + return this->emitGetParam(classifyPrim(E), It->second.Offset, E); + + return this->emitGetPtrParam(It->second.Offset, E); + } + } // Local variables. if (auto It = Locals.find(D); It != Locals.end()) { const unsigned Offset = It->second.Offset; @@ -6762,20 +6778,6 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { return this->emitGetPtrGlobal(*GlobalIndex, E); } - // Function parameters. - if (const auto *PVD = dyn_cast(D)) { - if (Ctx.getLangOpts().CPlusPlus && !Ctx.getLangOpts().CPlusPlus11 && - !D->getType()->isIntegralOrEnumerationType()) { - return this->emitInvalidDeclRef(cast(E), - /*InitializerFailed=*/false, E); - } - if (auto It = this->Params.find(PVD); It != this->Params.end()) { - if (IsReference || !It->second.IsPtr) - return this->emitGetParam(classifyPrim(E), It->second.Offset, E); - - return this->emitGetPtrParam(It->second.Offset, E); - } - } // In case we need to re-visit a declaration. 
auto revisit = [&](const VarDecl *VD) -> bool { diff --git a/clang/test/AST/ByteCode/functions.cpp b/clang/test/AST/ByteCode/functions.cpp index 3c00de0102e5..4f090842510e 100644 --- a/clang/test/AST/ByteCode/functions.cpp +++ b/clang/test/AST/ByteCode/functions.cpp @@ -713,3 +713,22 @@ namespace EnableIfWithTemporary { struct A { ~A(); }; int &h() __attribute__((enable_if((A(), true), ""))); // both-warning {{clang extension}} } + +namespace LocalVarForParmVarDecl { + struct Iter { + void *p; + }; + constexpr bool bar2(Iter A) { + return true; + } + constexpr bool bar(Iter A, bool b) { + if (b) + return true; + + return bar(A, true); + } + constexpr int foo() { + return bar(Iter(), false); + } + static_assert(foo(), ""); +} From 0d8aa9d9ec52b5873d8857ab9789057d789f6791 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 16 Aug 2025 18:45:26 +0200 Subject: [PATCH 068/214] [mlir][SparseTensor] Simplify pipeline (#152908) This refactoring improves compilation time. --- .../SparseTensor/Pipelines/SparseTensorPipelines.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp index 3b97786e5815..dabbea1bdec6 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -71,7 +71,6 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm, pm.addPass(createLowerAffinePass()); pm.addPass( createConvertVectorToLLVMPass(options.convertVectorToLLVMOptions())); - pm.addPass(createFinalizeMemRefToLLVMConversionPass()); pm.addNestedPass(createConvertComplexToStandardPass()); pm.addNestedPass(arith::createArithExpandOpsPass()); pm.addNestedPass(createConvertMathToLLVMPass()); @@ -79,12 +78,6 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm, pm.addPass(createConvertComplexToLibm()); pm.addPass( 
createConvertVectorToLLVMPass(options.convertVectorToLLVMOptions())); - pm.addPass(createConvertComplexToLLVMPass()); - pm.addPass( - createConvertVectorToLLVMPass(options.convertVectorToLLVMOptions())); - pm.addPass(createConvertFuncToLLVMPass()); - pm.addPass(createArithToLLVMConversionPass()); - pm.addPass(createConvertControlFlowToLLVMPass()); // Finalize GPU code generation. if (gpuCodegen) { @@ -99,8 +92,8 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm, pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions)); } - // Convert poison values. - pm.addPass(createUBToLLVMConversionPass()); + // Convert to LLVM. + pm.addPass(createConvertToLLVMPass()); // Ensure all casts are realized. pm.addPass(createReconcileUnrealizedCastsPass()); From aa2fe4eb3d5bc4e0550677ebc312f68534d402ee Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Aug 2025 20:01:03 +0300 Subject: [PATCH 069/214] [PowerPC] Remove some unused SDNodes and FastISel workaround (NFC) (#153964) These nodes have never been used since introduction in 2013/2015. 
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 7 ++--- llvm/lib/Target/PowerPC/PPCISelLowering.h | 14 ---------- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 29 +++++---------------- 3 files changed, 8 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 4ab9461fc0af..b97d0e235c01 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1787,11 +1787,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::PADDI_DTPREL: return "PPCISD::PADDI_DTPREL"; - case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; - case PPCISD::SC: return "PPCISD::SC"; - case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; - case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; - case PPCISD::RFEBB: return "PPCISD::RFEBB"; + case PPCISD::VADD_SPLAT: + return "PPCISD::VADD_SPLAT"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 9755f0e272d1..5e0d6bf184f2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -430,20 +430,6 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, - /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned - /// operand identifies the operating system entry point. - SC, - - /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. - CLRBHRB, - - /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch - /// history rolling buffer entry. - MFBHRBE, - - /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. - RFEBB, - /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little /// endian. 
Maps to an xxswapd instruction that corrects an lxvd2x /// or stxvd2x instruction. The chain is necessary because the diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 99ef89a7fdc0..c2f91ce8e6b9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -365,16 +365,6 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; -def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, - [SDNPHasChain, SDNPSideEffect]>; - -def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone, - [SDNPHasChain, SDNPSideEffect]>; -def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>; -def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc, - [SDNPHasChain, SDNPSideEffect]>; - def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -1673,7 +1663,7 @@ let isBranch = 1, isTerminator = 1, Size = 0 in { // System call. let PPC970_Unit = 7 in { def SC : SCForm<17, 1, 0, (outs), (ins i32imm:$LEV), - "sc $LEV", IIC_BrB, [(PPCsc (i32 imm:$LEV))]>; + "sc $LEV", IIC_BrB, []>; } // We mark SCV as having no scheduling model since it is only meant to be used @@ -1685,21 +1675,14 @@ let Predicates = [IsISA3_0], hasNoSchedulingInfo = 1 in { } // Branch history rolling buffer. -def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB, - [(PPCclrbhrb)]>, +def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB, []>, PPC970_DGroup_Single; -// The $dmy argument used for MFBHRBE is not needed; however, including -// it avoids automatic generation of PPCFastISel::fastEmit_i(), which -// interferes with necessary special handling (see PPCFastISel.cpp). 
-def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$RT), - (ins u10imm:$imm, u10imm:$dmy), - "mfbhrbe $RT, $imm", IIC_BrB, - [(set i32:$RT, - (PPCmfbhrbe imm:$imm, imm:$dmy))]>, + +def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$RT), (ins u10imm:$imm), + "mfbhrbe $RT, $imm", IIC_BrB, []>, PPC970_DGroup_First; -def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$S), "rfebb $S", - IIC_BrB, [(PPCrfebb (i32 imm:$S))]>, +def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$S), "rfebb $S", IIC_BrB, []>, PPC970_DGroup_Single; def : InstAlias<"rfebb", (RFEBB 1)>; From 1893caa9bc9782eaef95d747658e070b132468a5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 16 Aug 2025 10:43:05 -0700 Subject: [PATCH 070/214] MCSymbol: Decrease the bitfield size of SymbolContents Follow-up to 57b0843f68f5f349c73d1bf54e321a1a6d1800bf The size of MCSymbol has been reduced to 24 bytes on 64-bit systems. --- llvm/include/llvm/MC/MCSymbol.h | 16 ++++++++-------- llvm/lib/MC/MCSymbol.cpp | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index ddc560ec5250..ce160bd2c3cc 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -65,6 +65,10 @@ protected: /// relative to, if any. mutable MCFragment *Fragment = nullptr; + /// This is actually a Contents enumerator, but is unsigned to avoid sign + /// extension and achieve better bitpacking with MSVC. + unsigned SymbolContents : 2; + /// True if this symbol is named. A named symbol will have a pointer to the /// name allocated in the bytes immediately prior to the MCSymbol. unsigned HasName : 1; @@ -95,10 +99,6 @@ protected: /// Used to detect cyclic dependency like `a = a + 1` and `a = b; b = a`. unsigned IsResolving : 1; - /// This is actually a Contents enumerator, but is unsigned to avoid sign - /// extension and achieve better bitpacking with MSVC. - unsigned SymbolContents : 3; - /// The alignment of the symbol if it is 'common'. 
/// /// Internally, this is stored as log2(align) + 1. @@ -145,10 +145,10 @@ protected: }; MCSymbol(const MCSymbolTableEntry *Name, bool isTemporary) - : IsTemporary(isTemporary), IsRedefinable(false), IsRegistered(false), - IsExternal(false), IsPrivateExtern(false), IsWeakExternal(false), - IsUsedInReloc(false), IsResolving(0), SymbolContents(SymContentsUnset), - CommonAlignLog2(0), Flags(0) { + : SymbolContents(SymContentsUnset), IsTemporary(isTemporary), + IsRedefinable(false), IsRegistered(false), IsExternal(false), + IsPrivateExtern(false), IsWeakExternal(false), IsUsedInReloc(false), + IsResolving(0), CommonAlignLog2(0), Flags(0) { Offset = 0; HasName = !!Name; if (Name) diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp index 8192896eeb6b..b19842aae46c 100644 --- a/llvm/lib/MC/MCSymbol.cpp +++ b/llvm/lib/MC/MCSymbol.cpp @@ -20,6 +20,10 @@ using namespace llvm; +// There are numerous MCSymbol objects, so keeping sizeof(MCSymbol) small is +// crucial for minimizing peak memory usage. +static_assert(sizeof(MCSymbol) <= 24, "Keep the base symbol small"); + // Only the address of this fragment is ever actually used. static MCFragment SentinelFragment; From 351d398a37d8545d8722ba36ad364067e94a69df Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 16 Aug 2025 18:54:27 +0100 Subject: [PATCH 071/214] [VPlan] Run final VPlan simplifications before codegen. Dissolving the hierarchical VPlan CFG and converting abstract to concrete recipes can expose additional simplification opportunities. Do a final run of simplifyRecipes before executing the VPlan. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 1 + .../Transforms/Vectorize/VPlanVerifier.cpp | 1 + .../LoopVectorize/AArch64/call-costs.ll | 5 +- .../AArch64/clamped-trip-count.ll | 6 +- .../AArch64/divs-with-scalable-vfs.ll | 6 +- .../AArch64/drop-poison-generating-flags.ll | 5 +- .../AArch64/force-target-instruction-cost.ll | 11 +-- .../LoopVectorize/AArch64/induction-costs.ll | 9 +- .../AArch64/interleave-with-gaps.ll | 6 +- .../AArch64/invariant-replicate-region.ll | 5 +- .../AArch64/low_trip_count_predicates.ll | 8 +- .../AArch64/mul-simplification.ll | 8 +- .../LoopVectorize/AArch64/optsize_minsize.ll | 54 +++++------ .../AArch64/outer_loop_prefer_scalable.ll | 33 ++++--- .../AArch64/scalable-avoid-scalarization.ll | 3 +- .../AArch64/simple_early_exit.ll | 9 +- .../LoopVectorize/AArch64/sve-widen-gep.ll | 12 +-- .../AArch64/synthesize-mask-for-call.ll | 9 +- ...eave-to-widen-memory-remove-loop-region.ll | 15 ++- ...arrow-interleave-to-widen-memory-unroll.ll | 10 +- .../AArch64/type-shrinkage-insertelt.ll | 6 +- ...ctor-loop-backedge-elimination-epilogue.ll | 7 +- .../LoopVectorize/AArch64/vplan-printing.ll | 9 +- .../LoopVectorize/ARM/optsize_minsize.ll | 45 ++++----- .../LoopVectorize/LoongArch/defaults.ll | 3 +- .../Transforms/LoopVectorize/RISCV/divrem.ll | 27 ++---- .../RISCV/evl-compatible-loops.ll | 3 +- .../RISCV/interleaved-accesses.ll | 33 +++---- .../RISCV/interleaved-masked-access.ll | 8 +- .../LoopVectorize/RISCV/mask-index-type.ll | 3 +- ...ruction-or-drop-poison-generating-flags.ll | 3 +- .../Transforms/LoopVectorize/RISCV/pr88802.ll | 7 +- .../LoopVectorize/RISCV/safe-dep-distance.ll | 3 +- .../LoopVectorize/RISCV/strided-accesses.ll | 6 +- .../RISCV/tail-folding-cast-intrinsics.ll | 6 +- .../RISCV/tail-folding-cond-reduction.ll | 28 ++---- .../tail-folding-fixed-order-recurrence.ll | 6 +- .../RISCV/tail-folding-gather-scatter.ll | 6 +- .../RISCV/tail-folding-interleave.ll | 24 ++--- .../RISCV/tail-folding-safe-dep-distance.ll | 12 +-- 
.../LoopVectorize/RISCV/uniform-load-store.ll | 18 ++-- ...demanding-all-lanes-and-first-lane-only.ll | 5 +- .../LoopVectorize/X86/constant-fold.ll | 3 +- .../X86/cost-constant-known-via-scev.ll | 8 +- .../LoopVectorize/X86/cost-model.ll | 12 +-- .../X86/imprecise-through-phis.ll | 12 +-- .../LoopVectorize/X86/induction-costs.ll | 6 +- .../LoopVectorize/X86/interleave-cost.ll | 15 ++- .../LoopVectorize/X86/load-deref-pred.ll | 72 +++++--------- .../LoopVectorize/X86/metadata-enable.ll | 6 +- .../Transforms/LoopVectorize/X86/optsize.ll | 6 +- .../X86/pr141968-instsimplifyfolder.ll | 5 +- .../Transforms/LoopVectorize/X86/pr34438.ll | 3 +- ...6-sunk-instruction-used-outside-of-loop.ll | 5 +- .../LoopVectorize/X86/reduction-fastmath.ll | 18 ++-- .../X86/replicate-uniform-call.ll | 5 +- .../LoopVectorize/X86/strided_load_cost.ll | 6 +- .../LoopVectorize/X86/uniform_mem_op.ll | 6 +- .../X86/vect.omp.force.small-tc.ll | 3 +- ...ned-value-used-as-scalar-and-first-lane.ll | 6 +- .../LoopVectorize/X86/x86-predication.ll | 6 +- .../constantfolder-infer-correct-gepty.ll | 5 +- .../LoopVectorize/constantfolder.ll | 35 +++---- .../LoopVectorize/create-induction-resume.ll | 6 +- .../LoopVectorize/dbg-outer-loop-vect.ll | 2 +- .../debugloc-optimize-vfuf-term.ll | 5 +- ...able-info-from-assumption-constant-size.ll | 80 +++++++--------- .../dont-fold-tail-for-const-TC.ll | 3 +- ...irst-order-recurrence-dead-instructions.ll | 8 +- .../first-order-recurrence-interleave-only.ll | 11 +-- ...t-order-recurrence-multiply-recurrences.ll | 9 +- .../LoopVectorize/first-order-recurrence.ll | 36 +++---- .../float-minmax-instruction-flag.ll | 6 +- .../LoopVectorize/if-pred-stores.ll | 15 +-- ...ction-multiple-uses-in-same-instruction.ll | 3 +- .../LoopVectorize/induction-step.ll | 5 +- .../Transforms/LoopVectorize/induction.ll | 54 ++++------- .../instruction-only-used-outside-of-loop.ll | 12 +-- .../interleave-with-i65-induction.ll | 11 +-- ...aved-accesses-different-insert-position.ll | 8 +- 
.../interleaved-accesses-metadata.ll | 5 +- .../Transforms/LoopVectorize/is_fpclass.ll | 3 +- .../LoopVectorize/iv-select-cmp-decreasing.ll | 80 ++++++---------- .../LoopVectorize/iv-select-cmp-trunc.ll | 72 ++++++-------- .../LoopVectorize/iv_outside_user.ll | 75 ++++++--------- .../LoopVectorize/load-deref-pred-align.ll | 27 ++---- .../LoopVectorize/load-deref-pred-neg-off.ll | 6 +- .../load-of-struct-deref-pred.ll | 6 +- .../LoopVectorize/make-followup-loop-id.ll | 5 +- .../test/Transforms/LoopVectorize/metadata.ll | 40 ++++---- .../minimumnum-maximumnum-reductions.ll | 32 +++---- .../LoopVectorize/noalias-scope-decl.ll | 5 +- llvm/test/Transforms/LoopVectorize/optsize.ll | 25 ++--- .../LoopVectorize/outer_loop_scalable.ll | 3 +- .../LoopVectorize/pr36983-multiple-lcssa.ll | 8 +- .../LoopVectorize/pr44488-predication.ll | 3 +- .../pr55167-fold-tail-live-out.ll | 9 +- llvm/test/Transforms/LoopVectorize/pr66616.ll | 3 +- .../LoopVectorize/predicate-switch.ll | 30 +++--- .../preserve-dbg-loc-and-loop-metadata.ll | 10 +- .../LoopVectorize/remarks-reduction-inloop.ll | 8 +- .../LoopVectorize/reverse_induction.ll | 38 +++----- .../scalable-first-order-recurrence.ll | 3 +- .../scev-exit-phi-invalidation.ll | 7 +- .../LoopVectorize/select-neg-cond.ll | 5 +- ...tion-start-value-may-be-undef-or-poison.ll | 18 ++-- .../single-early-exit-deref-assumptions.ll | 3 +- .../single-early-exit-interleave-hint.ll | 5 +- .../single-early-exit-interleave.ll | 21 ++--- .../LoopVectorize/single-value-blend-phis.ll | 21 ++--- .../LoopVectorize/single_early_exit.ll | 9 +- .../single_early_exit_live_outs.ll | 93 +++++++------------ .../strided-accesses-interleave-only.ll | 3 +- .../LoopVectorize/trunc-extended-icmps.ll | 3 +- .../LoopVectorize/trunc-loads-p16.ll | 3 +- .../Transforms/LoopVectorize/trunc-shifts.ll | 18 ++-- .../LoopVectorize/uitofp-preserve-nneg.ll | 3 +- .../Transforms/LoopVectorize/uniform-blend.ll | 15 ++- .../unused-blend-mask-for-first-operand.ll | 9 +- 
...or-loop-backedge-elimination-early-exit.ll | 30 +++--- ...p-backedge-elimination-outside-iv-users.ll | 26 ++---- .../LoopVectorize/vplan-iv-transforms.ll | 3 +- .../widen-gep-all-indices-invariant.ll | 9 +- .../LoopVectorize/widen-intrinsic.ll | 5 +- 124 files changed, 659 insertions(+), 1147 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a179775e1a04..675a230bd2c9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7292,6 +7292,7 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan, VectorPH, CM.foldTailByMasking(), CM.requiresScalarEpilogue(BestVF.isVector())); VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF); + VPlanTransforms::simplifyRecipes(BestVPlan); // Perform the actual loop transformation. VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index ef9ea73ba994..e25ffe135418 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -183,6 +183,7 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { case Instruction::ZExt: case Instruction::Mul: case Instruction::FMul: + case VPInstruction::Broadcast: // Opcodes above can only use EVL after wide inductions have been // expanded. 
if (!VerifyLate) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index b7706da36428..f099c22333c3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -72,7 +72,7 @@ exit: define void @powi_call(ptr %P) { ; CHECK-LABEL: define void @powi_call( ; CHECK-SAME: ptr [[P:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -84,10 +84,9 @@ define void @powi_call(ptr %P) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP]], align 8 ; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.powi.f64.i32(double [[L]], i32 3) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 511622d28d64..626242667e20 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -15,8 +15,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]] -; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -90,8 +89,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP1]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 1819d7f56153..bfebbdad5af0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -119,8 +119,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -235,8 +234,7 @@ define void 
@udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP20]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll index 221d944e1bc2..0cb46e18c536 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll @@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux" define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %dst.1, ptr noalias %src.1, ptr %src.2) { ; CHECK-LABEL: define void @check_widen_intrinsic_with_nnan( ; CHECK-SAME: ptr noalias [[DST_0:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[SRC_1:%.*]], ptr [[SRC_2:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -70,10 +70,9 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[IV]] ; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[GEP_SRC_1]], align 8 ; CHECK-NEXT: [[ABS:%.*]] = tail call nnan double @llvm.fabs.f64(double [[L_1]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 927d1b82bc48..ad184bec2ac7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -6,7 +6,7 @@ target triple = "arm64-apple-macosx14.0.0" define double @test_reduction_costs() { ; CHECK-LABEL: define double @test_reduction_costs() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -19,14 +19,11 @@ define double @test_reduction_costs() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_1:.*]] ; CHECK: [[LOOP_1]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ] -; CHECK-NEXT: [[R_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ] -; CHECK-NEXT: [[R_2:%.*]] = phi double [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[R_1:%.*]] = phi double [ 0.000000e+00, 
%[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[R_2:%.*]] = phi double [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ] ; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00 ; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index aa2ec2de14c2..fed57c919c6c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -171,14 +171,11 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[START]], [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[RECUR_NEXT]] = zext i32 [[L]] to i64 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 8d86de521b41..c23695dc5dbe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -65,8 +65,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP26:%.*]] = mul [[TMP25]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP26]] -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 1, [[TMP17]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP27]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: @@ -201,8 +200,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = mul [[TMP25]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[BROADCAST_SPLAT4]], [[TMP38]] -; CHECK-NEXT: [[TMP39:%.*]] = mul i64 1, [[TMP17]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP39]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll index d45dbcc8b166..0327334e220f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll @@ -8,7 +8,7 @@ target triple = "arm64-apple-macosx14.0.0" define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK-LABEL: define i32 @test_invariant_replicate_region( ; CHECK-SAME: i32 [[X:%.*]], i1 [[C:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 @@ -52,10 +52,9 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] ; CHECK: [[THEN]]: ; CHECK-NEXT: [[REM_1:%.*]] = urem i32 10, [[X]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 110685e377dd..1f486fba069c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -474,7 +474,7 @@ while.end: define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) { ; CHECK-LABEL: define i32 @tc4( ; CHECK-SAME: ptr noundef readonly captures(none) 
[[TMP:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -490,12 +490,10 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) { ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll index 1159a641f5ce..2ca117c33dbb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll @@ -45,7 +45,7 @@ exit: define i32 @add_reduction_select_operand_constant_but_non_uniform() { ; CHECK-LABEL: define i32 @add_reduction_select_operand_constant_but_non_uniform() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; 
CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -63,12 +63,10 @@ define i32 @add_reduction_select_operand_constant_but_non_uniform() { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 42, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD2_REASS:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD2_REASS:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 42, %[[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[ADD2_REASS]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[RDX_NEXT]] = add i32 0, [[RDX]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD2_REASS]], 64 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 410abfbc2f2b..67e6902b5d32 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -17,7 +17,7 @@ target triple = "aarch64-unknown-linux-gnu" define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-LABEL: define void @always_vectorize( ; DEFAULT-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) { -; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 @@ -31,10 +31,9 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] ; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] @@ -47,7 +46,7 @@ define void @always_vectorize(ptr %p, i32 %x) { ; ; OPTSIZE-LABEL: define void @always_vectorize( ; OPTSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; OPTSIZE-NEXT: [[ENTRY:.*]]: +; OPTSIZE-NEXT: [[ENTRY:.*:]] ; OPTSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 @@ -61,10 +60,9 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] ; OPTSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OPTSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] @@ -77,7 +75,7 @@ define void @always_vectorize(ptr %p, i32 %x) { ; ; MINSIZE-LABEL: define void @always_vectorize( ; MINSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; MINSIZE-NEXT: [[ENTRY:.*]]: +; 
MINSIZE-NEXT: [[ENTRY:.*:]] ; MINSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 @@ -91,10 +89,9 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] ; MINSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; MINSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] @@ -510,8 +507,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 -; DEFAULT-NEXT: [[TMP13:%.*]] = mul i8 1, [[TMP12]] -; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP13]], i64 0 +; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: @@ -578,8 +574,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 -; OPTSIZE-NEXT: [[TMP13:%.*]] = mul i8 1, 
[[TMP12]] -; OPTSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP13]], i64 0 +; OPTSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; OPTSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] ; OPTSIZE: [[VECTOR_BODY]]: @@ -646,8 +641,7 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; MINSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP1]] to i8 -; MINSIZE-NEXT: [[TMP13:%.*]] = mul i8 1, [[TMP12]] -; MINSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP13]], i64 0 +; MINSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP12]], i64 0 ; MINSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] ; MINSIZE: [[VECTOR_BODY]]: @@ -721,7 +715,7 @@ for.cond.cleanup: ; FIXME: We currently vectorize with minsize as the trunc cost is incorrect define void @dont_vectorize_with_minsize() { ; DEFAULT-LABEL: define void @dont_vectorize_with_minsize() { -; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] @@ -753,10 +747,9 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; 
DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -775,7 +768,7 @@ define void @dont_vectorize_with_minsize() { ; ; OPTSIZE-LABEL: define void @dont_vectorize_with_minsize( ; OPTSIZE-SAME: ) #[[ATTR0]] { -; OPTSIZE-NEXT: [[ENTRY:.*]]: +; OPTSIZE-NEXT: [[ENTRY:.*:]] ; OPTSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -797,10 +790,9 @@ define void @dont_vectorize_with_minsize() { ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -819,7 +811,7 @@ define void @dont_vectorize_with_minsize() { ; ; MINSIZE-LABEL: define void @dont_vectorize_with_minsize( ; MINSIZE-SAME: ) #[[ATTR0]] { -; MINSIZE-NEXT: [[ENTRY:.*]]: +; MINSIZE-NEXT: [[ENTRY:.*:]] ; MINSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -841,10 +833,9 @@ define void @dont_vectorize_with_minsize() { ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] 
; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -891,7 +882,7 @@ for.cond.cleanup: ; FIXME: We currently use width 2 as the load/store cost is incorrect. define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT-LABEL: define void @vectorization_forced_minsize_reduce_width() { -; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] @@ -923,10 +914,9 @@ define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -945,7 
+935,7 @@ define void @vectorization_forced_minsize_reduce_width() { ; ; OPTSIZE-LABEL: define void @vectorization_forced_minsize_reduce_width( ; OPTSIZE-SAME: ) #[[ATTR0]] { -; OPTSIZE-NEXT: [[ENTRY:.*]]: +; OPTSIZE-NEXT: [[ENTRY:.*:]] ; OPTSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -967,10 +957,9 @@ define void @vectorization_forced_minsize_reduce_width() { ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -989,7 +978,7 @@ define void @vectorization_forced_minsize_reduce_width() { ; ; MINSIZE-LABEL: define void @vectorization_forced_minsize_reduce_width( ; MINSIZE-SAME: ) #[[ATTR0]] { -; MINSIZE-NEXT: [[ENTRY:.*]]: +; MINSIZE-NEXT: [[ENTRY:.*:]] ; MINSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1011,10 +1000,9 @@ define void @vectorization_forced_minsize_reduce_width() { ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index 87a18ba2c18e..50df6fcd3cdc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -21,33 +21,32 @@ define void @foo() { ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP3]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_LATCH:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_LATCH]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP7]], i32 4, splat (i1 true), poison) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x float], ptr @A, i64 0, [[VEC_IND]] +; 
CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP6]], i32 4, splat (i1 true), poison) ; CHECK-NEXT: br label [[INNER_LOOP1:%.*]] ; CHECK: inner_loop1: -; CHECK-NEXT: [[TMP8:%.*]] = phi [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP12:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP11:%.*]], [[INNER_LOOP1]] ] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, [[TMP8]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, splat (i1 true), poison) -; CHECK-NEXT: [[TMP11]] = fmul [[TMP9]], [[WIDE_MASKED_GATHER2]] -; CHECK-NEXT: [[TMP12]] = add nuw nsw [[TMP8]], splat (i64 1) -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq [[TMP12]], splat (i64 512) -; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] +; CHECK-NEXT: [[TMP7:%.*]] = phi [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP11:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi [ [[WIDE_MASKED_GATHER]], [[VECTOR_BODY]] ], [ [[TMP10:%.*]], [[INNER_LOOP1]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [512 x float], ptr @B, i64 0, [[TMP7]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP9]], i32 4, splat (i1 true), poison) +; CHECK-NEXT: [[TMP10]] = fmul [[TMP8]], [[WIDE_MASKED_GATHER2]] +; CHECK-NEXT: [[TMP11]] = add nuw nsw [[TMP7]], splat (i64 1) +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq [[TMP11]], splat (i64 512) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 +; CHECK-NEXT: br i1 [[TMP13]], label [[VECTOR_LATCH]], label [[INNER_LOOP1]] ; CHECK: vector.latch: -; CHECK-NEXT: [[TMP15:%.*]] = phi [ [[TMP11]], [[INNER_LOOP1]] ] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[TMP15]], [[TMP7]], i32 4, splat (i1 true)) +; CHECK-NEXT: [[TMP14:%.*]] = phi [ [[TMP10]], 
[[INNER_LOOP1]] ] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[TMP14]], [[TMP6]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 62971b5ea3f8..2521ece2eea0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -29,8 +29,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i32 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP5]] -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 1ff59bd3a4c1..b7016ff4abf8 
100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -128,10 +128,9 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] @@ -202,10 +201,9 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) @@ -367,10 +365,9 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: 
[[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 1dd8dd531e17..ef111caafbf0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -34,8 +34,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 1) -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP8]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP7]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, [[VECTOR_GEP]], i64 1 @@ -45,8 +44,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store [[TMP12]], ptr [[TMP10]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP3]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr 
[[POINTER_PHI]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -116,15 +114,13 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store [[TMP17]], ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP4]] -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP4]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 3b04df3fec5e..e450fe7b54d4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -319,10 +319,9 @@ define void @test_v4_v4m(ptr noalias %a, ptr readonly %b) #3 { ; CHECK: 
middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]] @@ -372,10 +371,9 @@ define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2:[0-9]+]] @@ -425,10 +423,9 @@ define void @test_v2_v4(ptr noalias %a, ptr readonly %b) #3 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 000e09004368..4dbe0d96f4de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -8,7 +8,7 @@ target triple = "arm64-apple-macosx" define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF2-LABEL: define void @load_store_interleave_group_tc_2( ; VF2-SAME: ptr noalias [[DATA:%.*]]) { -; VF2-NEXT: [[ENTRY:.*]]: +; VF2-NEXT: [[ENTRY:.*:]] ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF2: [[VECTOR_PH]]: ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -23,10 +23,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF2: [[MIDDLE_BLOCK]]: ; VF2-NEXT: br label %[[EXIT:.*]] ; VF2: [[SCALAR_PH]]: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF2-NEXT: br label %[[LOOP:.*]] ; VF2: [[LOOP]]: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VF2-NEXT: [[MUL_2:%.*]] = shl nsw i64 [[IV]], 1 ; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[MUL_2]] ; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8 @@ -226,7 +225,7 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; ; VF2-LABEL: define void @test_complex_add_float_tc_4( ; VF2-SAME: ptr [[RES:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { -; VF2-NEXT: [[ENTRY:.*]]: +; 
VF2-NEXT: [[ENTRY:.*:]] ; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF2: [[VECTOR_PH]]: ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -252,10 +251,9 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; VF2: [[MIDDLE_BLOCK]]: ; VF2-NEXT: br label %[[EXIT:.*]] ; VF2: [[SCALAR_PH]]: -; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF2-NEXT: br label %[[LOOP:.*]] ; VF2: [[LOOP]]: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VF2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[IV]] ; VF2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[IV]] ; VF2-NEXT: [[L_A_0:%.*]] = load float, ptr [[GEP_A_0]], align 4 @@ -278,7 +276,7 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; ; VF4-LABEL: define void @test_complex_add_float_tc_4( ; VF4-SAME: ptr [[RES:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { -; VF4-NEXT: [[ENTRY:.*]]: +; VF4-NEXT: [[ENTRY:.*:]] ; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF4: [[VECTOR_PH]]: ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -298,10 +296,9 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br label %[[EXIT:.*]] ; VF4: [[SCALAR_PH]]: -; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF4-NEXT: br label %[[LOOP:.*]] ; VF4: [[LOOP]]: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VF4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VF4-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[IV]] ; VF4-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr 
[[B]], i64 [[IV]] ; VF4-NEXT: [[L_A_0:%.*]] = load float, ptr [[GEP_A_0]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll index 4df02a78a480..ee5f1929f41e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll @@ -7,7 +7,7 @@ target triple = "arm64-apple-macosx" define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-LABEL: define void @load_store_interleave_group( ; CHECK-SAME: ptr noalias [[DATA:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -28,10 +28,9 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MUL_2:%.*]] = shl nsw i64 [[IV]], 1 ; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[MUL_2]] ; CHECK-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8 @@ -70,7 +69,7 @@ exit: define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) { ; CHECK-LABEL: define void @test_2xi64_with_wide_load( ; CHECK-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -107,10 +106,9 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]] ; CHECK-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index a2cbf6f9c5a0..0ada7d0f2257 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -50,10 +50,9 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] ; CHECK: if.then1165.us: -; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] +; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] ; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 ; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 @@ -143,10 +142,9 @@ define void @test1(ptr noalias 
%M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] ; CHECK: if.then1165.us: -; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] +; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] ; CHECK-NEXT: [[FPTR:%.*]] = load i32, ptr [[C]], align 4 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] ; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll index a431fdd3178b..21928ce71500 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll @@ -34,18 +34,15 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TC]], 8 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TC]], [[N_MOD_VF2]] ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[VEC_EPILOG_RESUME_VAL]] ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP5]], align 4 ; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; 
CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TC]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 664af3c35b5f..d4e5dea3d4aa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -81,8 +81,6 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<1024>, ir<16> -; CHECK-NEXT: EMIT vp<[[VEC_TC:%.+]]> = sub ir<1024>, vp<%n.mod.vf> ; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4> ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: @@ -98,13 +96,12 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a> ; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul> ; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16> -; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024> ; CHECK-NEXT: Successor(s): middle.block, vector.body ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT 
vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: EMIT branch-on-cond ir ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -112,7 +109,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT-SCALAR vp<[[EP_RESUME:%.+]]> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<[[EP_RESUME:%.+]]> = phi [ ir<1024>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: EMIT-SCALAR vp<[[EP_MERGE:%.+]]> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: EMIT-SCALAR vp<%6> = resume-for-epilogue vp<%vec.epilog.resume.val> ; CHECK-NEXT: Successor(s): ir-bb diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 3f1d0dc2ff2a..dcf4bee728b2 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -17,7 +17,7 @@ target triple = "armv7a-none-eabi" define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-LABEL: define void @always_vectorize( ; DEFAULT-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) { -; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 @@ -31,10 +31,9 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] ; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] @@ -47,7 +46,7 @@ define void @always_vectorize(ptr %p, i32 %x) { ; ; OPTSIZE-LABEL: define void @always_vectorize( ; OPTSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; OPTSIZE-NEXT: [[ENTRY:.*]]: +; OPTSIZE-NEXT: [[ENTRY:.*:]] ; OPTSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 @@ -61,10 +60,9 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] ; OPTSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OPTSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] @@ -77,7 +75,7 @@ define void @always_vectorize(ptr %p, i32 %x) { ; ; MINSIZE-LABEL: define void @always_vectorize( ; MINSIZE-SAME: ptr [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; MINSIZE-NEXT: [[ENTRY:.*]]: +; MINSIZE-NEXT: [[ENTRY:.*:]] ; MINSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 @@ -91,10 +89,9 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] ; MINSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; MINSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] @@ -484,7 +481,7 @@ for.cond.cleanup: ; we don't account for the addressing mode difference. define void @dont_vectorize_with_minsize() { ; DEFAULT-LABEL: define void @dont_vectorize_with_minsize() { -; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] @@ -506,10 +503,9 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; DEFAULT-NEXT: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -528,7 +524,7 @@ define void @dont_vectorize_with_minsize() { ; ; OPTSIZE-LABEL: define void @dont_vectorize_with_minsize( ; OPTSIZE-SAME: ) #[[ATTR0]] { -; OPTSIZE-NEXT: [[ENTRY:.*]]: +; OPTSIZE-NEXT: [[ENTRY:.*:]] ; OPTSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -550,10 +546,9 @@ define void @dont_vectorize_with_minsize() { ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -572,7 +567,7 @@ define void @dont_vectorize_with_minsize() { ; ; MINSIZE-LABEL: define void @dont_vectorize_with_minsize( ; MINSIZE-SAME: ) #[[ATTR0]] { -; MINSIZE-NEXT: [[ENTRY:.*]]: +; MINSIZE-NEXT: [[ENTRY:.*:]] ; MINSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -594,10 +589,9 @@ define void @dont_vectorize_with_minsize() { ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: 
[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -644,7 +638,7 @@ for.cond.cleanup: ; FIXME: We currently use width 2 as the load/store cost is incorrect. define void @vectorization_forced() { ; DEFAULT-LABEL: define void @vectorization_forced() { -; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: [[ENTRY:.*:]] ; DEFAULT-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] @@ -666,10 +660,9 @@ define void @vectorization_forced() { ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[SCALAR_PH]]: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -688,7 +681,7 @@ define void @vectorization_forced() { ; ; OPTSIZE-LABEL: define void @vectorization_forced( ; OPTSIZE-SAME: ) #[[ATTR0]] { -; OPTSIZE-NEXT: [[ENTRY:.*]]: +; OPTSIZE-NEXT: [[ENTRY:.*:]] ; OPTSIZE-NEXT: br i1 false, label 
%[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; OPTSIZE: [[VECTOR_PH]]: ; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -710,10 +703,9 @@ define void @vectorization_forced() { ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[SCALAR_PH]]: -; OPTSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] ; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] @@ -732,7 +724,7 @@ define void @vectorization_forced() { ; ; MINSIZE-LABEL: define void @vectorization_forced( ; MINSIZE-SAME: ) #[[ATTR0]] { -; MINSIZE-NEXT: [[ENTRY:.*]]: +; MINSIZE-NEXT: [[ENTRY:.*:]] ; MINSIZE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; MINSIZE: [[VECTOR_PH]]: ; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -754,10 +746,9 @@ define void @vectorization_forced() { ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[SCALAR_PH]]: -; MINSIZE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] ; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] ; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4 ; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index 17eeafa574ae..8072a3d97313 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -35,10 +35,9 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 983b36caabfc..3af328fb6568 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -63,10 +63,9 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = 
load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]] @@ -150,10 +149,9 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]] @@ -237,10 +235,9 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[DIVREM:%.*]] = urem i64 [[ELEM]], [[V]] @@ -324,10 +321,9 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; 
FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[DIVREM:%.*]] = srem i64 [[ELEM]], [[V]] @@ -428,10 +424,9 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0 @@ -544,10 +539,9 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0 @@ -654,10 +648,9 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ 
[[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42 @@ -764,10 +757,9 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42 @@ -876,10 +868,9 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[FOR_END:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[FOR_BODY:%.*]] ; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] ; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] ; FIXED-NEXT: [[ELEM:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; FIXED-NEXT: [[C:%.*]] = icmp ne i8 [[ELEM]], -128 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 86afa5541044..be6f32a6f4ea 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll 
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -20,8 +20,7 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VEC_IND]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[TMP11]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 378478c00cd5..dc963f1bf264 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -79,10 +79,9 @@ define void @load_store_factor2_i32(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 @@ -251,10 +250,9 @@ define void @load_store_factor2_i64(ptr %p) { ; FIXED: 
middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -434,10 +432,9 @@ define void @load_store_factor3_i32(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 @@ -635,10 +632,9 @@ define void @load_store_factor3_i64(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -845,10 +841,9 @@ define void @load_store_factor4(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi 
i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -1084,10 +1079,9 @@ define void @load_store_factor5(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -1351,10 +1345,9 @@ define void @load_store_factor6(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -1647,10 +1640,9 @@ define void @load_store_factor7(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -1969,10 +1961,9 @@ define void @load_store_factor8(ptr %p) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 @@ -2244,10 +2235,9 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 @@ -2408,10 +2398,9 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; FIXED: middle.block: ; FIXED-NEXT: br label [[EXIT:%.*]] ; FIXED: scalar.ph: -; FIXED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; FIXED-NEXT: br label [[LOOP:%.*]] ; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], 
[[LOOP]] ] +; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 ; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll index 1bceb871bd99..48e8a1dac348 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll @@ -115,8 +115,6 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true) ; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 ; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP3:%.*]] = icmp ult [[TMP2]], [[BROADCAST_SPLAT4]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP4:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] @@ -138,7 +136,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP13]], i32 1, [[INTERLEAVED_MASK5]]) ; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]] -; 
PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024 ; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_DATA-WITH-EVL: middle.block: @@ -298,8 +296,6 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 16, i1 true) ; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 ; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_DATA-WITH-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP3:%.*]] = icmp ult [[TMP2]], [[BROADCAST_SPLAT4]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP4:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] @@ -325,7 +321,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP18]], i32 1, [[INTERLEAVED_MASK5]]) ; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]] -; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; 
PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024 ; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; PREDICATED_DATA-WITH-EVL: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 1c4a47a4815f..a46d877825f8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -27,8 +27,7 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; VLENUNK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP15]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP15]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; VLENUNK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i32() ; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT4]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index edc2b43d132c..37a0a8b4d7c8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -29,8 +29,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = 
insertelement poison, i32 [[TMP25]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP25]] to i64 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv8i32() ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT8]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 68afe686c606..5820aaee1677 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -26,13 +26,10 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ 9, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[FOR_COND]] ] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = mul i32 1, [[TMP11]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i32 [[TMP20]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP13:%.*]] = icmp ult [[TMP19]], [[BROADCAST_SPLAT6]] +; 
CHECK-NEXT: [[TMP13:%.*]] = icmp ult [[TMP19]], [[BROADCAST_SPLAT8]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp sge [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP13]], [[TMP14]], zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP7]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index b7a51662a578..e0436e8eb002 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -83,10 +83,9 @@ define void @test_may_clobber(ptr %p) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] ; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 ; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 59f1f4a6e54c..47979d358690 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -18,8 +18,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP12]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 
[[TMP9]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP14]] @@ -496,8 +495,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[TMP43:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; STRIDED-NEXT: [[TMP44:%.*]] = zext i32 [[TMP43]] to i64 -; STRIDED-NEXT: [[TMP45:%.*]] = mul i64 1, [[TMP44]] -; STRIDED-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement poison, i64 [[TMP45]], i64 0 +; STRIDED-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement poison, i64 [[TMP44]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT9]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP18:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT1]] ; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P]], [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index fb71f6c187a3..7ea462eed42d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -1203,8 +1203,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]] -; IF-EVL-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], [[VEC_IND]] ; IF-EVL-NEXT: [[TMP15:%.*]] = ptrtoint [[TMP14]] to @@ -1247,8 +1246,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP3]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-VP: [[VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index 8bfd93b53bc7..fe3a723a9bbe 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -458,8 +458,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]] -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP13]], 
i64 0 +; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV1]] ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP12]]) @@ -509,8 +508,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]] -; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 +; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 ; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV1]] ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP11]]) @@ -561,8 +559,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32 -; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] -; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 +; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; 
NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -618,8 +615,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32 -; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] -; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: @@ -696,13 +692,10 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP14]], i64 0 -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer -; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP14]] -; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP14]], i64 0 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32() -; IF-EVL-OUTLOOP-NEXT: 
[[TMP18:%.*]] = icmp ult [[TMP12]], [[BROADCAST_SPLAT4]] +; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp ult [[TMP12]], [[BROADCAST_SPLAT2]] ; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[ARRAYIDX]], splat (i1 true), i32 [[TMP14]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp sle [[VP_OP_LOAD]], [[VEC_IND2]] @@ -756,8 +749,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul i32 1, [[TMP11]] -; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 +; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 ; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[ARRAYIDX]], splat (i1 true), i32 [[TMP11]]) @@ -811,8 +803,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1) ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP7]] to i32 -; NO-VP-OUTLOOP-NEXT: [[TMP17:%.*]] = mul i32 1, [[TMP16]] -; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 +; NO-VP-OUTLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 ; 
NO-VP-OUTLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-OUTLOOP: vector.body: @@ -872,8 +863,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i32 1) ; NO-VP-INLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP3]] to i32 -; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] -; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; NO-VP-INLOOP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 ; NO-VP-INLOOP-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP-INLOOP: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index c0988380f8f1..bf54f669b65f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -603,8 +603,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP5]], %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP20]] = add 
[[VEC_IND]], splat (i64 42) ; IF-EVL-NEXT: [[TMP15:%.*]] = call @llvm.experimental.vp.splice.nxv2i64( [[VECTOR_RECUR]], [[TMP20]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP11]]) @@ -647,8 +646,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP3]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; NO-VP-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP14]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll index e16bb64073a0..6f723c268914 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll @@ -22,8 +22,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; IF-EVL-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[TMP13:%.*]] = mul i64 1, [[TMP12]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds i32, ptr [[INDEX:%.*]], [[VEC_IND]] ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP14]], splat (i1 true), i32 [[TMP11]]) @@ -69,8 +68,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP3]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index fbd4658e18b5..332c16e8eb65 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -148,8 +148,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], [[VEC_IND]], i32 0 ; 
IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP7]], splat (i1 true), i32 [[TMP4]]) @@ -206,8 +205,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP3]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -299,8 +297,7 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i32 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true) -; IF-EVL-NEXT: [[TMP7:%.*]] = mul i32 1, [[TMP6]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP6]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], [[VEC_IND2]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[VEC_IND2]], align 4 [[TMP10]], splat (i1 true), i32 [[TMP6]]) @@ -345,8 +342,7 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) ; NO-VP-NEXT: [[INDUCTION1:%.*]] = 
add zeroinitializer, [[TMP8]] -; NO-VP-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: @@ -428,8 +424,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], [[VEC_IND]], i32 0 ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP7]], splat (i1 true), i32 [[TMP4]]) @@ -486,8 +481,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; NO-VP-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; NO-VP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP3]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; 
NO-VP: vector.body: @@ -580,8 +574,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[VEC_IND2:%.*]] = phi [ [[INDUCTION1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i32 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true) -; IF-EVL-NEXT: [[TMP7:%.*]] = mul i32 1, [[TMP6]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP6]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A:%.*]], [[VEC_IND2]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[VEC_IND2]], align 4 [[TMP10]], splat (i1 true), i32 [[TMP6]]) @@ -626,8 +619,7 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; NO-VP-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() ; NO-VP-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) ; NO-VP-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP8]] -; NO-VP-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]] -; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP10]], i64 0 +; NO-VP-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; NO-VP-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index 6e810f71102d..21e87fdc7586 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ 
b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -127,10 +127,9 @@ define void @test_may_clobber1(ptr %p) { ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] ; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 ; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 @@ -160,10 +159,9 @@ define void @test_may_clobber1(ptr %p) { ; NO-VP: middle.block: ; NO-VP-NEXT: br label [[EXIT:%.*]] ; NO-VP: scalar.ph: -; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; NO-VP-NEXT: br label [[LOOP:%.*]] ; NO-VP: loop: -; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] ; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 ; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 @@ -263,10 +261,9 @@ define void @test_may_clobber3(ptr %p) { ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; IF-EVL-NEXT: br label [[LOOP:%.*]] ; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] ; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 ; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 10 @@ -296,10 +293,9 
@@ define void @test_may_clobber3(ptr %p) { ; NO-VP: middle.block: ; NO-VP-NEXT: br label [[EXIT:%.*]] ; NO-VP: scalar.ph: -; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; NO-VP-NEXT: br label [[LOOP:%.*]] ; NO-VP: loop: -; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] ; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 ; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 10 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 7e10ce6def11..e3a93cbf450a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -282,8 +282,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP17]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP17]] to i64 -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP18:%.*]] = call @llvm.stepvector.nxv4i32() ; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ult [[TMP18]], [[BROADCAST_SPLAT4]] @@ -387,8 +386,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT4:%.*]] = 
shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64 -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP11]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call @llvm.stepvector.nxv4i32() ; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = icmp ult [[TMP16]], [[BROADCAST_SPLAT4]] @@ -710,8 +708,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -; SCALABLE-NEXT: [[INDEX:%.*]] = mul i64 1, [[TMP8]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT1]], splat (i1 true), i32 [[TMP7]]) ; SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] @@ -794,8 +791,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP13]] -; TF-SCALABLE-NEXT: 
[[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP9]]) ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] @@ -857,8 +853,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; SCALABLE-NEXT: [[TMP14:%.*]] = zext i32 [[TMP7]] to i64 -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP14]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP14]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt [[VEC_IND]], splat (i64 10) ; SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[BROADCAST_SPLAT2]], [[TMP10]], i32 [[TMP7]]) @@ -956,8 +951,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP11]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; TF-SCALABLE-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp ugt [[VEC_IND]], splat (i64 10) ; TF-SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[BROADCAST_SPLAT2]], [[TMP10]], i32 [[TMP9]]) diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll index a91bc656cc7e..87f81881be32 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll @@ -11,7 +11,7 @@ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(ptr noalias %dst, ptr noalias %src.1) { ; CHECK-LABEL: define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst( ; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -67,10 +67,9 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] 
], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4 ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]] ; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index c7a0bcb71d11..a614d9a17550 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -24,10 +24,9 @@ define void @f1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[BB1:%.*]] ] ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[_TMP9:%.*]], [[BB2]] ] +; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[_TMP9:%.*]], [[BB2]] ] ; CHECK-NEXT: [[_TMP1:%.*]] = zext i16 0 to i64 ; CHECK-NEXT: [[_TMP2:%.*]] = getelementptr [1 x %rec8], ptr @a, i16 0, i64 [[_TMP1]] ; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index 04e0dafba6b8..aecfc668cf29 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -62,7 +62,7 @@ exit: ; Test case for https://github.com/llvm/llvm-project/issues/109528. 
define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-LABEL: define i64 @second_lshr_operand_zero_via_scev() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[EXT_0:%.*]] = sext i8 0 to i32 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -97,12 +97,10 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOPS:.*]] ; CHECK: [[LOOPS]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOPS]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOPS]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ] ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], 0 ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[IV]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index a30a8c9e6a02..472aa0b5b716 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1108,12 +1108,10 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 1, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT_I_I_I:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_MUL:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT_I_I_I:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 1, [[SCALAR_PH]] ], [ [[RED_MUL:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[NOT_X:%.*]] = xor i1 [[X]], true ; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[NOT_X]] to i64 ; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[Y]], [[EXT]] @@ -1165,12 +1163,10 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INC:%.*]], [[LOOP1]] ] -; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[OR:%.*]], [[LOOP1]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[VEC_EPILOG_PH]] ], [ [[INC:%.*]], [[LOOP1]] ] +; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ 0, [[VEC_EPILOG_PH]] ], [ [[OR:%.*]], [[LOOP1]] ] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1 ; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]] ; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 7fe4c14781e8..8164c10ac371 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -92,12 +92,10 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[TMP11:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]]) ; SSE-NEXT: br label [[DONE:%.*]] ; SSE: scalar.ph: -; 
SSE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] -; SSE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ] ; SSE-NEXT: br label [[LOOP:%.*]] ; SSE: loop: -; SSE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; SSE-NEXT: [[TOT:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] +; SSE-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] +; SSE-NEXT: [[TOT:%.*]] = phi double [ 0.000000e+00, [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] ; SSE-NEXT: [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] ; SSE-NEXT: [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 ; SSE-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 @@ -157,12 +155,10 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[BIN_RDX11]]) ; AVX-NEXT: br label [[DONE:%.*]] ; AVX: scalar.ph: -; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] -; AVX-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ] ; AVX-NEXT: br label [[LOOP:%.*]] ; AVX: loop: -; AVX-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; AVX-NEXT: [[TOT:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] +; AVX-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] +; AVX-NEXT: [[TOT:%.*]] = phi double [ 0.000000e+00, [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] ; AVX-NEXT: [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] ; AVX-NEXT: [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 ; AVX-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll 
b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 116a3822eac6..ff2846f235c9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -413,12 +413,10 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_NEXT]] to i16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 6d562be03a9b..361482e4cb74 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.15.0" define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noalias %p.invar, ptr noalias %dst.1, ptr noalias %dst.2) { ; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups( ; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -39,10 +39,9 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[IV_MUL:%.*]] = shl i64 [[IV]], 2 ; CHECK-NEXT: [[GEP_DST_19:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[IV_MUL]] @@ -507,7 +506,7 @@ exit: define void @interleave_store_double_i64(ptr %dst) { ; CHECK-LABEL: define void @interleave_store_double_i64( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -525,10 +524,9 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1 ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]] @@ -628,7 +626,7 @@ exit: define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-LABEL: define void 
@interleave_store_i64_double_2( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -646,10 +644,9 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index f615e23bcb8b..452868ddd59c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -65,12 +65,10 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX14]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; 
CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp slt i64 [[IV]], [[LEN]] ; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] @@ -216,12 +214,10 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -396,12 +392,10 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP101:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, 
[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -667,12 +661,10 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -984,12 +976,10 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], 
[[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1024, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -1228,12 +1218,10 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -1380,12 +1368,10 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
[[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -1532,12 +1518,10 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -1684,12 +1668,10 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: 
[[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -2005,12 +1987,10 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -2158,12 +2138,10 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] 
= phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 @@ -2321,12 +2299,10 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll index f26064a4a81d..6a90f03c2f7f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ 
b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1201,10 +1201,9 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; O1VEC2: middle.block: ; O1VEC2-NEXT: br label [[FOR_END:%.*]] ; O1VEC2: scalar.ph: -; O1VEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] ; O1VEC2: for.body: -; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]] ; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]] @@ -1242,10 +1241,9 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; OzVEC2: middle.block: ; OzVEC2-NEXT: br label [[FOR_END:%.*]] ; OzVEC2: scalar.ph: -; OzVEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] ; OzVEC2: for.body: -; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]] ; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 056b8ecddd88..6d7b8a222c51 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -222,10 +222,9 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK: middle.block: 
; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[MUL]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -259,10 +258,9 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF: middle.block: ; AUTOVF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; AUTOVF: scalar.ph: -; AUTOVF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; AUTOVF-NEXT: br label [[FOR_BODY:%.*]] ; AUTOVF: for.body: -; AUTOVF-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; AUTOVF-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] ; AUTOVF-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] ; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[MUL]] ; AUTOVF-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll index c1d08e152fc5..9181cce613ad 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll @@ -6,7 +6,7 @@ target triple = "x86_64" define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK-LABEL: define i8 @pr141968( ; CHECK-SAME: i1 [[COND:%.*]], i8 [[V:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[ZEXT_TRUE:%.*]] = zext i1 true to i16 ; CHECK-NEXT: 
[[SEXT:%.*]] = sext i8 [[V]] to i16 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -107,10 +107,9 @@ define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15 ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_LATCH]], label %[[COND_FALSE:.*]] ; CHECK: [[COND_FALSE]]: ; CHECK-NEXT: [[SDIV:%.*]] = sdiv i16 [[SEXT]], [[ZEXT_TRUE]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll index df2e35d3922d..204271173da0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -24,10 +24,9 @@ define void @small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly % ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 [[INDVARS_IV]] diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index 36f0f14e732c..2c97863c182b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -4,7 +4,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-LABEL: define ptr @test( ; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -45,10 +45,9 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV]], 0 ; CHECK-NEXT: br i1 [[CMP_1]], label %[[LOOP_LATCH]], label %[[THEN:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 0e83cf374fc3..04cdc759d812 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -73,12 +73,10 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) { ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x 
float> [[BIN_RDX]]) ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ 0.000000e+00, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] ; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 ; CHECK-NEXT: [[SUM_INC]] = fadd fast float [[SUM]], [[VALUE]] @@ -138,12 +136,10 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK-NEXT: [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ -0.000000e+00, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] ; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 ; CHECK-NEXT: [[SUM_INC]] = fadd reassoc 
float [[SUM]], [[VALUE]] @@ -203,12 +199,10 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK-NEXT: [[TMP9:%.*]] = call reassoc contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) ; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ -0.000000e+00, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] ; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 ; CHECK-NEXT: [[SUM_INC]] = fadd reassoc contract float [[SUM]], [[VALUE]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 0b61f207d041..28435d4f34ac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-LABEL: define void @smax_call_uniform( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69 ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0 ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -55,10 +55,9 @@ define void @smax_call_uniform(ptr %dst, i64 %x) 
{ ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 33b173d3a700..35f61b2aa838 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -556,10 +556,9 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 ; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] @@ -676,10 +675,9 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW: middle.block: ; MAX-BW-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; MAX-BW: scalar.ph: -; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; MAX-BW-NEXT: br label [[FOR_BODY:%.*]] ; MAX-BW: for.body: -; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; MAX-BW-NEXT: 
[[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; MAX-BW-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 ; MAX-BW-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 ; MAX-BW-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index 68b4f202e106..a491a6233fda 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -406,12 +406,10 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX14]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[BYTE:%.*]] = udiv i64 [[IV]], 8 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[BYTE]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 6979f4fc199a..52f491eed030 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -204,10 +204,9 @@ define void @vectorized2(ptr noalias nocapture %A, ptr noalias nocapture 
readonl ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 05d08a4e3635..c49d36962796 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -50,10 +50,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[G_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[G_SRC]], align 8 ; CHECK-NEXT: [[IV_4:%.*]] = add nuw nsw i64 [[IV]], 4 @@ -134,10 +133,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label 
[[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[G_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[G_SRC]], align 8 ; CHECK-NEXT: [[IV_4:%.*]] = add nuw nsw i64 [[IV]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index b0ae40cafbde..34c6384b63c8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -136,12 +136,10 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; SINK-GATHER-NEXT: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP47]]) ; SINK-GATHER-NEXT: br label [[FOR_END:%.*]] ; SINK-GATHER: scalar.ph: -; SINK-GATHER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; SINK-GATHER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; SINK-GATHER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-GATHER: for.body: -; SINK-GATHER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; SINK-GATHER-NEXT: [[R:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[T7:%.*]], [[FOR_INC]] ] +; SINK-GATHER-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] +; SINK-GATHER-NEXT: [[R:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[T7:%.*]], [[FOR_INC]] ] ; SINK-GATHER-NEXT: [[T0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] ; SINK-GATHER-NEXT: [[T1:%.*]] = load i32, ptr [[T0]], align 4 ; SINK-GATHER-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] diff --git 
a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll index af528eee503d..e629560354f2 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll @@ -6,7 +6,7 @@ define void @test(ptr %data) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ptr [[DATA:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -20,10 +20,9 @@ define void @test(ptr %data) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]] ; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[TBAA0]] diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll index cfd36bfe3652..9fbd1330de74 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll @@ -4,7 +4,7 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_ptradd( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] @@ -17,10 +17,9 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -59,7 +58,7 @@ exit: define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_inbounds_ptradd( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -72,10 +71,9 @@ define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -114,7 +112,7 @@ exit: define void @const_fold_select(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_select( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: 
[[VECTOR_PH]]: ; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[D]], 1 @@ -128,10 +126,9 @@ define void @const_fold_select(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -170,7 +167,7 @@ exit: define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_add_sub_mul_ashr_lshr( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -183,10 +180,9 @@ define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -233,7 +229,7 @@ exit: define void @const_fold_and_or_xor(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_and_or_xor( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br 
i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -246,10 +242,9 @@ define void @const_fold_and_or_xor(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -292,7 +287,7 @@ exit: define void @const_fold_cmp_zext(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_cmp_zext( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -305,10 +300,9 @@ define void @const_fold_cmp_zext(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -349,7 +343,7 @@ exit: define void @const_fold_trunc(ptr %dst, i64 %d) { ; CHECK-LABEL: define void @const_fold_trunc( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: 
[[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -362,10 +356,9 @@ define void @const_fold_trunc(ptr %dst, i64 %d) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index e9c7f75cb337..fbdc11dd9847 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -69,12 +69,10 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[L2_HEADER_LOOPEXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[L2_INNER_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 1, [[L2_INNER_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label [[L2_INNER_HEADER:%.*]] ; CHECK: L2.Inner.header: -; CHECK-NEXT: [[L2_ACCUM:%.*]] = phi i32 [ [[L2_ACCUM_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[L2_ACCUM:%.*]] = phi i32 [ [[L2_ACCUM_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ 1, [[SCALAR_PH]] ] +; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ 1, [[SCALAR_PH]] 
] ; CHECK-NEXT: [[L2_ACCUM_NEXT]] = sub i32 [[L2_ACCUM]], [[L1_EXIT_VAL]] ; CHECK-NEXT: [[L2_DUMMY_BUT_NEED_IT:%.*]] = sext i32 [[L2_ACCUM_NEXT]] to i64 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[L2_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index aa1b6cee0987..e3a8ca777ddd 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -13,7 +13,7 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG21]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ], !dbg [[DBG22:![0-9]+]] -; CHECK-NEXT: br label %[[FOR_COND5_PREHEADER1:.*]] +; CHECK-NEXT: br label %[[FOR_COND5_PREHEADER1:.*]], !dbg [[DBG21]] ; CHECK: [[FOR_COND5_PREHEADER1]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP5:%.*]], %[[FOR_COND5_PREHEADER1]] ], !dbg [[DBG23:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll index 9ade6e9a8980..ab9a84dddf92 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll @@ -6,7 +6,7 @@ define i32 @foo(ptr %p) { ; CHECK-LABEL: define i32 @foo( ; CHECK-SAME: ptr [[P:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -16,10 +16,9 @@ define i32 @foo(ptr %p) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]], !dbg [[DBG3]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], !dbg [[DBG7:![0-9]+]] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], !dbg [[DBG7]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ], !dbg [[DBG7:![0-9]+]] ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG8:![0-9]+]] ; CHECK-NEXT: store i8 [[CONV]], ptr [[P]], align 1, !dbg [[DBG3]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 373c8e0b385c..d24e7e871239 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -6,7 +6,7 @@ declare void @llvm.assume(i1) define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -50,10 +50,9 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; 
CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] @@ -104,7 +103,7 @@ exit: define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -127,10 +126,9 @@ define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -178,7 +176,7 @@ exit: define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void 
@deref_assumption_too_small_in_header_constant_trip_count( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -222,10 +220,9 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 2) ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] @@ -276,7 +273,7 @@ exit: define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_align_1( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -320,10 +317,9 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br 
label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] @@ -374,7 +370,7 @@ exit: define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attribute(ptr noalias align 4 %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attribute( ; CHECK-SAME: ptr noalias align 4 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -418,10 +414,9 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] @@ -472,7 +467,7 @@ exit: define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr noalias noundef %a, 
ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_align_not_known( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -516,10 +511,9 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] @@ -570,7 +564,7 @@ exit: define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_then_constant_trip_count( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -610,10 +604,9 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, 
%[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -664,7 +657,7 @@ exit: define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_latch_constant_trip_count( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -710,10 +703,9 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 @@ -866,7 +858,7 @@ exit: define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void 
@deref_assumption_in_preheader_constant_trip_count_align_1( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -887,10 +879,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -940,7 +931,7 @@ exit: define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 3999) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -980,10 +971,9 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi 
i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -1033,7 +1023,7 @@ exit: define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -1054,10 +1044,9 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -1108,7 +1097,7 @@ exit: define void 
@deref_assumption_in_preheader_constant_trip_count_align_4_known_via_argument_attr(ptr noalias noundef align 4 %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via_argument_attr( ; CHECK-SAME: ptr noalias noundef align 4 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -1129,10 +1118,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -1182,7 +1170,7 @@ exit: define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ 
-1222,10 +1210,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -1275,7 +1262,7 @@ exit: define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 3999) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -1315,10 +1302,9 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: 
[[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -1369,7 +1355,7 @@ exit: define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) { ; CHECK-LABEL: define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ] ; CHECK-NEXT: call void @may_free() ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1406,10 +1392,9 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 @@ -1459,7 +1444,7 @@ exit: define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %b, ptr noalias %c) nofree nosync { ; CHECK-LABEL: define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( ; CHECK-SAME: ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[A:%.*]] = call ptr @get_ptr() ; CHECK-NEXT: call void 
@llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ] ; CHECK-NEXT: call void @may_free() @@ -1497,10 +1482,9 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 ; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 4f95bddc4b4c..dae2cd3cacd0 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -26,10 +26,9 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]] ; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index e6a81b6f9f6d..414773cb00d7 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -87,7 +87,7 @@ exit: define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-LABEL: define i32 @sink_after_dead_inst( ; CHECK-SAME: ptr [[A_PTR:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -111,12 +111,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 ; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; CHECK-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; CHECK-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll index d95c48717819..c13d3421ba7f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll @@ -4,7 
+4,7 @@ define float @for_load_interleave_only(ptr %src) { ; CHECK-LABEL: define float @for_load_interleave_only( ; CHECK-SAME: ptr [[SRC:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -23,14 +23,11 @@ define float @for_load_interleave_only(ptr %src) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[SRC]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 16 ; CHECK-NEXT: [[L]] = load float, ptr [[PTR_IV]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 715ea1c51aba..899c20ab30c6 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -121,14 +121,11 @@ 
define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR6:%.*]] = phi float [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR6:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SCALAR_RECUR6]] ; CHECK-NEXT: [[MULADD:%.*]] = call float @llvm.fmuladd.f32(float [[SCALAR_RECUR]], float [[NEG]], float 0.000000e+00) ; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 0526f1b4ed1e..10cbf66c783d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1195,12 +1195,10 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, 
[[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 @@ -1222,12 +1220,10 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 @@ -1249,12 +1245,10 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: br label [[FOR_END:%.*]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = 
phi i64 [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1 ; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 @@ -3358,12 +3352,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 ; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; UNROLL-NO-IC-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true @@ -3401,12 +3393,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, 
[[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-VF: loop: -; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] ; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; UNROLL-NO-VF-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true @@ -3444,12 +3434,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 ; SINK-AFTER-NEXT: br label [[FOR_END:%.*]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] ; SINK-AFTER: loop: -; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; SINK-AFTER-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] ; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 ; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true ; SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true diff --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll index 2c02f839edff..5f4214c5d632 100644 --- 
a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll +++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll @@ -68,12 +68,10 @@ define float @minloopattr(ptr nocapture readonly %arg) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP4]]) ; CHECK-NEXT: br label [[OUT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[TOP:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[T]], [[TOP]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[SCALAR_PH]] ] +; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]] ; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[T3]], align 4 ; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index b971400c662b..ade90894ba90 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -69,10 +69,9 @@ define i32 @test(ptr nocapture %f) #0 { ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: for.body: -; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; 
UNROLL-NOSIMPLIFY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 100 @@ -452,12 +451,10 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 1000, [[ENTRY]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: for.body: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_INC:%.*]] ], [ 0, [[SCALAR_PH]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 1000, [[SCALAR_PH]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr undef, i64 [[TMP1]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] @@ -580,12 +577,10 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) { ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 1000, [[ENTRY]] ] ; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: for.body: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = phi i64 [ 
[[TMP7:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_INC:%.*]] ], [ 0, [[SCALAR_PH]] ] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 1000, [[SCALAR_PH]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP1]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll index 9cff1cfae0b1..ff7594a5d3a8 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll @@ -26,10 +26,9 @@ define void @multiple_iv_uses_in_same_instruction(ptr %ptr) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[IV]], i64 [[IV]] ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 59f6e8b04f54..0dab78039ea6 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -276,7 +276,7 @@ for.end: define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-LABEL: define void 
@iv_no_binary_op_in_descriptor( ; CHECK-SAME: i1 [[C:%.*]], ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -292,10 +292,9 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_P:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT_P:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8 ; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 77b91ccb913c..343facb2ef69 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2747,12 +2747,10 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[B_0:%.*]] = phi i8 [ 0, [[SCALAR_PH]] 
], [ [[B_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 ; CHECK-NEXT: [[B_NEXT]] = add i8 [[B_0]], -1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[B_NEXT]], 0 @@ -2818,12 +2816,10 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 ; UNROLL-NO-IC-NEXT: [[B_NEXT]] = add i8 [[B_0]], -1 ; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i8 [[B_NEXT]], 0 @@ -2884,12 +2880,10 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[B_0:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], 
[[LOOP]] ] ; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 ; CHECK-NEXT: [[B_0_NEXT]] = add i16 [[B_0]], -1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[B_0_NEXT]], 0 @@ -2955,12 +2949,10 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 ; UNROLL-NO-IC-NEXT: [[B_0_NEXT]] = add i16 [[B_0]], -1 ; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i16 [[B_0_NEXT]], 0 @@ -5017,12 +5009,10 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]]) ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -20, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ -20, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[A]], [[SCALAR_PH]] ], [ 
[[VAR6:%.*]], [[FOR_INC]] ] ; CHECK-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 ; CHECK-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 ; CHECK-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] @@ -5237,12 +5227,10 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -20, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] +; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i32 [ -20, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] +; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[A]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 ; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 ; UNROLL-NO-IC-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] @@ -5833,14 +5821,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ 
[[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]] ; CHECK-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 @@ -5955,14 +5940,11 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]] ; UNROLL-NO-IC-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll 
b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 3330f2bfe661..acd10a57e0ce 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -20,10 +20,9 @@ define i32 @one_direct_branch(ptr %src) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4 ; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]] @@ -76,10 +75,9 @@ define i32 @two_direct_branch(ptr %src) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4 ; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]] @@ -145,10 +143,9 @@ define i32 @cond_branch(i32 %a, ptr %src) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label 
[[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4 ; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]] @@ -210,10 +207,9 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll index 8a4820949af1..3eb16e9a2d78 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll @@ -5,7 +5,7 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK-LABEL: define void @i65_induction_with_negative_step( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -34,14 +34,11 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: 
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i65 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_I65:%.*]] = phi i65 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_I65_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_I65:%.*]] = phi i65 [ 0, %[[SCALAR_PH]] ], [ [[IV_I65_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TRUNC]] = trunc i65 [[IV_I65]] to i64 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TRUNC]] ; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll index 651210df823d..fa339f45fcdd 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" define void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr noalias %dst) { ; CHECK-LABEL: define void @gep_for_first_member_does_not_dominate_insert_point( ; CHECK-SAME: ptr [[STR:%.*]], ptr noalias [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -28,12 +28,10 @@ define 
void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr n ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i64 [[IV2]], 1 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[OR_1]] ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[GEP1]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll index 65148b0babcd..4fe7c97ccd66 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll @@ -14,7 +14,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias %cp, i32 %i) ; CHECK-LABEL: define void @merge_tbaa_interleave_group( ; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], ptr noalias [[CP:%.*]], i32 [[I:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -46,10 +46,9 @@ define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[IV]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[X]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP19]], 2.000000e+00 diff --git a/llvm/test/Transforms/LoopVectorize/is_fpclass.ll b/llvm/test/Transforms/LoopVectorize/is_fpclass.ll index 6eeeace80aa8..42f6c0532caa 100644 --- a/llvm/test/Transforms/LoopVectorize/is_fpclass.ll +++ b/llvm/test/Transforms/LoopVectorize/is_fpclass.ll @@ -22,10 +22,9 @@ define void @d() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I7:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[I7:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[I3:%.*]] = load float, ptr null, align 4 ; CHECK-NEXT: [[I4:%.*]] = getelementptr float, ptr @d, i64 [[I]] ; CHECK-NEXT: [[I5:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[I3]], i32 0) diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 1ad1094fe236..615f50124b41 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -6,7 +6,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start( ; IC1VF4-SAME: ptr [[A:%.*]]) { -; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: [[ENTRY:.*:]] ; 
IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC1VF4: [[VECTOR_PH]]: ; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -32,12 +32,10 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331 ; IC1VF4-NEXT: br label %[[EXIT:.*]] ; IC1VF4: [[SCALAR_PH]]: -; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 19999, %[[ENTRY]] ] -; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ] ; IC1VF4-NEXT: br label %[[LOOP:.*]] ; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 ; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 @@ -51,7 +49,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; ; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start( ; IC4VF4-SAME: ptr [[A:%.*]]) { -; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: [[ENTRY:.*:]] ; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC4VF4: [[VECTOR_PH]]: ; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -104,12 +102,10 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331 ; IC4VF4-NEXT: br label %[[EXIT:.*]] ; IC4VF4: [[SCALAR_PH]]: -; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 19999, %[[ENTRY]] ] -; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ] ; IC4VF4-NEXT: br label 
%[[LOOP:.*]] ; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 ; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 @@ -123,7 +119,7 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; ; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start( ; IC4VF1-SAME: ptr [[A:%.*]]) { -; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: [[ENTRY:.*:]] ; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC4VF1: [[VECTOR_PH]]: ; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -164,12 +160,10 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331 ; IC4VF1-NEXT: br label %[[EXIT:.*]] ; IC4VF1: [[SCALAR_PH]]: -; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 19999, %[[ENTRY]] ] -; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ] ; IC4VF1-NEXT: br label %[[LOOP:.*]] ; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; 
IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 ; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 @@ -204,7 +198,7 @@ exit: ; preds = %loop define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( ; IC1VF4-SAME: i16 noundef [[VAL:%.*]]) { -; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: [[ENTRY:.*:]] ; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC1VF4: [[VECTOR_PH]]: ; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0 @@ -234,12 +228,10 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0 ; IC1VF4-NEXT: br label %[[EXIT:.*]] ; IC1VF4: [[SCALAR_PH]]: -; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ] -; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; IC1VF4-NEXT: br label %[[LOOP:.*]] ; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] ; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 ; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] @@ -486,7 +478,7 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; ; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( ; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) { -; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: [[ENTRY:.*:]] ; IC4VF1-NEXT: br i1 
false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC4VF1: [[VECTOR_PH]]: ; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -532,12 +524,10 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0 ; IC4VF1-NEXT: br label %[[EXIT:.*]] ; IC4VF1: [[SCALAR_PH]]: -; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ] -; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; IC4VF1-NEXT: br label %[[LOOP:.*]] ; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] ; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 ; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] @@ -573,7 +563,7 @@ exit: ; preds = %loop define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half( ; IC1VF4-SAME: half noundef [[VAL:%.*]]) { -; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: [[ENTRY:.*:]] ; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC1VF4: [[VECTOR_PH]]: ; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0 @@ -603,12 +593,10 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0 ; IC1VF4-NEXT: br label %[[EXIT:.*]] ; IC1VF4: [[SCALAR_PH]]: -; IC1VF4-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ] -; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; IC1VF4-NEXT: br label %[[LOOP:.*]] ; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] ; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 ; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] @@ -855,7 +843,7 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; ; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half( ; IC4VF1-SAME: half noundef [[VAL:%.*]]) { -; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: [[ENTRY:.*:]] ; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC4VF1: [[VECTOR_PH]]: ; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -901,12 +889,10 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0 ; IC4VF1-NEXT: br label %[[EXIT:.*]] ; IC4VF1: [[SCALAR_PH]]: -; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ] -; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; IC4VF1-NEXT: br label %[[LOOP:.*]] ; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] ; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 ; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] @@ -943,7 +929,7 @@ exit: ; preds = %loop define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned( ; IC1VF4-SAME: ptr [[A:%.*]]) { -; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: [[ENTRY:.*:]] ; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC1VF4: [[VECTOR_PH]]: ; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -969,12 +955,10 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331 ; IC1VF4-NEXT: br label %[[EXIT:.*]] ; IC1VF4: [[SCALAR_PH]]: -; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ] -; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ] ; IC1VF4-NEXT: br label %[[LOOP:.*]] ; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 ; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 @@ -988,7 +972,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; ; IC4VF4-LABEL: define i64 
@select_decreasing_induction_icmp_iv_unsigned( ; IC4VF4-SAME: ptr [[A:%.*]]) { -; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: [[ENTRY:.*:]] ; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC4VF4: [[VECTOR_PH]]: ; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1041,12 +1025,10 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331 ; IC4VF4-NEXT: br label %[[EXIT:.*]] ; IC4VF4: [[SCALAR_PH]]: -; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ] -; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ] ; IC4VF4-NEXT: br label %[[LOOP:.*]] ; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 ; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 @@ -1060,7 +1042,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; ; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned( ; IC4VF1-SAME: ptr [[A:%.*]]) { -; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: [[ENTRY:.*:]] ; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC4VF1: [[VECTOR_PH]]: ; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1101,12 +1083,10 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331 ; IC4VF1-NEXT: br label %[[EXIT:.*]] ; 
IC4VF1: [[SCALAR_PH]]: -; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ] -; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ] ; IC4VF1-NEXT: br label %[[LOOP:.*]] ; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] ; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 ; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index 1054482fb80d..80c5bb359cb4 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -240,7 +240,7 @@ exit: ; preds = %for.body, %entry define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { -; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[ENTRY:.*:]] ; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4IC1: [[VECTOR_PH]]: ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -262,12 +262,10 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331 ; CHECK-VF4IC1-NEXT: br label %[[EXIT:.*]] ; CHECK-VF4IC1: [[SCALAR_PH]]: -; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-VF4IC1-NEXT: 
[[BC_MERGE_RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ] ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3 @@ -282,7 +280,7 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; ; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { -; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[ENTRY:.*:]] ; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4IC4: [[VECTOR_PH]]: ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -325,12 +323,10 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331 ; CHECK-VF4IC4-NEXT: br label %[[EXIT:.*]] ; CHECK-VF4IC4: [[SCALAR_PH]]: -; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ] ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] 
], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3 @@ -345,7 +341,7 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; ; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { -; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[ENTRY:.*:]] ; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF1IC4: [[VECTOR_PH]]: ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -389,12 +385,10 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331 ; CHECK-VF1IC4-NEXT: br label %[[EXIT:.*]] ; CHECK-VF1IC4: [[SCALAR_PH]]: -; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ] ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF1IC4: [[FOR_BODY]]: -; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3 @@ -431,7 +425,7 @@ exit: ; preds = %for.body 
define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC1-LABEL: define i32 @select_fcmp_max_valid_const_ub( ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { -; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[ENTRY:.*:]] ; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4IC1: [[VECTOR_PH]]: ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -453,12 +447,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 -1 ; CHECK-VF4IC1-NEXT: br label %[[EXIT:.*]] ; CHECK-VF4IC1: [[SCALAR_PH]]: -; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ] ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP7]], 0.000000e+00 @@ -473,7 +465,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; ; CHECK-VF4IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub( ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { -; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[ENTRY:.*:]] ; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4IC4: [[VECTOR_PH]]: ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -516,12 +508,10 @@ define i32 
@select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 -1 ; CHECK-VF4IC4-NEXT: br label %[[EXIT:.*]] ; CHECK-VF4IC4: [[SCALAR_PH]]: -; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ] ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00 @@ -536,7 +526,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; ; CHECK-VF1IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub( ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { -; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[ENTRY:.*:]] ; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF1IC4: [[VECTOR_PH]]: ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -580,12 +570,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 -1 ; CHECK-VF1IC4-NEXT: br label %[[EXIT:.*]] ; CHECK-VF1IC4: [[SCALAR_PH]]: -; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ] ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF1IC4: [[FOR_BODY]]: 
-; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP26]], 0.000000e+00 @@ -626,7 +614,7 @@ exit: ; preds = %for.body define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { -; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: [[ENTRY:.*:]] ; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4IC1: [[VECTOR_PH]]: ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -649,12 +637,10 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP5]], i32 331 ; CHECK-VF4IC1-NEXT: br label %[[EXIT:.*]] ; CHECK-VF4IC1: [[SCALAR_PH]]: -; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ] -; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ] ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ 2147483646, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] 
= phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]] ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 @@ -669,7 +655,7 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; ; CHECK-VF4IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { -; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: [[ENTRY:.*:]] ; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF4IC4: [[VECTOR_PH]]: ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -713,12 +699,10 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP14]], i32 331 ; CHECK-VF4IC4-NEXT: br label %[[EXIT:.*]] ; CHECK-VF4IC4: [[SCALAR_PH]]: -; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ] -; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ] ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 3 @@ -733,7 +717,7 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; ; CHECK-VF1IC4-LABEL: 
define i32 @select_icmp_truncated_unsigned_iv_range( ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { -; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: [[ENTRY:.*:]] ; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VF1IC4: [[VECTOR_PH]]: ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] @@ -779,12 +763,10 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX6]], i32 331 ; CHECK-VF1IC4-NEXT: br label %[[EXIT:.*]] ; CHECK-VF1IC4: [[SCALAR_PH]]: -; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ] -; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ] ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF1IC4: [[FOR_BODY]]: -; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 3 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 97d33858bd83..766e7acdfd1c 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -91,7 +91,7 @@ for.end: define i32 @constpre() { ; CHECK-LABEL: define i32 @constpre() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; 
CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -103,10 +103,9 @@ define i32 @constpre() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ 32, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[INC]] = sub nsw i32 [[INC_PHI]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 0 ; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -130,7 +129,7 @@ for.end: define ptr @geppre(ptr %ptr) { ; CHECK-LABEL: define ptr @geppre( ; CHECK-SAME: ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR]], i64 512 @@ -144,12 +143,10 @@ define ptr @geppre(ptr %ptr) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -16 ; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[PTR]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INC_PTR:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[PTR]], %[[SCALAR_PH]] ], [ [[INC_PTR:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[INC]] = add nsw i32 [[INC_PHI]], 1 ; CHECK-NEXT: [[INC_PTR]] = getelementptr 
i32, ptr [[PTR_PHI]], i32 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 32 @@ -399,7 +396,7 @@ BB4: define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC-LABEL: define i64 @iv_scalar_steps_and_outside_users( ; VEC-SAME: ptr [[PTR:%.*]]) { -; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[ENTRY:.*:]] ; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] @@ -415,10 +412,9 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] ; VEC: [[SCALAR_PH]]: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; VEC-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] ; VEC-NEXT: store i64 [[IV]], ptr [[GEP_PTR]], align 4 @@ -430,7 +426,7 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; ; INTERLEAVE-LABEL: define i64 @iv_scalar_steps_and_outside_users( ; INTERLEAVE-SAME: ptr [[PTR:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -447,10 +443,9 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ 
[[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; INTERLEAVE-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] ; INTERLEAVE-NEXT: store i64 [[IV]], ptr [[GEP_PTR]], align 4 @@ -481,7 +476,7 @@ exit: define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-LABEL: define i32 @iv_2_dead_in_loop_only_used_outside( ; VEC-SAME: ptr [[PTR:%.*]]) { -; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[ENTRY:.*:]] ; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] @@ -497,12 +492,10 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] ; VEC: [[SCALAR_PH]]: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; VEC-NEXT: [[IV_2_NEXT]] = add nuw i32 [[IV_2]], 2 ; VEC-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] @@ -515,7 +508,7 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; ; INTERLEAVE-LABEL: define i32 @iv_2_dead_in_loop_only_used_outside( ; INTERLEAVE-SAME: ptr [[PTR:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -532,12 +525,10 @@ define i32 
@iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add nuw i32 [[IV_2]], 2 ; INTERLEAVE-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] @@ -1083,7 +1074,7 @@ exit: define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( ; VEC-SAME: ptr [[DST:%.*]]) { -; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[ENTRY:.*:]] ; VEC-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 ; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1102,10 +1093,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[E_EXIT:.*]] ; VEC: [[SCALAR_PH]]: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] ; 
VEC-NEXT: store i16 0, ptr [[GEP_DST]], align 2 ; VEC-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]] @@ -1117,7 +1107,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( ; INTERLEAVE-SAME: ptr [[DST:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 ; INTERLEAVE-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1137,10 +1127,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] ; INTERLEAVE-NEXT: store i16 0, ptr [[GEP_DST]], align 2 ; INTERLEAVE-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]] @@ -1171,7 +1160,7 @@ e.exit: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( ; VEC-SAME: ptr [[DST:%.*]]) { -; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[ENTRY:.*:]] ; VEC-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 ; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1199,10 +1188,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; VEC-NEXT: br label %[[E_EXIT:.*]] ; 
VEC: [[SCALAR_PH]]: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] ; VEC-NEXT: store i16 0, ptr [[GEP_DST]], align 2 ; VEC-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 @@ -1215,7 +1203,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( ; INTERLEAVE-SAME: ptr [[DST:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32 ; INTERLEAVE-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1237,10 +1225,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] ; INTERLEAVE-NEXT: store i16 0, ptr [[GEP_DST]], align 2 ; INTERLEAVE-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 @@ -1363,7 +1350,7 @@ exit: define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-LABEL: define i64 @test_iv_increment_incremented( ; VEC-SAME: ptr [[DST:%.*]]) { -; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[ENTRY:.*:]] ; VEC-NEXT: br i1 false, label 
%[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1378,12 +1365,10 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] ; VEC: [[SCALAR_PH]]: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[ENTRY]] ] -; VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 2, %[[ENTRY]] ] ; VEC-NEXT: br label %[[LOOP:.*]] ; VEC: [[LOOP]]: -; VEC-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV_1:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[IV_2:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; VEC-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV_1]] ; VEC-NEXT: store i16 1, ptr [[GEP]], align 2 ; VEC-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 @@ -1396,7 +1381,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; ; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented( ; INTERLEAVE-SAME: ptr [[DST:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1411,12 +1396,10 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[ENTRY]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 2, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], 
[ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV_1:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV_1]] ; INTERLEAVE-NEXT: store i16 1, ptr [[GEP]], align 2 ; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 2c7d1bd3a134..b7b67c263d44 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -50,12 +50,10 @@ define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP17:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP15]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 @@ -146,12 +144,10 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP15]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] 
; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 @@ -376,10 +372,9 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1023, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]] ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3 @@ -488,10 +483,9 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1023, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: 
[[IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[OFF:%.*]] = add i64 [[IV]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFF]] ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -582,12 +576,10 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP15:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP13]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] ; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 @@ -691,10 +683,9 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 511, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 511, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP21]], 3 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll index 34c04de22755..468e6823e9b5 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll @@ -54,12 +54,10 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP20:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[TMP18]]) ; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ -1000, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i8 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ -1000, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[IV]] ; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index f99e883c045d..fbe57c81053f 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -31,10 +31,9 @@ define void @accesses_to_struct_dereferenceable(ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label 
[[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[GEP_DST]], align 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[D]], 0 @@ -270,10 +269,9 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[GEP_DST]], align 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[D]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll index 14a091feb58b..3190d239e047 100644 --- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll +++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll @@ -15,7 +15,7 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-LABEL: define void @f( ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 @@ -68,10 +68,9 @@ define void @f(ptr noundef captures(none) %a, 
float noundef %x) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index ce9c62408633..54779ed55cff 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -126,7 +126,7 @@ exit: define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; CHECK-LABEL: define void @widen_call_range( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -143,10 +143,9 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !range [[RNG9:![0-9]+]] ; CHECK-NEXT: [[CALL:%.*]] = 
call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]] @@ -160,7 +159,7 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; ; INTERLEAVE-LABEL: define void @widen_call_range( ; INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -182,10 +181,9 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] ; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !range [[RNG9:![0-9]+]] ; INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]] @@ -218,7 +216,7 @@ exit: define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK-LABEL: define void @widen_call_fpmath( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -235,10 +233,9 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: 
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]] @@ -252,7 +249,7 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; ; INTERLEAVE-LABEL: define void @widen_call_fpmath( ; INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -274,10 +271,9 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] ; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] ; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]] @@ -310,7 +306,7 @@ exit: define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; CHECK-LABEL: define void @widen_intrinsic( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -327,10 +323,9 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CALL:%.*]] = call i64 @llvm.abs.i64(i64 [[LOAD]], i1 true), !range [[RNG9]] @@ -344,7 +339,7 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; ; INTERLEAVE-LABEL: define void @widen_intrinsic( ; INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -366,10 +361,9 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] ; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @llvm.abs.i64(i64 [[LOAD]], i1 true), !range [[RNG9]] @@ -402,7 
+396,7 @@ exit: define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK-LABEL: define void @widen_intrinsic_fpmath( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -419,10 +413,9 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]] @@ -436,7 +429,7 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; ; INTERLEAVE-LABEL: define void @widen_intrinsic_fpmath( ; INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readonly [[B:%.*]]) { -; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[ENTRY:.*:]] ; INTERLEAVE-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] @@ -458,10 +451,9 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] ; INTERLEAVE: [[SCALAR_PH]]: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; INTERLEAVE-NEXT: br label %[[LOOP:.*]] ; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
%[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] ; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[TBAA0]] ; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]] diff --git a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll index e26fef4f02ee..536dffa920c6 100644 --- a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll @@ -5,7 +5,7 @@ define float @maximumnum_intrinsic(ptr readonly %x) { ; CHECK-LABEL: define float @maximumnum_intrinsic( ; CHECK-SAME: ptr readonly [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -27,12 +27,10 @@ define float @maximumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = 
getelementptr inbounds float, ptr [[X]], i32 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[RED_NEXT]] = tail call float @llvm.maximumnum.f32(float [[RED]], float [[L]]) @@ -63,7 +61,7 @@ exit: define float @maximumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-LABEL: define float @maximumnum_intrinsic_fast( ; CHECK-SAME: ptr readonly [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -85,12 +83,10 @@ define float @maximumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[RED_NEXT]] = tail call fast float @llvm.maximumnum.f32(float [[RED]], float [[L]]) @@ -121,7 +117,7 @@ exit: define float @minimumnum_intrinsic(ptr readonly %x) { ; CHECK-LABEL: define float @minimumnum_intrinsic( ; CHECK-SAME: ptr readonly [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: 
[[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -143,12 +139,10 @@ define float @minimumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[RED_NEXT]] = tail call float @llvm.minimumnum.f32(float [[RED]], float [[L]]) @@ -179,7 +173,7 @@ exit: define float @minimumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-LABEL: define float @minimumnum_intrinsic_fast( ; CHECK-SAME: ptr readonly [[X:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -201,12 +195,10 @@ define float @minimumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[RED_NEXT]] = tail call fast float @llvm.minimumnum.f32(float [[RED]], float [[L]]) diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 7fef13af8d4a..1fe0bf2713dd 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -4,7 +4,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; CHECK-LABEL: define void @test1( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -27,10 +27,9 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: 
[[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP7]], 1.000000e+02 diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index cdb9e9952586..a843aeb1ee8a 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -253,7 +253,7 @@ define void @pr43371() optsize { ; ; CHECK-LABEL: define void @pr43371( ; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -275,12 +275,11 @@ define void @pr43371() optsize { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY29:.*]] ; CHECK: [[FOR_COND_CLEANUP28]]: ; CHECK-NEXT: unreachable ; CHECK: [[FOR_BODY29]]: -; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] ; CHECK-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] ; CHECK-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 ; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] @@ -291,7 +290,7 @@ define void @pr43371() optsize { ; ; PGSO-LABEL: define void @pr43371( ; PGSO-SAME: ) #[[ATTR0]] { -; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: [[ENTRY:.*:]] ; PGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PGSO: [[VECTOR_PH]]: ; PGSO-NEXT: br label %[[VECTOR_BODY:.*]] @@ -313,12 +312,11 @@ define void @pr43371() optsize { ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] ; PGSO: [[SCALAR_PH]]: -; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, 
%[[ENTRY]] ] ; PGSO-NEXT: br label %[[FOR_BODY29:.*]] ; PGSO: [[FOR_COND_CLEANUP28]]: ; PGSO-NEXT: unreachable ; PGSO: [[FOR_BODY29]]: -; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] ; PGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] ; PGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 ; PGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] @@ -329,7 +327,7 @@ define void @pr43371() optsize { ; ; NPGSO-LABEL: define void @pr43371( ; NPGSO-SAME: ) #[[ATTR0]] { -; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: [[ENTRY:.*:]] ; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; NPGSO: [[VECTOR_PH]]: ; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] @@ -351,12 +349,11 @@ define void @pr43371() optsize { ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] ; NPGSO: [[SCALAR_PH]]: -; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] ; NPGSO: [[FOR_COND_CLEANUP28]]: ; NPGSO-NEXT: unreachable ; NPGSO: [[FOR_BODY29]]: -; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] ; NPGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] ; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 ; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] @@ -390,7 +387,7 @@ define void @pr43371_pgso() !prof !14 { ; ; CHECK-LABEL: define void @pr43371_pgso( ; CHECK-SAME: ) !prof [[PROF14]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] @@ -412,12 +409,11 @@ define void @pr43371_pgso() !prof !14 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY29:.*]] ; CHECK: [[FOR_COND_CLEANUP28]]: ; CHECK-NEXT: unreachable ; CHECK: [[FOR_BODY29]]: -; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] ; CHECK-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] ; CHECK-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 ; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] @@ -428,7 +424,7 @@ define void @pr43371_pgso() !prof !14 { ; ; PGSO-LABEL: define void @pr43371_pgso( ; PGSO-SAME: ) !prof [[PROF14]] { -; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: [[ENTRY:.*:]] ; PGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PGSO: [[VECTOR_PH]]: ; PGSO-NEXT: br label %[[VECTOR_BODY:.*]] @@ -450,12 +446,11 @@ define void @pr43371_pgso() !prof !14 { ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] ; PGSO: [[SCALAR_PH]]: -; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ] ; PGSO-NEXT: br label %[[FOR_BODY29:.*]] ; PGSO: [[FOR_COND_CLEANUP28]]: ; PGSO-NEXT: unreachable ; PGSO: [[FOR_BODY29]]: -; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] ; PGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] ; PGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 ; PGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll 
b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index 84f19a19f362..70ce7a7f33ab 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -27,8 +27,7 @@ define void @foo() { ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP4]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP3]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll index 2b21eb21bb94..356a344c15bd 100644 --- a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll +++ b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll @@ -5,7 +5,7 @@ define i16 @duplicate_lcssa(i16 %val) { ; CHECK-LABEL: define i16 @duplicate_lcssa( ; CHECK-SAME: i16 [[VAL:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -22,12 +22,10 @@ define i16 @duplicate_lcssa(i16 %val) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2 ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VAL]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: 
[[RES:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[IV_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i16 [ [[VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = sub nsw i16 [[IV]], 1 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i16 [[IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index 315ea12f7551..c5dc81b28db2 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -46,10 +46,9 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 99, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_07:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC7:%.*]], [[FOR_LATCH:%.*]] ] +; CHECK-NEXT: [[I_07:%.*]] = phi i16 [ 99, [[SCALAR_PH]] ], [ [[INC7:%.*]], [[FOR_LATCH:%.*]] ] ; CHECK-NEXT: [[LV:%.*]] = load i16, ptr @v_38, align 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[LV]], 32767 ; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END:%.*]], label [[COND_END]] diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index a28bdb838405..9f811f834818 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -23,8 +23,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: 
[[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> splat (i32 9) -; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> splat (i32 9) ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] ; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -36,12 +35,10 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[PREDPHI5]], i32 1 ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 6, [[BB:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 35902, [[BB]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[V_2:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[P_2:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 6, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[V_2:%.*]] = phi i32 [ 35902, [[SCALAR_PH]] ], [ [[P_2:%.*]], [[LOOP_LATCH]] ] ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[BODY_1:%.*]] ; CHECK: body.1: ; CHECK-NEXT: [[V_2_ADD:%.*]] = add i32 [[V_2]], 10 diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll index a39fd471080a..59c64764b901 100644 --- a/llvm/test/Transforms/LoopVectorize/pr66616.ll +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -23,10 +23,9 @@ define void @pr66616(ptr %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 ; CHECK-NEXT: br 
label [[PREHEADER:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_1:%.*]] ; CHECK: loop.1: -; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP_1]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP_1]] ] ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[LOAD]], 1 ; CHECK-NEXT: [[INC]] = add i8 [[IV_1]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 724aed888add..f59d4aa99918 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -338,21 +338,21 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[IF_THEN1]] +; IC1-NEXT: i64 120, label %[[IF_THEN]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[IF_THEN1]]: +; IC1: [[IF_THEN]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] -; IC1: [[IF_THEN:.*:]] +; IC1: [[IF_THEN1:.*:]] ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC1-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -361,21 +361,21 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: 
[[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[IF_THEN1]] +; IC2-NEXT: i64 120, label %[[IF_THEN]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[IF_THEN1]]: +; IC2: [[IF_THEN]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] -; IC2: [[IF_THEN:.*:]] +; IC2: [[IF_THEN1:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC2-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -406,7 +406,7 @@ exit: define void @switch_all_to_default(ptr %start) { ; IC1-LABEL: define void @switch_all_to_default( ; IC1-SAME: ptr [[START:%.*]]) { -; IC1-NEXT: [[ENTRY:.*]]: +; IC1-NEXT: [[ENTRY:.*:]] ; IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC1: [[VECTOR_PH]]: ; IC1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -420,10 +420,9 @@ define void @switch_all_to_default(ptr %start) { ; IC1: [[MIDDLE_BLOCK]]: ; IC1-NEXT: br label %[[EXIT:.*]] ; IC1: [[SCALAR_PH]]: -; IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH]] [ ; IC1-NEXT: i64 120, label 
%[[LOOP_LATCH]] @@ -439,7 +438,7 @@ define void @switch_all_to_default(ptr %start) { ; ; IC2-LABEL: define void @switch_all_to_default( ; IC2-SAME: ptr [[START:%.*]]) { -; IC2-NEXT: [[ENTRY:.*]]: +; IC2-NEXT: [[ENTRY:.*:]] ; IC2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IC2: [[VECTOR_PH]]: ; IC2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -455,10 +454,9 @@ define void @switch_all_to_default(ptr %start) { ; IC2: [[MIDDLE_BLOCK]]: ; IC2-NEXT: br label %[[EXIT:.*]] ; IC2: [[SCALAR_PH]]: -; IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH]] [ ; IC2-NEXT: i64 120, label %[[LOOP_LATCH]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index f11b2f25e34b..372c703f4cb2 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -12,7 +12,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @_Z3fooPf(ptr %a) { ; CHECK-LABEL: define void @_Z3fooPf( ; CHECK-SAME: ptr [[A:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -28,10 +28,9 @@ define void @_Z3fooPf(ptr %a) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: 
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[P:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[P]], 2.000000e+00 @@ -44,7 +43,7 @@ define void @_Z3fooPf(ptr %a) { ; ; DEBUGLOC-LABEL: define void @_Z3fooPf( ; DEBUGLOC-SAME: ptr [[A:%.*]]) !dbg [[DBG5:![0-9]+]] { -; DEBUGLOC-NEXT: [[ENTRY:.*]]: +; DEBUGLOC-NEXT: [[ENTRY:.*:]] ; DEBUGLOC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG18:![0-9]+]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG18]] @@ -60,10 +59,9 @@ define void @_Z3fooPf(ptr %a) { ; DEBUGLOC: [[MIDDLE_BLOCK]]: ; DEBUGLOC-NEXT: br label %[[FOR_END:.*]], !dbg [[DBG24]] ; DEBUGLOC: [[SCALAR_PH]]: -; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], !dbg [[DBG19]] ; DEBUGLOC-NEXT: br label %[[FOR_BODY:.*]], !dbg [[DBG18]] ; DEBUGLOC: [[FOR_BODY]]: -; DEBUGLOC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], !dbg [[DBG19]] +; DEBUGLOC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], !dbg [[DBG19]] ; DEBUGLOC-NEXT: #dbg_value(i64 [[INDVARS_IV]], [[META9:![0-9]+]], !DIExpression(), [[DBG19]]) ; DEBUGLOC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]], !dbg [[DBG20]] ; DEBUGLOC-NEXT: #dbg_value(ptr [[ARRAYIDX]], [[META11:![0-9]+]], !DIExpression(), [[DBG20]]) diff --git a/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll index 5c52b1ab2778..07643a1c6e83 100644 --- 
a/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -25,12 +25,10 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[BODY:.*]] ; CHECK: [[BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_TMP:%.*]] = phi i32 [ [[SUM:%.*]], %[[BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BODY]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_TMP:%.*]] = phi i32 [ [[SUM:%.*]], %[[BODY]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr [[GEP0]], align 4 ; CHECK-NEXT: [[SUM]] = add i32 [[SUM_TMP]], [[LOAD0]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index cea16c9eb751..4895f6a83d56 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -9,7 +9,7 @@ target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-LABEL: define i32 @reverse_induction_i64( ; CHECK-SAME: i64 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -38,14 +38,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[LOOPEND:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[STARTVAL]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[STARTVAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ADD_I]] = add i64 [[ADD_I7]], -1 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]] ; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4 @@ -80,7 +77,7 @@ loopend: define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-LABEL: define i32 @reverse_induction_i128( ; CHECK-SAME: i128 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: 
+; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -109,14 +106,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[LOOPEND:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ [[STARTVAL]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[STARTVAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ADD_I]] = add i128 [[ADD_I7]], -1 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]] ; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4 @@ -248,7 +242,7 @@ loopend: define void @reverse_forward_induction_i64_i8() { ; CHECK-LABEL: define void @reverse_forward_induction_i64_i8() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -277,12 +271,10 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label 
%[[WHILE_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1023, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] ; CHECK-NEXT: [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[INC]] to i32 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]] @@ -316,7 +308,7 @@ while.end: define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-LABEL: define void @reverse_forward_induction_i64_i8_signed() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -345,12 +337,10 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1023, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ -127, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ 
[[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ -127, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] ; CHECK-NEXT: [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1 ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[INC]] to i32 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 37ed28993cf6..60da3368b664 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -635,8 +635,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i32 1) ; CHECK-VF4UF1-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul i32 1, [[TMP3]] -; CHECK-VF4UF1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 +; CHECK-VF4UF1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 ; CHECK-VF4UF1-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-VF4UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4UF1: [[VECTOR_BODY]]: diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll index 70772dcd0cdf..89f15ea5e918 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll @@ -21,10 +21,9 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT_1:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label 
[[LOOP_1_HEADER:%.*]] ; CHECK: loop.1.header: -; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_1_LATCH:%.*]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_1_LATCH:%.*]] ] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: br i1 [[C]], label [[LOOP_1_LATCH]], label [[LOOP_1_LATCH]] ; CHECK: loop.1.latch: @@ -65,10 +64,10 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_2:%.*]], label [[SCALAR_PH2]] ; CHECK: scalar.ph2: -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i8 [ [[TMP9]], [[MIDDLE_BLOCK7]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP9]], [[MIDDLE_BLOCK7]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: -; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL8]], [[SCALAR_PH2]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH2]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] ; CHECK-NEXT: [[IV_2_NEXT]] = add i8 [[IV_2]], 1 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i8 [[IV_2_NEXT]] ; CHECK-NEXT: store i8 0, ptr [[GEP_A]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll index 285c6742a7f5..4b080ddaa119 100644 --- a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll @@ -4,7 +4,7 @@ define void @neg_cond(ptr noalias %p, ptr noalias %q) { ; CHECK-LABEL: define void @neg_cond( ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] @@ -21,10 +21,9 @@ define void @neg_cond(ptr noalias %p, ptr noalias %q) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[P_GEP:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P_GEP]], align 4 ; CHECK-NEXT: [[Q_GEP:%.*]] = getelementptr i32, ptr [[Q]], i32 [[IV]] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index 37d75ffe2c2f..30585d07a8e6 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -26,12 +26,10 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i64 [[A]], i64 undef ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ undef, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ undef, [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] ; 
CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 @@ -86,12 +84,10 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i64 [[A]], i64 poison ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ poison, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ poison, [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 @@ -146,12 +142,10 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[START]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load 
i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index f329a18f3eae..df40ba0d4ac3 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -31,10 +31,9 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll index c648bedabc05..5b9e75a9f7eb 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll @@ -5,7 +5,7 @@ declare void @init_mem(ptr, i64); define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4-LABEL: define i64 @multi_exiting_to_different_exits_live_in_exit_values() { -; VF4IC4-NEXT: [[ENTRY:.*]]: +; VF4IC4-NEXT: [[ENTRY:.*:]] ; VF4IC4-NEXT: [[SRC:%.*]] = alloca [128 x i32], align 4 ; VF4IC4-NEXT: call void @init_mem(ptr [[SRC]]) ; VF4IC4-NEXT: br i1 false, label 
%[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -40,10 +40,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: [[VECTOR_EARLY_EXIT]]: ; VF4IC4-NEXT: br label %[[E1:.*]] ; VF4IC4: [[SCALAR_PH]]: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF4IC4-NEXT: br label %[[LOOP_HEADER:.*]] ; VF4IC4: [[LOOP_HEADER]]: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] ; VF4IC4-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 ; VF4IC4-NEXT: [[C_1:%.*]] = icmp eq i32 [[L]], 10 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index 3f51c72a6d3d..678b171832c3 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -40,10 +40,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: vector.early.exit: ; VF4IC4-NEXT: br label [[E1:%.*]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP_HEADER:%.*]] ; VF4IC4: loop.header: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], [[LOOP_LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] ; VF4IC4-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 ; VF4IC4-NEXT: [[C_1:%.*]] = icmp eq i32 [[L]], 10 @@ -149,10 +148,9 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: -; 
VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] ; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 ; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] @@ -247,10 +245,9 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP7]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[P1]], [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: -; VF4IC4-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[P1]], [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 ; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72 ; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] @@ -348,10 +345,9 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] ; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 ; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr 
[[P2]], i64 [[IV]] @@ -456,10 +452,9 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] ; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 ; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] @@ -571,10 +566,9 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] ; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 ; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] @@ -823,10 +817,9 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i8 [[TMP40]], i8 [[TMP38]] ; VF4IC4-NEXT: br label [[LOOP_END]] ; VF4IC4: scalar.ph: -; VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; VF4IC4-NEXT: br label [[LOOP:%.*]] ; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 
[[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[SCALAR_PH]] ] ; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] ; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 ; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 842ff910c89d..dd3521fd99c8 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -31,10 +31,9 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: br label [[LOOP_COND:%.*]] ; CHECK: loop.cond: @@ -104,9 +103,8 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> [[WIDE_LOAD]], <2 x i16> splat (i16 1) -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[PREDPHI]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP9]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -116,10 +114,9 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] ; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]] @@ -206,10 +203,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] @@ -292,9 +288,8 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] 
= xor <2 x i1> [[TMP2]], splat (i1 true) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> splat (i16 1) -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[PREDPHI]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP18]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -304,10 +299,9 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] ; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]] @@ -379,10 +373,9 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X:%.*]] ; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]] ; CHECK: loop.latch: diff --git 
a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 2c0a6f1b032c..b3451704ea51 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -34,10 +34,9 @@ define i64 @same_exit_block_phi_of_consts() { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -109,10 +108,9 @@ define i64 @diff_exit_block_phi_of_consts() { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -292,10 +290,9 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ] ; CHECK-NEXT: br 
label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[SCALAR_PH]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT]] ; CHECK: for.inc: ; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 940e3980a01a..f4b35c779a4b 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -36,10 +36,9 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -116,12 +115,10 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i32 9, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 9, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; 
CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ 9, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -198,14 +195,11 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i32 9, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ 9, [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[P1]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i128 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i128 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ 9, [[SCALAR_PH]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC]] ], [ [[P1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 ; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] @@ -280,12 +274,10 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = fadd fast float 9.000000e+00, [[TMP11]] 
; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ 9.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi float [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX2:%.*]] = phi float [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ 9.000000e+00, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -364,12 +356,10 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP20]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[P2]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi ptr [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX2:%.*]] = phi ptr [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[P2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds 
i8, ptr [[P2]], i64 [[INDEX]] @@ -443,10 +433,9 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[P1]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[P1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72 ; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] @@ -517,10 +506,9 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -597,10 +585,9 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: 
[[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P2]], i64 0, i64 [[INDEX]] @@ -675,10 +662,9 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -750,10 +736,9 @@ define i64 @same_exit_block_pre_inc_use2() { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -828,10 +813,9 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: 
br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -907,10 +891,9 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] @@ -981,10 +964,9 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, 
ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -1056,10 +1038,9 @@ define ptr @same_exit_block_post_inc_use1_ivptr() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[P1]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[P1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 ; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72 @@ -1128,10 +1109,9 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -1206,10 +1186,9 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], 
[[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -1288,10 +1267,9 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -1373,10 +1351,9 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -1456,10 +1433,9 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK-NEXT: 
[[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] @@ -1542,10 +1518,9 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK-NEXT: [[TMP21:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 @@ -1630,12 +1605,10 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 [[START]], [[TMP12]] ; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 
[[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[START]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[INDEX2_NEXT]] = add i64 [[INDEX2]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] @@ -1719,10 +1692,9 @@ define i64 @loop_contains_safe_call() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) @@ -1794,10 +1766,9 @@ define i64 @loop_contains_safe_div() { ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000 @@ -1870,10 +1841,9 @@ define i64 
@loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 @@ -2077,10 +2047,9 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll index 971921a9c1d6..8e47f19efa24 100644 --- a/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll @@ -23,10 +23,9 @@ define void @test_variable_stride(ptr %dst, i32 %scale) { ; CHECK: middle.block: ; CHECK-NEXT: br label 
[[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IDX:%.*]] = mul i32 [[IV]], [[SCALE]] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i32 [[IDX]] ; CHECK-NEXT: store i32 [[IV]], ptr [[GEP]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index a687ecc33af5..9e94768fc2cb 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -145,10 +145,9 @@ define void @ext_cmp(ptr %src.1, ptr %src.2, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i16, ptr [[SRC_1]], i64 [[IV]] ; CHECK-NEXT: [[I2:%.*]] = load i16, ptr [[GEP_SRC_1]], align 2 ; CHECK-NEXT: [[I3:%.*]] = sext i16 [[I2]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll b/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll index 66dc785d95f4..ac9b03567aab 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll @@ -28,10 +28,9 @@ define void @pr77468(ptr noalias %src, ptr noalias %dst, i1 %x) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 
0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i16 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 ; CHECK-NEXT: [[X_EXT:%.*]] = zext i1 [[X]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index 10e9ae80beb7..9e710763be13 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -26,10 +26,9 @@ define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = lshr i32 [[F]], 18 ; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 @@ -84,10 +83,9 @@ define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = shl i32 [[F]], 18 ; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 @@ -142,10 +140,9 @@ define void 
@test_ashr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = ashr i32 [[F]], 18 ; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 @@ -200,10 +197,9 @@ define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] ; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 @@ -263,10 +259,9 @@ define void @test_lshr_by_18(ptr %A) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_EXT]] ; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 @@ -325,10 +320,9 @@ define void @test_lshr_by_4(ptr %A) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; 
CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_EXT]] ; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll index c67817556c16..c92dc1a641c6 100644 --- a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll +++ b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll @@ -24,10 +24,9 @@ define void @uitofp_preserve_nneg(ptr %result, i32 %size, float %y) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[FOR_BODY_PREHEADER4]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER4]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP5:%.*]] = fmul float [[CONV]], [[Y]] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = zext nneg i32 [[TMP4]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index b500acb79782..5c464419f36d 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -6,7 +6,7 @@ define void @blend_uniform_iv_trunc(i1 %c) { ; CHECK-LABEL: define void @blend_uniform_iv_trunc( ; CHECK-SAME: i1 [[C:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; 
CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -22,10 +22,9 @@ define void @blend_uniform_iv_trunc(i1 %c) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT:.*]], label %[[LOOP_LATCH]] ; CHECK: [[LOOP_NEXT]]: @@ -66,7 +65,7 @@ exit: ; preds = %loop.latch define void @blend_uniform_iv(i1 %c) { ; CHECK-LABEL: define void @blend_uniform_iv( ; CHECK-SAME: i1 [[C:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -81,10 +80,9 @@ define void @blend_uniform_iv(i1 %c) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT:.*]], label %[[LOOP_LATCH]] ; CHECK: [[LOOP_NEXT]]: ; CHECK-NEXT: br label %[[LOOP_LATCH]] @@ -124,7 +122,7 @@ exit: ; preds = %loop.latch define void @blend_chain_iv(i1 %c) { ; CHECK-LABEL: define void @blend_chain_iv( ; CHECK-SAME: i1 [[C:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: 
[[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 @@ -156,10 +154,9 @@ define void @blend_chain_iv(i1 %c) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT:.*]], label %[[LOOP_LATCH]] ; CHECK: [[LOOP_NEXT]]: ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT_2:.*]], label %[[LOOP_NEXT_3:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index 0541c9d92240..ef1acc0349a6 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -25,10 +25,9 @@ define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] ; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[ELSE_1:%.*]] ; CHECK: else.1: @@ -101,10 +100,9 @@ define void @test_not_first_lane_only_wide_compare(ptr 
%A, ptr noalias %B, i16 % ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] ; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 2 ; CHECK-NEXT: [[C_0:%.*]] = icmp ult i16 [[L_0]], [[X]] @@ -183,10 +181,9 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] ; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 2 ; CHECK-NEXT: [[C_0:%.*]] = icmp ult i16 [[L_0]], [[X]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index e1185207813e..f1e68d47848a 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosync nofree { ; VF8UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16( ; 
VF8UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0:[0-9]+]] { -; VF8UF1-NEXT: [[ENTRY:.*]]: +; VF8UF1-NEXT: [[ENTRY:.*:]] ; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF8UF1: [[VECTOR_PH]]: ; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -31,10 +31,9 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF1: [[VECTOR_EARLY_EXIT]]: ; VF8UF1-NEXT: br label %[[EXIT]] ; VF8UF1: [[SCALAR_PH]]: -; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]] ; VF8UF1: [[LOOP_HEADER]]: -; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] ; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 ; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 @@ -49,7 +48,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; ; VF8UF2-LABEL: define i8 @test_early_exit_max_tc_less_than_16( ; VF8UF2-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0:[0-9]+]] { -; VF8UF2-NEXT: [[ENTRY:.*]]: +; VF8UF2-NEXT: [[ENTRY:.*:]] ; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -69,10 +68,9 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_EARLY_EXIT]]: ; VF8UF2-NEXT: br label %[[EXIT]] ; VF8UF2: [[SCALAR_PH]]: -; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] ; VF8UF2: [[LOOP_HEADER]]: -; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; 
VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] ; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 ; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 @@ -87,7 +85,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; ; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16( ; VF16UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0:[0-9]+]] { -; VF16UF1-NEXT: [[ENTRY:.*]]: +; VF16UF1-NEXT: [[ENTRY:.*:]] ; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -103,10 +101,9 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1: [[VECTOR_EARLY_EXIT]]: ; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[SCALAR_PH]]: -; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]] ; VF16UF1: [[LOOP_HEADER]]: -; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] ; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 ; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 @@ -142,7 +139,7 @@ exit: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr dereferenceable(16) %A) nosync nofree { ; VF8UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( ; VF8UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0]] { -; VF8UF1-NEXT: [[ENTRY:.*]]: +; VF8UF1-NEXT: [[ENTRY:.*:]] ; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF8UF1: [[VECTOR_PH]]: ; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -165,10 +162,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF1-NEXT: 
[[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; VF8UF1-NEXT: br label %[[EXIT]] ; VF8UF1: [[SCALAR_PH]]: -; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]] ; VF8UF1: [[LOOP_HEADER]]: -; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] ; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 ; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 @@ -183,7 +179,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; ; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( ; VF8UF2-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0]] { -; VF8UF2-NEXT: [[ENTRY:.*]]: +; VF8UF2-NEXT: [[ENTRY:.*:]] ; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -210,10 +206,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] ; VF8UF2-NEXT: br label %[[EXIT]] ; VF8UF2: [[SCALAR_PH]]: -; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] ; VF8UF2: [[LOOP_HEADER]]: -; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] ; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 ; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 @@ -228,7 +223,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; ; VF16UF1-LABEL: 
define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( ; VF16UF1-SAME: ptr dereferenceable(16) [[A:%.*]]) #[[ATTR0]] { -; VF16UF1-NEXT: [[ENTRY:.*]]: +; VF16UF1-NEXT: [[ENTRY:.*:]] ; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -246,10 +241,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[SCALAR_PH]]: -; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]] ; VF16UF1: [[LOOP_HEADER]]: -; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] ; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] ; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 ; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll index 5f1cee887fda..ada59e90b881 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK-LABEL: define i64 @remove_loop_region_int_iv_used_outside( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -18,10 +18,9 @@ 
define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store ptr null, ptr [[GEP]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 @@ -50,7 +49,7 @@ exit: define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK-LABEL: define i64 @remove_loop_region_int_iv_inc_used_outside( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -62,10 +61,9 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store ptr null, ptr [[GEP]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 @@ -94,7 +92,7 @@ exit: define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK-LABEL: define ptr @remove_loop_region_ptr_iv_used_outside( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 @@ -108,12 +106,10 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -8 ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[DST]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[INT_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INT_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[DST]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[INT_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INT_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: store ptr null, ptr [[PTR_IV]], align 8 ; CHECK-NEXT: [[INT_IV_NEXT]] = add i64 [[INT_IV]], 1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8 @@ -143,7 +139,7 @@ exit: define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK-LABEL: define ptr @remove_loop_region_ptr_iv_inc_used_outside( ; CHECK-SAME: ptr [[DST:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128 @@ -156,12 +152,10 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[DST]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] 
], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[INT_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INT_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[DST]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[INT_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INT_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: store ptr null, ptr [[PTR_IV]], align 8 ; CHECK-NEXT: [[INT_IV_NEXT]] = add i64 [[INT_IV]], 1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index e779233e6c67..cae5c4af1379 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -96,8 +96,7 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK-NEXT: EMIT vp<[[BROADCAST_1:%.+]]> = broadcast ir<1> ; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[STEP_VECTOR]]>, vp<[[BROADCAST_1]]> ; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = add vp<[[BROADCAST_0]]>, vp<[[MUL]]> -; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = mul ir<1>, ir<8> -; CHECK-NEXT: EMIT vp<[[BROADCAST_INC:%.+]]> = broadcast vp<[[INC]]> +; CHECK-NEXT: EMIT vp<[[BROADCAST_INC:%.+]]> = broadcast ir<8> ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 5d0d391e5b99..19cf1069f05b 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -22,10 +22,9 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: 
loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[F_0_I:%.*]] = phi ptr [ [[A]], [[LOOP_HEADER]] ] @@ -80,10 +79,9 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[SRC]], align 8 ; CHECK-NEXT: [[GEP_L:%.*]] = getelementptr float, ptr [[L]], i64 [[N]] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] @@ -134,10 +132,9 @@ define void @wide_gep_multiple_indices_some_invariant(ptr noalias %dst, ptr noal ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[SRC]], align 8 ; CHECK-NEXT: [[GEP_L:%.*]] = getelementptr [10 x float], ptr [[L]], i32 [[X]], i64 [[IV]] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] diff --git a/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll b/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll index c23d2b38659f..1cc2e871925b 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll +++ 
b/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll @@ -5,7 +5,7 @@ define void @powi_only_first_lane_used_of_second_arg(ptr %p, i32 %pow) { ; CHECK-LABEL: define void @powi_only_first_lane_used_of_second_arg( ; CHECK-SAME: ptr [[P:%.*]], i32 [[POW:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -21,10 +21,9 @@ define void @powi_only_first_lane_used_of_second_arg(ptr %p, i32 %pow) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[P_GEP:%.*]] = getelementptr float, ptr [[P]], i32 [[IV]] ; CHECK-NEXT: [[X:%.*]] = load float, ptr [[P_GEP]], align 4 ; CHECK-NEXT: [[Y:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 [[POW]]) From 3acb679bdab6aadf446d1aa240d3e41782e0f059 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Aug 2025 23:11:53 +0300 Subject: [PATCH 072/214] [TableGen] Remove redundant variable (NFC) --- llvm/utils/TableGen/DecoderEmitter.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index b22c60a00081..42a19f731b46 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -398,9 +398,6 @@ protected: // Number of instructions which fall under FilteredInstructions category. unsigned NumFiltered; - // Keeps track of the last opcode in the filtered bucket. 
- EncodingIDAndOpcode LastOpcFiltered; - public: Filter(Filter &&f); Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits); @@ -411,7 +408,7 @@ public: EncodingIDAndOpcode getSingletonOpc() const { assert(NumFiltered == 1); - return LastOpcFiltered; + return FilteredInstructions.begin()->second.front(); } // Return the filter chooser for the group of instructions without constant @@ -650,14 +647,13 @@ Filter::Filter(Filter &&f) FilteredInstructions(std::move(f.FilteredInstructions)), VariableInstructions(std::move(f.VariableInstructions)), FilterChooserMap(std::move(f.FilterChooserMap)), - NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} + NumFiltered(f.NumFiltered) {} Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits) : Owner(owner), StartBit(startBit), NumBits(numBits) { assert(StartBit + NumBits - 1 < Owner.BitWidth); NumFiltered = 0; - LastOpcFiltered = {0, 0}; for (const auto &OpcPair : Owner.Opcodes) { // Populates the insn given the uid. @@ -669,8 +665,7 @@ Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits) if (Ok) { // The encoding bits are well-known. Lets add the uid of the // instruction into the bucket keyed off the constant field value. - LastOpcFiltered = OpcPair; - FilteredInstructions[Field].push_back(LastOpcFiltered); + FilteredInstructions[Field].push_back(OpcPair); ++NumFiltered; } else { // Some of the encoding bit(s) are unspecified. This contributes to From 75bf7392089d027bb6fa78ded21acaa97b16a412 Mon Sep 17 00:00:00 2001 From: Leandro Lacerda Date: Sat, 16 Aug 2025 17:14:26 -0300 Subject: [PATCH 073/214] [libc][gpu] Disable loop unrolling in the throughput benchmark loop (#153971) This patch makes GPU throughput benchmark results more comparable across targets by disabling loop unrolling in the benchmark loop. 
Motivation: * PTX (post-LTO) evidence on NVPTX: for libc `sin`, the generated PTX shows the `throughput` loop unrolled 8x at `N=128` (one iteration advances the input pointer by 64 bytes = 8 doubles), interleaving eight independent chains before the back-edge. This hides latency and significantly reduces cycles/call as the batch size `N` grows. * Observed scaling (NVPTX measurements): with unrolling enabled, `sin` dropped from ~3,100 cycles/call at `N=1` to ~360 at `N=128`. After enforcing `#pragma clang loop unroll(disable)`, results stabilized (e.g., from ~3100 cycles/call at `N=1` to ~2700 at `N=128`). * libdevice contrast: the libdevice `sin` path did not exhibit a similar drop in our measurements, and the PTX appears as compact internal calls rather than a long FMA chain, leaving less ILP for the outer loop to extract. What this change does: * Applies `#pragma clang loop unroll(disable)` to the GPU `throughput()` loop in both NVPTX and AMDGPU backends. Leaving unrolling entirely to the optimizer makes apples-to-apples comparisons uneven (e.g., libc vs. vendor). Disabling unrolling yields fairer, more consistent numbers. 
--- libc/benchmarks/gpu/timing/amdgpu/timing.h | 8 ++++++++ libc/benchmarks/gpu/timing/nvptx/timing.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/libc/benchmarks/gpu/timing/amdgpu/timing.h b/libc/benchmarks/gpu/timing/amdgpu/timing.h index b4a174f72981..8b92584b3923 100644 --- a/libc/benchmarks/gpu/timing/amdgpu/timing.h +++ b/libc/benchmarks/gpu/timing/amdgpu/timing.h @@ -117,6 +117,8 @@ throughput_baseline(const cpp::array &inputs) { asm("" ::"s"(start)); T result{}; + +#pragma clang loop unroll(disable) for (auto input : inputs) { asm("" ::"v"(input)); result = input; @@ -146,6 +148,8 @@ static LIBC_INLINE uint64_t throughput(F f, const cpp::array &inputs) { asm("" ::"s"(start)); T result{}; + +#pragma clang loop unroll(disable) for (auto input : inputs) { asm("" ::"v"(input)); result = f(input); @@ -174,6 +178,8 @@ static LIBC_INLINE uint64_t throughput_baseline( asm("" ::"s"(start)); T result{}; + +#pragma clang loop unroll(disable) for (size_t i = 0; i < N; i++) { T x = inputs1[i]; T y = inputs2[i]; @@ -206,6 +212,8 @@ static LIBC_INLINE uint64_t throughput(F f, const cpp::array &inputs1, asm("" ::"s"(start)); T result{}; + +#pragma clang loop unroll(disable) for (size_t i = 0; i < N; i++) { T x = inputs1[i]; T y = inputs2[i]; diff --git a/libc/benchmarks/gpu/timing/nvptx/timing.h b/libc/benchmarks/gpu/timing/nvptx/timing.h index 0c93a67129b8..944d3732eae6 100644 --- a/libc/benchmarks/gpu/timing/nvptx/timing.h +++ b/libc/benchmarks/gpu/timing/nvptx/timing.h @@ -106,6 +106,8 @@ throughput_baseline(const cpp::array &inputs) { asm("" ::"llr"(start)); T result{}; + +#pragma clang loop unroll(disable) for (auto input : inputs) { asm("" ::"r"(input)); result = input; @@ -135,6 +137,8 @@ static LIBC_INLINE uint64_t throughput(F f, const cpp::array &inputs) { asm("" ::"llr"(start)); T result{}; + +#pragma clang loop unroll(disable) for (auto input : inputs) { asm("" ::"r"(input)); result = f(input); @@ -163,6 +167,8 @@ static LIBC_INLINE uint64_t 
throughput_baseline( asm("" ::"llr"(start)); T result{}; + +#pragma clang loop unroll(disable) for (size_t i = 0; i < N; i++) { T x = inputs1[i]; T y = inputs2[i]; @@ -195,6 +201,8 @@ static LIBC_INLINE uint64_t throughput(F f, const cpp::array &inputs1, asm("" ::"llr"(start)); T result{}; + +#pragma clang loop unroll(disable) for (size_t i = 0; i < N; i++) { T x = inputs1[i]; T y = inputs2[i]; From 1c8da29f48e3ad295371f612785f7f13f16db9d2 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 16 Aug 2025 13:15:36 -0700 Subject: [PATCH 074/214] [ADT] Use small_buckets() in SmallPtrSetImpl::remove_if (NFC) (#153962) --- llvm/include/llvm/ADT/SmallPtrSet.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 0d7fe308a32f..f55627c6866f 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -435,7 +435,8 @@ public: bool remove_if(UnaryPredicate P) { bool Removed = false; if (isSmall()) { - const void **APtr = CurArray, **E = CurArray + NumEntries; + auto Buckets = small_buckets(); + const void **APtr = Buckets.begin(), **E = Buckets.end(); while (APtr != E) { PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast(*APtr)); if (P(Ptr)) { From 73775a0f2749d0cc8b8877bf34ebb3534a2e7913 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 16 Aug 2025 21:17:01 +0100 Subject: [PATCH 075/214] [LV] Add test for #153946. Add test for miscompile from https://github.com/llvm/llvm-project/issues/153946, caused by poison propagation. 
--- .../single-early-exit-cond-poison.ll | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll new file mode 100644 index 000000000000..660212378ae6 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefix=VF4IC2 %s +; RUN: opt -p loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefix=VF8IC1 %s + +; Test case from https://github.com/llvm/llvm-project/issues/153946. +; %shr and thus %early.cond will be poison from %iv == 4 onwards. +; TODO: Make sure the mask being poison does not propagate across lanes in the +; OR reduction when computing the early exit condition in the vector loop. 
+define noundef i32 @f(i32 noundef %g) { +; VF4IC2-LABEL: define noundef i32 @f( +; VF4IC2-SAME: i32 noundef [[G:%.*]]) { +; VF4IC2-NEXT: [[ENTRY:.*:]] +; VF4IC2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF4IC2: [[VECTOR_PH]]: +; VF4IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[G]], i64 0 +; VF4IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; VF4IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4IC2: [[VECTOR_BODY]]: +; VF4IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4IC2-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4IC2-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; VF4IC2-NEXT: [[TMP0:%.*]] = shl nuw nsw <4 x i32> [[VEC_IND]], splat (i32 3) +; VF4IC2-NEXT: [[TMP1:%.*]] = shl nuw nsw <4 x i32> [[STEP_ADD]], splat (i32 3) +; VF4IC2-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], [[TMP0]] +; VF4IC2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], [[TMP1]] +; VF4IC2-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; VF4IC2-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; VF4IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; VF4IC2-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP5]] +; VF4IC2-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; VF4IC2-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; VF4IC2-NEXT: br i1 true, label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF4IC2: [[MIDDLE_SPLIT]]: +; VF4IC2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; VF4IC2-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF4IC2: [[MIDDLE_BLOCK]]: +; VF4IC2-NEXT: br label %[[RETURN:.*]] +; VF4IC2: 
[[VECTOR_EARLY_EXIT]]: +; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; VF4IC2-NEXT: [[TMP10:%.*]] = add i64 4, [[TMP9]] +; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; VF4IC2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] +; VF4IC2-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4 +; VF4IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]] +; VF4IC2-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 +; VF4IC2-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], [[TMP15]] +; VF4IC2-NEXT: br label %[[RETURN]] +; VF4IC2: [[SCALAR_PH]]: +; VF4IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; VF4IC2: [[LOOP_HEADER]]: +; VF4IC2-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF4IC2-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3 +; VF4IC2-NEXT: [[SHR:%.*]] = ashr i32 [[G]], [[MUL]] +; VF4IC2-NEXT: [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0 +; VF4IC2-NEXT: br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]] +; VF4IC2: [[LOOP_LATCH]]: +; VF4IC2-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; VF4IC2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8 +; VF4IC2-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; VF4IC2: [[RETURN]]: +; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT]] ] +; VF4IC2-NEXT: ret i32 [[RES]] +; +; VF8IC1-LABEL: define noundef i32 @f( +; VF8IC1-SAME: i32 noundef [[G:%.*]]) { +; VF8IC1-NEXT: [[ENTRY:.*:]] +; VF8IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF8IC1: [[VECTOR_PH]]: +; VF8IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[G]], i64 0 +; VF8IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> 
zeroinitializer +; VF8IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF8IC1: [[VECTOR_BODY]]: +; VF8IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8IC1-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF8IC1-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 3) +; VF8IC1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[BROADCAST_SPLAT]], [[TMP0]] +; VF8IC1-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; VF8IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; VF8IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]]) +; VF8IC1-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; VF8IC1-NEXT: br i1 true, label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8IC1: [[MIDDLE_SPLIT]]: +; VF8IC1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 +; VF8IC1-NEXT: br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; VF8IC1: [[MIDDLE_BLOCK]]: +; VF8IC1-NEXT: br label %[[RETURN:.*]] +; VF8IC1: [[VECTOR_EARLY_EXIT]]: +; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) +; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 +; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], [[TMP6]] +; VF8IC1-NEXT: br label %[[RETURN]] +; VF8IC1: [[SCALAR_PH]]: +; VF8IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; VF8IC1: [[LOOP_HEADER]]: +; VF8IC1-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; VF8IC1-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3 +; VF8IC1-NEXT: [[SHR:%.*]] = ashr i32 [[G]], [[MUL]] +; VF8IC1-NEXT: [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0 +; VF8IC1-NEXT: br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]] +; VF8IC1: [[LOOP_LATCH]]: +; VF8IC1-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; VF8IC1-NEXT: [[EC:%.*]] = icmp eq i32 
[[IV_NEXT]], 8 +; VF8IC1-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8IC1: [[RETURN]]: +; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ] +; VF8IC1-NEXT: ret i32 [[RES]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %mul = shl nuw nsw i32 %iv, 3 + %shr = ashr i32 %g, %mul + %early.cond = icmp eq i32 %shr, 0 + br i1 %early.cond, label %loop.latch, label %return + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 8 + br i1 %ec, label %return, label %loop.header + +return: + %res = phi i32 [ %shr, %loop.latch ], [ %iv, %loop.header ] + ret i32 %res +} + + From ddae3b74a396512b7578f4ccb5ce0e8b57234962 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 16 Aug 2025 20:55:45 +0000 Subject: [PATCH 076/214] [CI] Show Stats in CI Log This patch makes utils.sh also print the stats out. This is particularly useful in postcommit CI where we are currently not saving artifacts. --- .ci/utils.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/utils.sh b/.ci/utils.sh index 97724444b96f..2a3d2426b630 100644 --- a/.ci/utils.sh +++ b/.ci/utils.sh @@ -24,6 +24,7 @@ function at-exit { retcode=$? mkdir -p artifacts + sccache --show-stats sccache --show-stats >> artifacts/sccache_stats.txt cp "${BUILD_DIR}"/.ninja_log artifacts/.ninja_log cp "${MONOREPO_ROOT}"/*.log artifacts/ || : From f8740920eec94f04752ad7bf1b86de9587412b1c Mon Sep 17 00:00:00 2001 From: Shafik Yaghmour Date: Sat, 16 Aug 2025 14:08:39 -0700 Subject: [PATCH 077/214] [Clang][Sema] Check the return value of DiagnoseClassNameShadow in ActOnEnumConstant (#143754) Static analysis flagged that we were not checking the return value of DiagnoseClassNameShadow when we did so everywhere else. 
Modifying this case to match how other places use it makes sense and does not change behavior. Likely, if this check fails, later actions will fail as well, but it is more correct to exit early. --- clang/lib/Sema/SemaDecl.cpp | 7 ++++--- clang/test/CXX/class/class.mem/p13.cpp | 9 +++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5001e080f946..8ddbaf34a7f4 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -20270,9 +20270,10 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst, // different from T: // - every enumerator of every member of class T that is an unscoped // enumerated type - if (getLangOpts().CPlusPlus && !TheEnumDecl->isScoped()) - DiagnoseClassNameShadow(TheEnumDecl->getDeclContext(), - DeclarationNameInfo(Id, IdLoc)); + if (getLangOpts().CPlusPlus && !TheEnumDecl->isScoped() && + DiagnoseClassNameShadow(TheEnumDecl->getDeclContext(), + DeclarationNameInfo(Id, IdLoc))) + return nullptr; EnumConstantDecl *New = CheckEnumConstant(TheEnumDecl, LastEnumConst, IdLoc, Id, Val); diff --git a/clang/test/CXX/class/class.mem/p13.cpp b/clang/test/CXX/class/class.mem/p13.cpp index d947586c4194..a30aa5d0b2ee 100644 --- a/clang/test/CXX/class/class.mem/p13.cpp +++ b/clang/test/CXX/class/class.mem/p13.cpp @@ -114,3 +114,12 @@ template struct CtorDtorName : B { CtorDtorName(); ~CtorDtorName(); // expected-error {{identifier 'CtorDtorName' after '~' in destructor name does not name a type}} }; + +struct S { // expected-note {{'S' declared here}} + enum E { + R = 11, + S = 12 // expected-error {{member 'S' has the same name as its class}} + }; + static_assert(E::R == 11, "E::R is not 11"); + static_assert(E::S == 12, "E::S is not 12"); // expected-error {{no member named 'S' in 'S::E'}} +}; From 7bb73455f76c8ce3b470fb931daa68a33ec79c34 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 00:49:17 +0300 Subject:
[PATCH 078/214] [TableGen][DecoderEmitter] Add helpers for working with scopes (NFC) (#153979) Part of an effort to simplify DecoderEmitter code. --- llvm/utils/TableGen/DecoderEmitter.cpp | 47 ++++++++++---------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 42a19f731b46..630793e02711 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -192,6 +192,17 @@ struct DecoderTableInfo { DecoderSet Decoders; bool isOutermostScope() const { return FixupStack.size() == 1; } + + void pushScope() { FixupStack.emplace_back(); } + + void popScope() { + // Resolve any remaining fixups in the current scope before popping it. + // All fixups resolve to the current location. + uint32_t DestIdx = Table.size(); + for (uint32_t FixupIdx : FixupStack.back()) + Table.patchNumToSkip(FixupIdx, DestIdx); + FixupStack.pop_back(); + } }; struct EncodingAndInst { @@ -726,14 +737,6 @@ void Filter::recurse() { } } -static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, - uint32_t DestIdx) { - // Any NumToSkip fixups in the current scope can resolve to the - // current location. - for (uint32_t FixupIdx : Fixups) - Table.patchNumToSkip(FixupIdx, DestIdx); -} - // Emit table entries to decode instructions given a segment or segments // of bits. void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { @@ -753,7 +756,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { const uint64_t LastFilter = FilterChooserMap.rbegin()->first; bool HasFallthrough = LastFilter == NO_FIXED_SEGMENTS_SENTINEL; if (HasFallthrough) - TableInfo.FixupStack.emplace_back(); + TableInfo.pushScope(); DecoderTable &Table = TableInfo.Table; @@ -765,13 +768,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { // Each scope should always have at least one filter value to check // for. 
assert(PrevFilter != 0 && "empty filter set!"); - FixupList &CurScope = TableInfo.FixupStack.back(); - // Resolve any NumToSkip fixups in the current scope. - resolveTableFixups(Table, CurScope, Table.size()); - - // Delete the scope we have added here. - TableInfo.FixupStack.pop_back(); - + TableInfo.popScope(); PrevFilter = 0; // Don't re-process the filter's fallthrough. } else { // The last filtervalue emitted can be OPC_FilterValue if we are at @@ -1515,13 +1512,9 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, // complex singletons need predicate checks from the first singleton // to refer forward to the variable filterchooser that follows. - TableInfo.FixupStack.emplace_back(); - + TableInfo.pushScope(); emitSingletonTableEntry(TableInfo, Opc); - - resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), - TableInfo.Table.size()); - TableInfo.FixupStack.pop_back(); + TableInfo.popScope(); Best.getVariableFC().emitTableEntries(TableInfo); } @@ -2623,16 +2616,12 @@ namespace { // predicates and decoders themselves, however, are shared across all // decoders to give more opportunities for uniqueing. TableInfo.Table.clear(); - TableInfo.FixupStack.clear(); - TableInfo.FixupStack.emplace_back(); + TableInfo.pushScope(); FC.emitTableEntries(TableInfo); // Any NumToSkip fixups in the top level scope can resolve to the // OPC_Fail at the end of the table. - assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); - // Resolve any NumToSkip fixups in the current scope. 
- resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), - TableInfo.Table.size()); - TableInfo.FixupStack.clear(); + assert(TableInfo.isOutermostScope() && "fixup stack phasing error!"); + TableInfo.popScope(); TableInfo.Table.push_back(MCD::OPC_Fail); From 190778a8ba6d30995b7e1b4b4a556ab6444bdf3a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 16 Aug 2025 15:10:34 -0700 Subject: [PATCH 079/214] MCSymbol: Rename SymContents to kind The names "SymbolContents" and "SymContents*" members are confusing. Rename to kind and Kind::XXX similar to lld/ELF/Symbols.h Rename SymContentsVariable to Kind::Equated as the former term is "equated symbol", not "variable". --- llvm/include/llvm/MC/MCSymbol.h | 48 +++++++++++++++------------------ llvm/lib/MC/MCSymbol.cpp | 9 +++---- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index ce160bd2c3cc..740fae22d1b9 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -42,11 +42,11 @@ class raw_ostream; class MCSymbol { protected: // A symbol can be regular, equated to an expression, or a common symbol. - enum Contents : uint8_t { - SymContentsUnset, - SymContentsVariable, - SymContentsCommon, - SymContentsTargetCommon, // Index stores the section index + enum Kind : uint8_t { + Regular, + Equated, + Common, + TargetCommon, // Index stores the section index }; // Special sentinel value for the absolute pseudo fragment. @@ -65,9 +65,9 @@ protected: /// relative to, if any. mutable MCFragment *Fragment = nullptr; - /// This is actually a Contents enumerator, but is unsigned to avoid sign - /// extension and achieve better bitpacking with MSVC. - unsigned SymbolContents : 2; + /// The symbol kind. Use an unsigned bitfield to achieve better bitpacking + /// with MSVC. + unsigned kind : 2; /// True if this symbol is named. 
A named symbol will have a pointer to the /// name allocated in the bytes immediately prior to the MCSymbol. @@ -145,10 +145,10 @@ protected: }; MCSymbol(const MCSymbolTableEntry *Name, bool isTemporary) - : SymbolContents(SymContentsUnset), IsTemporary(isTemporary), - IsRedefinable(false), IsRegistered(false), IsExternal(false), - IsPrivateExtern(false), IsWeakExternal(false), IsUsedInReloc(false), - IsResolving(0), CommonAlignLog2(0), Flags(0) { + : kind(Kind::Regular), IsTemporary(isTemporary), IsRedefinable(false), + IsRegistered(false), IsExternal(false), IsPrivateExtern(false), + IsWeakExternal(false), IsUsedInReloc(false), IsResolving(0), + CommonAlignLog2(0), Flags(0) { Offset = 0; HasName = !!Name; if (Name) @@ -212,9 +212,9 @@ public: /// Prepare this symbol to be redefined. void redefineIfPossible() { if (IsRedefinable) { - if (SymbolContents == SymContentsVariable) { + if (kind == Kind::Equated) { Value = nullptr; - SymbolContents = SymContentsUnset; + kind = Kind::Regular; } setUndefined(); IsRedefinable = false; @@ -268,9 +268,7 @@ public: /// @{ /// isVariable - Check if this is a variable symbol. - bool isVariable() const { - return SymbolContents == SymContentsVariable; - } + bool isVariable() const { return kind == Equated; } /// Get the expression of the variable symbol. const MCExpr *getVariableValue() const { @@ -293,12 +291,12 @@ public: } uint64_t getOffset() const { - assert(SymbolContents == SymContentsUnset && + assert(kind == Kind::Regular && "Cannot get offset for a common/variable symbol"); return Offset; } void setOffset(uint64_t Value) { - assert(SymbolContents == SymContentsUnset && + assert(kind == Kind::Regular && "Cannot set offset for a common/variable symbol"); Offset = Value; } @@ -317,7 +315,7 @@ public: void setCommon(uint64_t Size, Align Alignment, bool Target = false) { assert(getOffset() == 0); CommonSize = Size; - SymbolContents = Target ? SymContentsTargetCommon : SymContentsCommon; + kind = Target ? 
Kind::TargetCommon : Kind::Common; unsigned Log2Align = encode(Alignment); assert(Log2Align < (1U << NumCommonAlignmentBits) && @@ -350,14 +348,12 @@ public: /// Is this a 'common' symbol. bool isCommon() const { - return SymbolContents == SymContentsCommon || - SymbolContents == SymContentsTargetCommon; + return kind == Kind::Common || kind == Kind::TargetCommon; } - /// Is this a target-specific common-like symbol. - bool isTargetCommon() const { - return SymbolContents == SymContentsTargetCommon; - } + /// Used by AMDGPU to indicate a common-like symbol of section index + /// SHN_AMDGPU_LDS. + bool isTargetCommon() const { return kind == Kind::TargetCommon; } MCFragment *getFragment() const { if (Fragment || !isVariable() || isWeakExternal()) diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp index b19842aae46c..771b1204272d 100644 --- a/llvm/lib/MC/MCSymbol.cpp +++ b/llvm/lib/MC/MCSymbol.cpp @@ -48,12 +48,11 @@ void *MCSymbol::operator new(size_t s, const MCSymbolTableEntry *Name, } void MCSymbol::setVariableValue(const MCExpr *Value) { - assert(Value && "Invalid variable value!"); - assert((SymbolContents == SymContentsUnset || - SymbolContents == SymContentsVariable) && - "Cannot give common/offset symbol a variable value"); + assert(Value && "Invalid equated expression"); + assert((kind == Kind::Regular || kind == Kind::Equated) && + "Cannot equate a common symbol"); this->Value = Value; - SymbolContents = SymContentsVariable; + kind = Kind::Equated; setUndefined(); } From aa96e20dcefa7d73229c98a7d2727696ff949459 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 16 Aug 2025 15:39:33 -0700 Subject: [PATCH 080/214] MCSymbol: Remove AMDGPU-specific Kind::TargetCommon The SymContentsTargetCommon kind introduced by https://reviews.llvm.org/D61493 lacks significance and should be treated as a regular common symbol with a different section index. Update ELFObjectWriter to respect the specified section index.
The new representation also works with Hexagon's SHN_HEXAGON_SCOMMON. --- llvm/include/llvm/MC/MCSymbol.h | 14 +++----------- llvm/lib/MC/ELFObjectWriter.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index 740fae22d1b9..88e2230a0c34 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -46,7 +46,6 @@ protected: Regular, Equated, Common, - TargetCommon, // Index stores the section index }; // Special sentinel value for the absolute pseudo fragment. @@ -315,7 +314,7 @@ public: void setCommon(uint64_t Size, Align Alignment, bool Target = false) { assert(getOffset() == 0); CommonSize = Size; - kind = Target ? Kind::TargetCommon : Kind::Common; + kind = Kind::Common; unsigned Log2Align = encode(Alignment); assert(Log2Align < (1U << NumCommonAlignmentBits) && @@ -338,8 +337,7 @@ public: bool declareCommon(uint64_t Size, Align Alignment, bool Target = false) { assert(isCommon() || getOffset() == 0); if(isCommon()) { - if (CommonSize != Size || getCommonAlignment() != Alignment || - isTargetCommon() != Target) + if (CommonSize != Size || getCommonAlignment() != Alignment) return true; } else setCommon(Size, Alignment, Target); @@ -347,13 +345,7 @@ public: } /// Is this a 'common' symbol. - bool isCommon() const { - return kind == Kind::Common || kind == Kind::TargetCommon; - } - - /// Used by AMDGPU to indicate a common-like symbol of section index - /// SHN_AMDGPU_LDS. 
- bool isTargetCommon() const { return kind == Kind::TargetCommon; } + bool isCommon() const { return kind == Kind::Common; } MCFragment *getFragment() const { if (Fragment || !isVariable() || isWeakExternal()) diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 8f3814a1dd62..759d3e0e1429 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -541,12 +541,12 @@ void ELFWriter::computeSymbolTable(const RevGroupMapTy &RevGroupMap) { if (Symbol.isAbsolute()) { MSD.SectionIndex = ELF::SHN_ABS; } else if (Symbol.isCommon()) { - if (Symbol.isTargetCommon()) { - MSD.SectionIndex = Symbol.getIndex(); - } else { + auto Shndx = Symbol.getIndex(); + if (!Shndx) { assert(!Local); - MSD.SectionIndex = ELF::SHN_COMMON; + Shndx = ELF::SHN_COMMON; } + MSD.SectionIndex = Shndx; } else if (Symbol.isUndefined()) { if (Symbol.isSignature() && !Symbol.isUsedInReloc()) { MSD.SectionIndex = RevGroupMap.lookup(&Symbol); From 2cedb286b8a37a3c6f09ac394b5e95413baac287 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 16 Aug 2025 15:47:39 -0700 Subject: [PATCH 081/214] MCSymbol: Remove unused IsTarget parameter from declareCommon --- llvm/include/llvm/MC/MCSymbol.h | 7 +++---- .../Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index 88e2230a0c34..13d74157bc01 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -311,7 +311,7 @@ public: /// \param Size - The size of the symbol. /// \param Alignment - The alignment of the symbol. /// \param Target - Is the symbol a target-specific common-like symbol. 
- void setCommon(uint64_t Size, Align Alignment, bool Target = false) { + void setCommon(uint64_t Size, Align Alignment) { assert(getOffset() == 0); CommonSize = Size; kind = Kind::Common; @@ -332,15 +332,14 @@ public: /// /// \param Size - The size of the symbol. /// \param Alignment - The alignment of the symbol. - /// \param Target - Is the symbol a target-specific common-like symbol. /// \return True if symbol was already declared as a different type - bool declareCommon(uint64_t Size, Align Alignment, bool Target = false) { + bool declareCommon(uint64_t Size, Align Alignment) { assert(isCommon() || getOffset() == 0); if(isCommon()) { if (CommonSize != Size || getCommonAlignment() != Alignment) return true; } else - setCommon(Size, Alignment, Target); + setCommon(Size, Alignment); return false; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 1f35e92151bf..f000b2cc60c9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -886,7 +886,7 @@ void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, if (!SymbolELF->isBindingSet()) SymbolELF->setBinding(ELF::STB_GLOBAL); - if (SymbolELF->declareCommon(Size, Alignment, true)) { + if (SymbolELF->declareCommon(Size, Alignment)) { report_fatal_error("Symbol: " + Symbol->getName() + " redeclared as different type"); } From 1f5047e43092f39a60a0ddba921610c2ab00897e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 16 Aug 2025 15:52:39 -0700 Subject: [PATCH 082/214] [Github] Remove call to llvm-project-tests.yml from spirv-tests.yml This will eventually allow for removing llvm-project-tests.yml. This should significantly reduce the complexity of these workflows at the cost of a little bit of duplication standard to github actions. 
Reviewers: michalpaszkowski, sudonatalie Reviewed By: sudonatalie Pull Request: https://github.com/llvm/llvm-project/pull/153869 --- .github/workflows/spirv-tests.yml | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/.github/workflows/spirv-tests.yml b/.github/workflows/spirv-tests.yml index f15ca1cb64ba..8708fb06d9eb 100644 --- a/.github/workflows/spirv-tests.yml +++ b/.github/workflows/spirv-tests.yml @@ -4,7 +4,6 @@ permissions: contents: read on: - workflow_dispatch: pull_request: paths: - 'llvm/lib/Target/SPIRV/**' @@ -21,9 +20,27 @@ jobs: check_spirv: if: github.repository_owner == 'llvm' name: Test SPIR-V - uses: ./.github/workflows/llvm-project-tests.yml - with: - build_target: check-llvm-codegen-spirv - projects: - extra_cmake_args: '-DLLVM_TARGETS_TO_BUILD="SPIRV" -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON' - os_list: '["ubuntu-24.04"]' + runs-on: ubuntu-24.04 + container: + image: ghcr.io/llvm/ci-ubuntu-24.04:latest + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Setup ccache + uses: hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17 + with: + max-size: 2G + key: spirv-ubuntu-24.04 + variant: sccache + - name: Build and Test + run: | + mkdir build + cmake -GNinja \ + -S llvm \ + -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DCMAKE_C_COMPILER_LAUNCHER=sccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -DLLVM_TARGETS_TO_BUILD="SPIRV" \ + -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON + ninja -C build check-llvm-codegen-spirv From 29d49c8a37335c22f0a1d974968ecaa6310fca6a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 16 Aug 2025 16:05:45 -0700 Subject: [PATCH 083/214] [libc] Correct standard for getcpu (#153982) --- libc/include/sched.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/include/sched.yaml b/libc/include/sched.yaml index f14799ddf33f..8014aa7ed61f 100644 --- 
a/libc/include/sched.yaml +++ b/libc/include/sched.yaml @@ -20,7 +20,7 @@ functions: - type: const cpu_set_t * - name: getcpu standards: - - POSIX + - Linux return_type: int arguments: - type: unsigned int * From bc3754de0aed44147e64b99414ecb06f84ab1a91 Mon Sep 17 00:00:00 2001 From: knickish Date: Sat, 16 Aug 2025 18:33:47 -0500 Subject: [PATCH 084/214] [M68k] Add anyext patterns for PCD addressing mode (#150356) Does what it says on the tin: anyext loads with the PCD addressing mode were failing addr mode selection, adding the patterns resolved it. --- llvm/lib/Target/M68k/M68kInstrData.td | 4 +++ llvm/test/CodeGen/M68k/Data/load-extend.ll | 42 ++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td index f4ed62720ff9..c5b7ae332822 100644 --- a/llvm/lib/Target/M68k/M68kInstrData.td +++ b/llvm/lib/Target/M68k/M68kInstrData.td @@ -701,18 +701,22 @@ def: Pat<(MxExtLoadi16i8 MxCP_ARID:$src), (EXTRACT_SUBREG (MOVZXd32p8 MxARID8:$src), MxSubRegIndex16Lo)>; def: Pat<(MxExtLoadi16i8 MxCP_ARII:$src), (EXTRACT_SUBREG (MOVZXd32f8 MxARII8:$src), MxSubRegIndex16Lo)>; +def: Pat<(MxExtLoadi16i8 MxCP_PCD:$src), + (EXTRACT_SUBREG (MOVZXd32q8 MxPCD8:$src), MxSubRegIndex16Lo)>; // i32 <- anyext i8 def: Pat<(i32 (anyext i8:$src)), (MOVZXd32d8 MxDRD8:$src)>; def: Pat<(MxExtLoadi32i8 MxCP_ARI :$src), (MOVZXd32j8 MxARI8 :$src)>; def: Pat<(MxExtLoadi32i8 MxCP_ARID:$src), (MOVZXd32p8 MxARID8:$src)>; def: Pat<(MxExtLoadi32i8 MxCP_ARII:$src), (MOVZXd32f8 MxARII8:$src)>; +def: Pat<(MxExtLoadi32i8 MxCP_PCD:$src), (MOVZXd32q8 MxPCD8:$src)>; // i32 <- anyext i16 def: Pat<(i32 (anyext i16:$src)), (MOVZXd32d16 MxDRD16:$src)>; def: Pat<(MxExtLoadi32i16 MxCP_ARI :$src), (MOVZXd32j16 MxARI16 :$src)>; def: Pat<(MxExtLoadi32i16 MxCP_ARID:$src), (MOVZXd32p16 MxARID16:$src)>; def: Pat<(MxExtLoadi32i16 MxCP_ARII:$src), (MOVZXd32f16 MxARII16:$src)>; +def: Pat<(MxExtLoadi32i16 MxCP_PCD:$src), (MOVZXd32q16 
MxPCD16:$src)>; // trunc patterns def : Pat<(i16 (trunc i32:$src)), diff --git a/llvm/test/CodeGen/M68k/Data/load-extend.ll b/llvm/test/CodeGen/M68k/Data/load-extend.ll index 51159730ecc0..687d3f24523d 100644 --- a/llvm/test/CodeGen/M68k/Data/load-extend.ll +++ b/llvm/test/CodeGen/M68k/Data/load-extend.ll @@ -41,3 +41,45 @@ define i32 @"test_zext_pcd_i16_to_i32"() { %val2 = zext i16 %val to i32 ret i32 %val2 } + +define i16 @test_anyext_pcd_i8_to_i16() nounwind { +; CHECK-LABEL: test_anyext_pcd_i8_to_i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: move.b (__unnamed_1+4,%pc), %d0 +; CHECK-NEXT: and.l #255, %d0 +; CHECK-NEXT: lsl.w #8, %d0 +; CHECK-NEXT: ; kill: def $wd0 killed $wd0 killed $d0 +; CHECK-NEXT: rts + %copyload = load i8, ptr getelementptr inbounds nuw (i8, ptr @0, i32 4) + %insert_ext = zext i8 %copyload to i16 + %insert_shift = shl i16 %insert_ext, 8 + ret i16 %insert_shift +} + +define i32 @test_anyext_pcd_i8_to_i32() nounwind { +; CHECK-LABEL: test_anyext_pcd_i8_to_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: moveq #24, %d1 +; CHECK-NEXT: move.b (__unnamed_1+4,%pc), %d0 +; CHECK-NEXT: and.l #255, %d0 +; CHECK-NEXT: lsl.l %d1, %d0 +; CHECK-NEXT: rts + %copyload = load i8, ptr getelementptr inbounds nuw (i8, ptr @0, i32 4) + %insert_ext = zext i8 %copyload to i32 + %insert_shift = shl i32 %insert_ext, 24 + ret i32 %insert_shift +} + +define i32 @test_anyext_pcd_i16_to_i32() nounwind { +; CHECK-LABEL: test_anyext_pcd_i16_to_i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: moveq #16, %d1 +; CHECK-NEXT: move.w (__unnamed_1+4,%pc), %d0 +; CHECK-NEXT: and.l #65535, %d0 +; CHECK-NEXT: lsl.l %d1, %d0 +; CHECK-NEXT: rts + %copyload = load i16, ptr getelementptr inbounds nuw (i8, ptr @0, i32 4) + %insert_ext = zext i16 %copyload to i32 + %insert_shift = shl i32 %insert_ext, 16 + ret i32 %insert_shift +} From f3008c11406440959f6fc2120dd2117df6fa11ce Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Sun, 17 Aug 2025 04:52:07 +0300 Subject: [PATCH 085/214] [libc++][flat_set] LWG3751, 
LWG3774 (#153934) - LWG3751: Missing feature macro for `flat_set` Implemented in LLVM21: https://github.com/llvm/llvm-project/commit/7013b51548c0bd2c7e5564735c44506909a2f8dc Closes #105021 - LWG3774: `` should include `` Implemented in LLVM21: https://github.com/llvm/llvm-project/commit/2f1416bbcde898d65e9762e6ce498bb0121e4610 https://github.com/llvm/llvm-project/blob/684797b6446073b5afd23022449aeecef84c404c/libcxx/include/flat_set#L77 Closes #105036 --- libcxx/docs/Status/Cxx23Issues.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv index 0103d6319f16..d1546f4a452b 100644 --- a/libcxx/docs/Status/Cxx23Issues.csv +++ b/libcxx/docs/Status/Cxx23Issues.csv @@ -209,7 +209,7 @@ "`LWG3746 `__","``optional``'s spaceship with ``U`` with a type derived from optional causes infinite constraint meta-recursion","2022-11 (Kona)","|Complete|","17","" "`LWG3747 `__","``ranges::uninitialized_copy_n``, ``ranges::uninitialized_move_n``, and ``ranges::destroy_n`` should use ``std::move``","2022-11 (Kona)","","","" "`LWG3750 `__","Too many papers bump ``__cpp_lib_format``","2022-11 (Kona)","|Partial|","","Only ``__cpp_lib_format_ranges`` is fully implemented" -"`LWG3751 `__","Missing feature macro for ``flat_set``","2022-11 (Kona)","","","" +"`LWG3751 `__","Missing feature macro for ``flat_set``","2022-11 (Kona)","|Complete|","21","" "`LWG3753 `__","Clarify entity vs. 
freestanding entity","2022-11 (Kona)","","","" "`LWG3754 `__","Class template expected synopsis contains declarations that do not match the detailed description","2022-11 (Kona)","|Nothing To Do|","","" "`LWG3755 `__","``tuple-for-each`` can call ``user-defined`` ``operator,``","2022-11 (Kona)","|Complete|","17","" @@ -223,7 +223,7 @@ "`LWG3766 `__","``view_interface::cbegin`` is underconstrained","2022-11 (Kona)","","","" "`LWG3770 `__","``const_sentinel_t`` is missing","2022-11 (Kona)","","","" "`LWG3773 `__","``views::zip_transform`` still requires ``F`` to be ``copy_constructible`` when empty pack","2022-11 (Kona)","|Complete|","22","" -"`LWG3774 `__","```` should include ````","2022-11 (Kona)","","","" +"`LWG3774 `__","```` should include ````","2022-11 (Kona)","|Complete|","21","" "`LWG3775 `__","Broken dependencies in the ``Cpp17Allocator`` requirements","2022-11 (Kona)","","","" "`LWG3778 `__","``vector`` missing exception specifications","2022-11 (Kona)","|Complete|","3.7","" "`LWG3781 `__","The exposition-only alias templates ``cont-key-type`` and ``cont-mapped-type`` should be removed","2022-11 (Kona)","|Nothing To Do|","","" From ee51f35993d9623563551f9bb9521824992967b6 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Sat, 16 Aug 2025 20:58:26 -0700 Subject: [PATCH 086/214] [clang-format][doc] Add OneLineFormatOffRegex to format-off section --- clang/docs/ClangFormatStyleOptions.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 02986a94a656..55ba261b7bdc 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -126,6 +126,9 @@ clang-format is turned off or back on. // clang-format on void formatted_code_again; +In addition, the ``OneLineFormatOffRegex`` option gives you a concise way to +disable formatting for all of the lines that match the regular expression. 
+ Configuring Style in Code ========================= From fc6024d8959cdb82e5421787882288e26ee728bc Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 07:02:16 +0300 Subject: [PATCH 087/214] [TableGen][DecoderEmitter] Shrink lifetime of `Filters` vector (NFC) (#153998) Only one element of the `Filters` vector (see `BestIndex`) is used outside the method that fills it. Localize the vector to the method, replacing the member variable with the only used element. Part of an effort to simplify DecoderEmitter code. --- llvm/utils/TableGen/DecoderEmitter.cpp | 77 ++++++++++++-------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 630793e02711..daa30629b022 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -487,8 +487,8 @@ protected: // Lookup table for the operand decoding of instructions. const std::map> &Operands; - // Vector of candidate filters. - std::vector Filters; + // The selected filter, if any. + std::unique_ptr BestFilter; // Array of bit values passed down from our parent. // Set to all BIT_UNFILTERED's for Parent == NULL. @@ -497,9 +497,6 @@ protected: // Links to the FilterChooser above us in the decoding tree. const FilterChooser *Parent; - // Index of the best filter from Filters. 
- int BestIndex; - // Width of instructions unsigned BitWidth; @@ -519,7 +516,7 @@ public: unsigned BW, const DecoderEmitter *E) : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr), - BestIndex(-1), BitWidth(BW), Emitter(E) { + BitWidth(BW), Emitter(E) { doFilter(); } @@ -529,7 +526,7 @@ public: const std::vector &ParentFilterBitValues, const FilterChooser &parent) : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), - FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), + FilterBitValues(ParentFilterBitValues), Parent(&parent), BitWidth(parent.BitWidth), Emitter(parent.Emitter) { doFilter(); } @@ -578,11 +575,6 @@ protected: /// dumpFilterArray on each filter chooser up to the top level one. void dumpStack(raw_ostream &OS, const char *prefix) const; - Filter &bestFilter() { - assert(BestIndex != -1 && "BestIndex not set"); - return Filters[BestIndex]; - } - bool PositionFiltered(unsigned Idx) const { return FilterBitValues[Idx].isSet(); } @@ -625,8 +617,8 @@ protected: // reportRegion is a helper function for filterProcessor to mark a region as // eligible for use as a filter region. - void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, - bool AllowMixed); + void reportRegion(std::vector> &Filters, bitAttr_t RA, + unsigned StartBit, unsigned BitIndex, bool AllowMixed); // FilterProcessor scans the well-known encoding bits of the instructions and // builds up a list of candidate filters. It chooses the best filter and @@ -1522,18 +1514,18 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, // Assign a single filter and run with it. Top level API client can initialize // with a single filter to start the filtering process. void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit) { - Filters.clear(); - Filters.emplace_back(*this, startBit, numBit); - BestIndex = 0; // Sole Filter instance to choose from. 
- bestFilter().recurse(); + BestFilter = std::make_unique(*this, startBit, numBit); + BestFilter->recurse(); } // reportRegion is a helper function for filterProcessor to mark a region as // eligible for use as a filter region. -void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, +void FilterChooser::reportRegion(std::vector> &Filters, + bitAttr_t RA, unsigned StartBit, unsigned BitIndex, bool AllowMixed) { if (AllowMixed ? RA == ATTR_MIXED : RA == ATTR_ALL_SET) - Filters.emplace_back(*this, StartBit, BitIndex - StartBit); + Filters.push_back( + std::make_unique(*this, StartBit, BitIndex - StartBit)); } // FilterProcessor scans the well-known encoding bits of the instructions and @@ -1541,9 +1533,6 @@ void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, // recursively descends down the decoding tree. bool FilterChooser::filterProcessor(ArrayRef BitAttrs, bool AllowMixed, bool Greedy) { - Filters.clear(); - BestIndex = -1; - assert(Opcodes.size() >= 2 && "Nothing to filter"); // Heuristics. 
See also doFilter()'s "Heuristics" comment when num of @@ -1587,6 +1576,7 @@ bool FilterChooser::filterProcessor(ArrayRef BitAttrs, bitAttr_t RA = ATTR_NONE; unsigned StartBit = 0; + std::vector> Filters; for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { bitAttr_t bitAttr = BitAttrs[BitIndex]; @@ -1614,17 +1604,17 @@ bool FilterChooser::filterProcessor(ArrayRef BitAttrs, case ATTR_ALL_SET: switch (bitAttr) { case ATTR_FILTERED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); + reportRegion(Filters, RA, StartBit, BitIndex, AllowMixed); RA = ATTR_NONE; break; case ATTR_ALL_SET: break; case ATTR_ALL_UNSET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); + reportRegion(Filters, RA, StartBit, BitIndex, AllowMixed); RA = ATTR_NONE; break; case ATTR_MIXED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); + reportRegion(Filters, RA, StartBit, BitIndex, AllowMixed); StartBit = BitIndex; RA = ATTR_MIXED; break; @@ -1635,17 +1625,17 @@ bool FilterChooser::filterProcessor(ArrayRef BitAttrs, case ATTR_MIXED: switch (bitAttr) { case ATTR_FILTERED: - reportRegion(RA, StartBit, BitIndex, AllowMixed); + reportRegion(Filters, RA, StartBit, BitIndex, AllowMixed); StartBit = BitIndex; RA = ATTR_NONE; break; case ATTR_ALL_SET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); + reportRegion(Filters, RA, StartBit, BitIndex, AllowMixed); StartBit = BitIndex; RA = ATTR_ALL_SET; break; case ATTR_ALL_UNSET: - reportRegion(RA, StartBit, BitIndex, AllowMixed); + reportRegion(Filters, RA, StartBit, BitIndex, AllowMixed); RA = ATTR_NONE; break; case ATTR_MIXED: @@ -1668,23 +1658,23 @@ bool FilterChooser::filterProcessor(ArrayRef BitAttrs, case ATTR_FILTERED: break; case ATTR_ALL_SET: - reportRegion(RA, StartBit, BitWidth, AllowMixed); + reportRegion(Filters, RA, StartBit, BitWidth, AllowMixed); break; case ATTR_ALL_UNSET: break; case ATTR_MIXED: - reportRegion(RA, StartBit, BitWidth, AllowMixed); + reportRegion(Filters, RA, StartBit, BitWidth, AllowMixed); break; } // 
We have finished with the filter processings. Now it's time to choose // the best performing filter. - BestIndex = 0; + unsigned BestIndex = 0; bool AllUseless = true; unsigned BestScore = 0; for (const auto &[Idx, Filter] : enumerate(Filters)) { - unsigned Usefulness = Filter.usefulness(); + unsigned Usefulness = Filter->usefulness(); if (Usefulness) AllUseless = false; @@ -1695,10 +1685,13 @@ bool FilterChooser::filterProcessor(ArrayRef BitAttrs, } } - if (!AllUseless) - bestFilter().recurse(); + if (AllUseless) + return false; + + BestFilter = std::move(Filters[BestIndex]); + BestFilter->recurse(); + return true; - return !AllUseless; } // end of FilterChooser::filterProcessor(bool) // Decides on the best configuration of filter(s) to use in order to decode @@ -1779,8 +1772,7 @@ void FilterChooser::doFilter() { return; // If we come to here, the instruction decoding has failed. - // Set the BestIndex to -1 to indicate so. - BestIndex = -1; + assert(!BestFilter); } // emitTableEntries - Emit state machine entries to decode our share of @@ -1795,12 +1787,11 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { } // Choose the best filter to do the decodings! 
- if (BestIndex != -1) { - const Filter &Best = Filters[BestIndex]; - if (Best.getNumFiltered() == 1) - emitSingletonTableEntry(TableInfo, Best); + if (BestFilter) { + if (BestFilter->getNumFiltered() == 1) + emitSingletonTableEntry(TableInfo, *BestFilter); else - Best.emitTableEntry(TableInfo); + BestFilter->emitTableEntry(TableInfo); return; } From 3d83dbb73610c6cfb1b67252bd2d519a0aa5e308 Mon Sep 17 00:00:00 2001 From: Errant Date: Sun, 17 Aug 2025 13:01:59 +0800 Subject: [PATCH 088/214] [clang] Fix typos in OMPClauseProfiler method names for consistency (#153852) --- clang/lib/AST/StmtProfile.cpp | 50 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 5fee88458527..7998d2369460 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -440,37 +440,37 @@ public: #define GEN_CLANG_CLAUSE_CLASS #define CLAUSE_CLASS(Enum, Str, Class) void Visit##Class(const Class *C); #include "llvm/Frontend/OpenMP/OMP.inc" - void VistOMPClauseWithPreInit(const OMPClauseWithPreInit *C); - void VistOMPClauseWithPostUpdate(const OMPClauseWithPostUpdate *C); + void VisitOMPClauseWithPreInit(const OMPClauseWithPreInit *C); + void VisitOMPClauseWithPostUpdate(const OMPClauseWithPostUpdate *C); }; -void OMPClauseProfiler::VistOMPClauseWithPreInit( +void OMPClauseProfiler::VisitOMPClauseWithPreInit( const OMPClauseWithPreInit *C) { if (auto *S = C->getPreInitStmt()) Profiler->VisitStmt(S); } -void OMPClauseProfiler::VistOMPClauseWithPostUpdate( +void OMPClauseProfiler::VisitOMPClauseWithPostUpdate( const OMPClauseWithPostUpdate *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (auto *E = C->getPostUpdateExpr()) Profiler->VisitStmt(E); } void OMPClauseProfiler::VisitOMPIfClause(const OMPIfClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getCondition()) Profiler->VisitStmt(C->getCondition()); } void 
OMPClauseProfiler::VisitOMPFinalClause(const OMPFinalClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getCondition()) Profiler->VisitStmt(C->getCondition()); } void OMPClauseProfiler::VisitOMPNumThreadsClause(const OMPNumThreadsClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getNumThreads()) Profiler->VisitStmt(C->getNumThreads()); } @@ -526,13 +526,13 @@ void OMPClauseProfiler::VisitOMPDetachClause(const OMPDetachClause *C) { } void OMPClauseProfiler::VisitOMPNovariantsClause(const OMPNovariantsClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getCondition()) Profiler->VisitStmt(C->getCondition()); } void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getCondition()) Profiler->VisitStmt(C->getCondition()); } @@ -568,7 +568,7 @@ void OMPClauseProfiler::VisitOMPMessageClause(const OMPMessageClause *C) { } void OMPClauseProfiler::VisitOMPScheduleClause(const OMPScheduleClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (auto *S = C->getChunkSize()) Profiler->VisitStmt(S); } @@ -646,7 +646,7 @@ void OMPClauseProfiler::VisitOMPDestroyClause(const OMPDestroyClause *C) { } void OMPClauseProfiler::VisitOMPFilterClause(const OMPFilterClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getThreadID()) Profiler->VisitStmt(C->getThreadID()); } @@ -669,7 +669,7 @@ void OMPClauseProfiler::VisitOMPPrivateClause(const OMPPrivateClause *C) { void OMPClauseProfiler::VisitOMPFirstprivateClause(const OMPFirstprivateClause *C) { VisitOMPClauseList(C); - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); for (auto *E : C->private_copies()) { if (E) Profiler->VisitStmt(E); @@ -682,7 +682,7 @@ OMPClauseProfiler::VisitOMPFirstprivateClause(const OMPFirstprivateClause *C) { void 
OMPClauseProfiler::VisitOMPLastprivateClause(const OMPLastprivateClause *C) { VisitOMPClauseList(C); - VistOMPClauseWithPostUpdate(C); + VisitOMPClauseWithPostUpdate(C); for (auto *E : C->source_exprs()) { if (E) Profiler->VisitStmt(E); @@ -705,7 +705,7 @@ void OMPClauseProfiler::VisitOMPReductionClause( C->getQualifierLoc().getNestedNameSpecifier()); Profiler->VisitName(C->getNameInfo().getName()); VisitOMPClauseList(C); - VistOMPClauseWithPostUpdate(C); + VisitOMPClauseWithPostUpdate(C); for (auto *E : C->privates()) { if (E) Profiler->VisitStmt(E); @@ -743,7 +743,7 @@ void OMPClauseProfiler::VisitOMPTaskReductionClause( C->getQualifierLoc().getNestedNameSpecifier()); Profiler->VisitName(C->getNameInfo().getName()); VisitOMPClauseList(C); - VistOMPClauseWithPostUpdate(C); + VisitOMPClauseWithPostUpdate(C); for (auto *E : C->privates()) { if (E) Profiler->VisitStmt(E); @@ -767,7 +767,7 @@ void OMPClauseProfiler::VisitOMPInReductionClause( C->getQualifierLoc().getNestedNameSpecifier()); Profiler->VisitName(C->getNameInfo().getName()); VisitOMPClauseList(C); - VistOMPClauseWithPostUpdate(C); + VisitOMPClauseWithPostUpdate(C); for (auto *E : C->privates()) { if (E) Profiler->VisitStmt(E); @@ -791,7 +791,7 @@ void OMPClauseProfiler::VisitOMPInReductionClause( } void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) { VisitOMPClauseList(C); - VistOMPClauseWithPostUpdate(C); + VisitOMPClauseWithPostUpdate(C); for (auto *E : C->privates()) { if (E) Profiler->VisitStmt(E); @@ -873,25 +873,25 @@ void OMPClauseProfiler::VisitOMPAllocateClause(const OMPAllocateClause *C) { } void OMPClauseProfiler::VisitOMPNumTeamsClause(const OMPNumTeamsClause *C) { VisitOMPClauseList(C); - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); } void OMPClauseProfiler::VisitOMPThreadLimitClause( const OMPThreadLimitClause *C) { VisitOMPClauseList(C); - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); } void OMPClauseProfiler::VisitOMPPriorityClause(const 
OMPPriorityClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getPriority()) Profiler->VisitStmt(C->getPriority()); } void OMPClauseProfiler::VisitOMPGrainsizeClause(const OMPGrainsizeClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getGrainsize()) Profiler->VisitStmt(C->getGrainsize()); } void OMPClauseProfiler::VisitOMPNumTasksClause(const OMPNumTasksClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (C->getNumTasks()) Profiler->VisitStmt(C->getNumTasks()); } @@ -952,7 +952,7 @@ void OMPClauseProfiler::VisitOMPOrderClause(const OMPOrderClause *C) {} void OMPClauseProfiler::VisitOMPBindClause(const OMPBindClause *C) {} void OMPClauseProfiler::VisitOMPXDynCGroupMemClause( const OMPXDynCGroupMemClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (Expr *Size = C->getSize()) Profiler->VisitStmt(Size); } @@ -1229,7 +1229,7 @@ void StmtProfiler::VisitOMPDistributeDirective( void OMPClauseProfiler::VisitOMPDistScheduleClause( const OMPDistScheduleClause *C) { - VistOMPClauseWithPreInit(C); + VisitOMPClauseWithPreInit(C); if (auto *S = C->getChunkSize()) Profiler->VisitStmt(S); } From 05827e7ccb4e1295648e38b11ebdbb8fd817177b Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 08:20:31 +0300 Subject: [PATCH 089/214] [TableGen][DecoderEmitter] Dump conflicts earlier Dump a conflict as soon as we discover it, no need to wait until we start building the decoder table. This improves debugging experience. 
--- llvm/utils/TableGen/DecoderEmitter.cpp | 43 ++++++++++++-------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index daa30629b022..780350935641 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -1771,32 +1771,9 @@ void FilterChooser::doFilter() { filterProcessor(BitAttrs, /*AllowMixed=*/true, /*Greedy=*/false)) return; - // If we come to here, the instruction decoding has failed. - assert(!BestFilter); -} - -// emitTableEntries - Emit state machine entries to decode our share of -// instructions. -void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { - if (Opcodes.size() == 1) { - // There is only one instruction in the set, which is great! - // Call emitSingletonDecoder() to see whether there are any remaining - // encodings bits. - emitSingletonTableEntry(TableInfo, Opcodes[0]); - return; - } - - // Choose the best filter to do the decodings! - if (BestFilter) { - if (BestFilter->getNumFiltered() == 1) - emitSingletonTableEntry(TableInfo, *BestFilter); - else - BestFilter->emitTableEntry(TableInfo); - return; - } - // We don't know how to decode these instructions! Dump the // conflict set and bail. + assert(!BestFilter); // Print out useful conflict information for postmortem analysis. errs() << "Decoding Conflict:\n"; @@ -1812,6 +1789,24 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { PrintFatalError("Decoding conflict encountered"); } +// emitTableEntries - Emit state machine entries to decode our share of +// instructions. +void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { + if (Opcodes.size() == 1) { + // There is only one instruction in the set, which is great! + // Call emitSingletonDecoder() to see whether there are any remaining + // encodings bits. 
+ emitSingletonTableEntry(TableInfo, Opcodes[0]); + return; + } + + // Use the best filter to do the decoding! + if (BestFilter->getNumFiltered() == 1) + emitSingletonTableEntry(TableInfo, *BestFilter); + else + BestFilter->emitTableEntry(TableInfo); +} + static std::string findOperandDecoderMethod(const Record *Record) { std::string Decoder; From 05f1673e7569da0762d2a5d890ca4860760dad34 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 09:30:58 +0300 Subject: [PATCH 090/214] [TableGen] Make a function static (NFC) Also, modernize the return value to std::optional. --- llvm/utils/TableGen/DecoderEmitter.cpp | 55 ++++++++++---------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 780350935641..6277e959574f 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -348,6 +348,24 @@ static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) { // Representation of the instruction to work on. typedef std::vector insn_t; +/// Extracts a NumBits long field from Insn, starting from StartBit. +/// Returns the value of the field if all bits are well-known, +/// otherwise std::nullopt. +static std::optional +fieldFromInsn(const insn_t &Insn, unsigned StartBit, unsigned NumBits) { + uint64_t Field = 0; + + for (unsigned BitIndex = 0; BitIndex < NumBits; ++BitIndex) { + if (Insn[StartBit + BitIndex] == BitValue::BIT_UNSET) + return std::nullopt; + + if (Insn[StartBit + BitIndex] == BitValue::BIT_TRUE) + Field = Field | (1ULL << BitIndex); + } + + return Field; +} + namespace { static constexpr uint64_t NO_FIXED_SEGMENTS_SENTINEL = @@ -558,15 +576,6 @@ protected: return Insn; } - // Populates the field of the insn given the start position and the number of - // consecutive bits to scan for. 
- // - // Returns a pair of values (indicator, field), where the indicator is false - // if there exists any uninitialized bit value in the range and true if all - // bits are well-known. The second value is the potentially populated field. - std::pair fieldFromInsn(const insn_t &Insn, unsigned StartBit, - unsigned NumBits) const; - /// dumpFilterArray - dumpFilterArray prints out debugging info for the given /// filter array as a series of chars. void dumpFilterArray(raw_ostream &OS, ArrayRef Filter) const; @@ -663,12 +672,12 @@ Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits) insn_t Insn = Owner.insnWithID(OpcPair.EncodingID); // Scans the segment for possibly well-specified encoding bits. - auto [Ok, Field] = Owner.fieldFromInsn(Insn, StartBit, NumBits); + std::optional Field = fieldFromInsn(Insn, StartBit, NumBits); - if (Ok) { + if (Field) { // The encoding bits are well-known. Lets add the uid of the // instruction into the bucket keyed off the constant field value. - FilteredInstructions[Field].push_back(OpcPair); + FilteredInstructions[*Field].push_back(OpcPair); ++NumFiltered; } else { // Some of the encoding bit(s) are unspecified. This contributes to @@ -1099,28 +1108,6 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, OS << "}\n"; } -// Populates the field of the insn given the start position and the number of -// consecutive bits to scan for. -// -// Returns a pair of values (indicator, field), where the indicator is false -// if there exists any uninitialized bit value in the range and true if all -// bits are well-known. The second value is the potentially populated field. 
-std::pair FilterChooser::fieldFromInsn(const insn_t &Insn, - unsigned StartBit, - unsigned NumBits) const { - uint64_t Field = 0; - - for (unsigned i = 0; i < NumBits; ++i) { - if (Insn[StartBit + i] == BitValue::BIT_UNSET) - return {false, Field}; - - if (Insn[StartBit + i] == BitValue::BIT_TRUE) - Field = Field | (1ULL << i); - } - - return {true, Field}; -} - /// dumpFilterArray - dumpFilterArray prints out debugging info for the given /// filter array as a series of chars. void FilterChooser::dumpFilterArray(raw_ostream &OS, From ea4325f174baca7d12e128db4f9f3b41a918da67 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 09:42:25 +0300 Subject: [PATCH 091/214] [TableGen][DecoderEmitter] Improve conflicts dump (#154001) * Print filter stack in non-reversed order. * Print encoding name to the right of encoding bits to deal with alignment issues. * Use the correct bit width when printing encoding bits. Example of old output: ``` 01000100........ 01000........... 0100............ ................ tADDhirr 000000000000000001000100________ tADDrSP 000000000000000001000100_1101___ tADDspr 0000000000000000010001001____101 ``` New output: ``` ................ 0100............ 01000........... 01000100........ 01000100________ tADDhirr 01000100_1101___ tADDrSP 010001001____101 tADDspr ``` --- .../FixedLenDecoderEmitter/conflict.td | 8 +-- llvm/utils/TableGen/DecoderEmitter.cpp | 57 +++++++++---------- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/conflict.td b/llvm/test/TableGen/FixedLenDecoderEmitter/conflict.td index 7399ef726d0e..853a68d22d1d 100644 --- a/llvm/test/TableGen/FixedLenDecoderEmitter/conflict.td +++ b/llvm/test/TableGen/FixedLenDecoderEmitter/conflict.td @@ -29,7 +29,7 @@ def B : I<(outs GPR32:$dst), (ins GPR32:$src1), []> { } // CHECK: Decoding Conflict: -// CHECK: 00000000000000000000000000000000 -// CHECK: ................................ 
-// CHECK: A 00000000000000000000000000000000 -// CHECK: B 00000000000000000000000000000000 +// CHECK: ................................ +// CHECK: 00000000000000000000000000000000 +// CHECK: 00000000000000000000000000000000 A +// CHECK: 00000000000000000000000000000000 B diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 6277e959574f..11d06d1613e8 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -319,8 +319,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { } // Prints the bit value for each position. -static void dumpBits(raw_ostream &OS, const BitsInit &Bits) { - for (const Init *Bit : reverse(Bits.getBits())) +static void dumpBits(raw_ostream &OS, const BitsInit &Bits, unsigned BitWidth) { + for (const Init *Bit : reverse(Bits.getBits().take_front(BitWidth))) OS << BitValue(Bit); } @@ -389,16 +389,16 @@ class FilterChooser; /// /// An example of a conflict is /// -/// Conflict: -/// 111101000.00........00010000.... -/// 111101000.00........0001........ -/// 1111010...00........0001........ -/// 1111010...00.................... -/// 1111010......................... -/// 1111............................ -/// ................................ -/// VST4q8a 111101000_00________00010000____ -/// VST4q8b 111101000_00________00010000____ +/// Decoding Conflict: +/// ................................ +/// 1111............................ +/// 1111010......................... +/// 1111010...00.................... +/// 1111010...00........0001........ +/// 111101000.00........0001........ +/// 111101000.00........00010000.... +/// 111101000_00________00010000____ VST4q8a +/// 111101000_00________00010000____ VST4q8b /// /// The Debug output shows the path that the decoding tree follows to reach the /// the conclusion that there is a conflict. 
VST4q8a is a vst4 to double-spaced @@ -582,7 +582,7 @@ protected: /// dumpStack - dumpStack traverses the filter chooser chain and calls /// dumpFilterArray on each filter chooser up to the top level one. - void dumpStack(raw_ostream &OS, const char *prefix) const; + void dumpStack(raw_ostream &OS, indent Indent) const; bool PositionFiltered(unsigned Idx) const { return FilterBitValues[Idx].isSet(); @@ -701,9 +701,8 @@ void Filter::recurse() { std::vector BitValueArray(Owner.FilterBitValues); if (!VariableInstructions.empty()) { - // Conservatively marks each segment position as BIT_UNSET. for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) - BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNSET; + BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNFILTERED; // Delegates to an inferior filter chooser for further processing on this // group of instructions whose segment values are variable. @@ -1118,15 +1117,12 @@ void FilterChooser::dumpFilterArray(raw_ostream &OS, /// dumpStack - dumpStack traverses the filter chooser chain and calls /// dumpFilterArray on each filter chooser up to the top level one. -void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const { - const FilterChooser *current = this; - - while (current) { - OS << prefix; - dumpFilterArray(OS, current->FilterBitValues); - OS << '\n'; - current = current->Parent; - } +void FilterChooser::dumpStack(raw_ostream &OS, indent Indent) const { + if (Parent) + Parent->dumpStack(OS, Indent); + OS << Indent; + dumpFilterArray(OS, FilterBitValues); + OS << '\n'; } // Calculates the island(s) needed to decode the instruction. @@ -1765,13 +1761,16 @@ void FilterChooser::doFilter() { // Print out useful conflict information for postmortem analysis. errs() << "Decoding Conflict:\n"; - dumpStack(errs(), "\t\t"); + // Dump filters. + indent Indent(4); + dumpStack(errs(), Indent); - for (auto Opcode : Opcodes) { + // Dump encodings. 
+ for (EncodingIDAndOpcode Opcode : Opcodes) { const EncodingAndInst &Enc = AllInstructions[Opcode.EncodingID]; - errs() << '\t' << Enc << ' '; - dumpBits(errs(), getBitsField(*Enc.EncodingDef, "Inst")); - errs() << '\n'; + errs() << Indent; + dumpBits(errs(), getBitsField(*Enc.EncodingDef, "Inst"), BitWidth); + errs() << " " << Enc << '\n'; } PrintFatalError("Decoding conflict encountered"); } From e44784fb44bd00acc0ecd25537a359c3a1df8f17 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sun, 17 Aug 2025 08:47:57 +0200 Subject: [PATCH 092/214] [clang][bytecode] Fix pseudo dtor calls on non-pointers (#153970) The isGLValue() check made us ignore expressions we shouldn't ignore. --- clang/lib/AST/ByteCode/Compiler.cpp | 3 +- clang/test/AST/ByteCode/builtin-functions.cpp | 43 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 6c6c8d41d3b9..b228cea7cb58 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5146,7 +5146,8 @@ bool Compiler::VisitCallExpr(const CallExpr *E) { if (!this->emitCheckPseudoDtor(E)) return false; const Expr *Base = PD->getBase(); - if (!Base->isGLValue()) + // E.g. `using T = int; 0.~T();`. + if (OptPrimType BaseT = classify(Base); !BaseT || BaseT != PT_Ptr) return this->discard(Base); if (!this->visit(Base)) return false; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 1223cf8bdc74..878c0d1a40f2 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -21,6 +21,27 @@ #error "huh?" 
#endif + +inline constexpr void* operator new(__SIZE_TYPE__, void* p) noexcept { return p; } +namespace std { + using size_t = decltype(sizeof(0)); + template struct allocator { + constexpr T *allocate(size_t N) { + return (T*)__builtin_operator_new(sizeof(T) * N); // #alloc + } + constexpr void deallocate(void *p, __SIZE_TYPE__) { + __builtin_operator_delete(p); + } + }; +template +constexpr T* construct_at(T* p, Args&&... args) { return ::new((void*)p) T(static_cast(args)...); } + + template + constexpr void destroy_at(T* p) { + p->~T(); + } +} + extern "C" { typedef decltype(sizeof(int)) size_t; extern size_t wcslen(const wchar_t *p); @@ -1767,6 +1788,28 @@ namespace WithinLifetime { } } xstd; // both-error {{is not a constant expression}} \ // both-note {{in call to}} + + consteval bool test_dynamic(bool read_after_deallocate) { + std::allocator a; + int* p = a.allocate(1); + // a.allocate starts the lifetime of an array, + // the complete object of *p has started its lifetime + if (__builtin_is_within_lifetime(p)) + return false; + std::construct_at(p); + if (!__builtin_is_within_lifetime(p)) + return false; + std::destroy_at(p); + if (__builtin_is_within_lifetime(p)) + return false; + a.deallocate(p, 1); + if (read_after_deallocate) + __builtin_is_within_lifetime(p); // both-note {{read of heap allocated object that has been deleted}} + return true; + } + static_assert(test_dynamic(false)); + static_assert(test_dynamic(true)); // both-error {{not an integral constant expression}} \ + // both-note {{in call to}} } #ifdef __SIZEOF_INT128__ From 5ae8a9b8cee3d4477fdec107a3ab29b633ec4f9f Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Sun, 17 Aug 2025 09:53:40 +0200 Subject: [PATCH 093/214] [SimplifyCfg] Handle trunc nuw i1 condition in Equality comparison. 
(#153051) proof: https://alive2.llvm.org/ce/z/WVt4-F --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 30 +++++++--- .../Transforms/SimplifyCFG/switch_create.ll | 57 +++++++++++++++++++ 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 1436e479ba09..46d6c2a541a8 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -810,11 +810,15 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) { if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors())) CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast(TI)) - if (BI->isConditional() && BI->getCondition()->hasOneUse()) + if (BI->isConditional() && BI->getCondition()->hasOneUse()) { if (ICmpInst *ICI = dyn_cast(BI->getCondition())) { if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); + } else if (auto *Trunc = dyn_cast(BI->getCondition())) { + if (Trunc->hasNoUnsignedWrap()) + CV = Trunc->getOperand(0); } + } // Unwrap any lossless ptrtoint cast. 
if (CV) { @@ -840,11 +844,20 @@ BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases( } BranchInst *BI = cast(TI); - ICmpInst *ICI = cast(BI->getCondition()); - BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); - Cases.push_back(ValueEqualityComparisonCase( - getConstantInt(ICI->getOperand(1), DL), Succ)); - return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); + Value *Cond = BI->getCondition(); + ICmpInst::Predicate Pred; + ConstantInt *C; + if (auto *ICI = dyn_cast(Cond)) { + Pred = ICI->getPredicate(); + C = getConstantInt(ICI->getOperand(1), DL); + } else { + Pred = ICmpInst::ICMP_NE; + auto *Trunc = cast(Cond); + C = ConstantInt::get(cast(Trunc->getOperand(0)->getType()), 0); + } + BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE); + Cases.push_back(ValueEqualityComparisonCase(C, Succ)); + return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ); } /// Given a vector of bb/value pairs, remove any entries @@ -1106,7 +1119,10 @@ static void getBranchWeights(Instruction *TI, // default weight to be the first entry. 
if (BranchInst *BI = dyn_cast(TI)) { assert(Weights.size() == 2); - ICmpInst *ICI = cast(BI->getCondition()); + auto *ICI = dyn_cast(BI->getCondition()); + if (!ICI) + return; + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) std::swap(Weights.front(), Weights.back()); } diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll index f446d718f820..a1533bdcffb4 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll @@ -1068,3 +1068,60 @@ if: else: ret void } + +define void @trunc_nuw_i1_condition(i32 %V) { +; CHECK-LABEL: @trunc_nuw_i1_condition( +; CHECK-NEXT: switch i32 [[V:%.*]], label [[F:%.*]] [ +; CHECK-NEXT: i32 2, label [[T:%.*]] +; CHECK-NEXT: i32 0, label [[T]] +; CHECK-NEXT: ] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: T: +; CHECK-NEXT: call void @foo1() +; CHECK-NEXT: br label [[COMMON_RET:%.*]] +; CHECK: F: +; CHECK-NEXT: call void @foo2() +; CHECK-NEXT: br label [[COMMON_RET]] +; + %C1 = icmp eq i32 %V, 2 + br i1 %C1, label %T, label %N +N: + %C2 = trunc nuw i32 %V to i1 + br i1 %C2, label %F, label %T +T: + call void @foo1( ) + ret void +F: + call void @foo2( ) + ret void +} + +define void @neg_trunc_i1_condition(i32 %V) { +; CHECK-LABEL: @neg_trunc_i1_condition( +; CHECK-NEXT: [[C1:%.*]] = icmp ne i32 [[V:%.*]], 2 +; CHECK-NEXT: [[C2:%.*]] = trunc i32 [[V]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C1]], [[C2]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[F:%.*]], label [[T:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: T: +; CHECK-NEXT: call void @foo1() +; CHECK-NEXT: br label [[COMMON_RET:%.*]] +; CHECK: F: +; CHECK-NEXT: call void @foo2() +; CHECK-NEXT: br label [[COMMON_RET]] +; + %C1 = icmp eq i32 %V, 2 + br i1 %C1, label %T, label %N +N: + %C2 = trunc i32 %V to i1 + br i1 %C2, label %F, label %T +T: + call void @foo1( ) + ret void +F: + call void @foo2( ) + ret void +} + From 
bd77e9acf0e86a46379e1780dd58a787a7ee78f5 Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Sun, 17 Aug 2025 11:40:48 +0200 Subject: [PATCH 094/214] [clang-tidy] Avoid matching nodes in system headers (#151035) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit is a re-do of e4a8969e56572371201863594b3a549de2e23f32, which got reverted, with the same goal: dramatically speed up clang-tidy by avoiding doing work in system headers (which is wasteful as warnings are later discarded). This proposal was already discussed here with favorable feedback: https://github.com/llvm/llvm-project/pull/132725 The novelty of this patch is: - It's less aggressive: it does not fiddle with AST traversal. This solves the issue with the previous patch, which impacted the ability to inspect parents of a given node. - Instead, what we optimize for is exiting early in each `Traverse*` function of `MatchASTVisitor` if the node is in a system header, thus avoiding calling the `match()` function with its corresponding callback (when there is a match). - It does not cause any failing tests. - It does not move `MatchFinderOptions` - instead we add a user-defined default constructor which solves the same problem. - It introduces a function `shouldSkipNode` which can be extended for adding more conditions. For example there's a PR open about skipping modules in clang-tidy where this could come in handy: https://github.com/llvm/llvm-project/pull/145630 As a benchmark, I ran clang-tidy with all checks activated, on a single .cpp file which #includes all the standard C++ headers, then measured the time as well as the number of warnings found. On trunk: ``` Suppressed 75413 warnings (75413 in non-user code). real 0m12.418s user 0m12.270s sys 0m0.129s ``` With this patch: ``` Suppressed 11448 warnings (11448 in non-user code). Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
real 0m1.666s user 0m1.538s sys 0m0.129s ``` With the original patch that got reverted: ``` Suppressed 11428 warnings (11428 in non-user code). real 0m1.193s user 0m1.096s sys 0m0.096s ``` We therefore get a dramatic reduction in number of warnings and runtime, with no change in functionality. The remaining warnings are due to `PPCallbacks` - implementing a similar system-header exclusion mechanism there can lead to almost no warnings left in system headers. This does not bring the runtime down as much, though, so it's probably not worth the effort. Fixes #52959 Co-authored-by: Carlos Gálvez --- clang-tools-extra/clang-tidy/ClangTidy.cpp | 4 ++ clang-tools-extra/docs/ReleaseNotes.rst | 4 ++ .../clang-tidy/infrastructure/file-filter.cpp | 5 -- .../infrastructure/system-headers.cpp | 4 +- clang/docs/ReleaseNotes.rst | 3 + .../clang/ASTMatchers/ASTMatchFinder.h | 5 ++ clang/lib/ASTMatchers/ASTMatchFinder.cpp | 61 +++++++++++++++++-- 7 files changed, 74 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 4ae2864d310d..b612d4f18acc 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -424,6 +424,10 @@ ClangTidyASTConsumerFactory::createASTConsumer( FinderOptions.CheckProfiling.emplace(Profiling->Records); } + // Avoid processing system headers, unless the user explicitly requests it + if (!Context.getOptions().SystemHeaders.value_or(false)) + FinderOptions.IgnoreSystemHeaders = true; + std::unique_ptr Finder( new ast_matchers::MatchFinder(std::move(FinderOptions))); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 1553f461634d..36703dd32c03 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -104,6 +104,10 @@ Improvements to clang-query Improvements to clang-tidy -------------------------- +- :program:`clang-tidy` no longer attempts
to analyze code from system headers + by default, greatly improving performance. This behavior is disabled if the + `SystemHeaders` option is enabled. + - The :program:`run-clang-tidy.py` and :program:`clang-tidy-diff.py` scripts now run checks in parallel by default using all available hardware threads. Both scripts display the number of threads being used in their output. diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/file-filter.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/file-filter.cpp index 448ef9ddf166..d9ec1049963b 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/file-filter.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/file-filter.cpp @@ -66,19 +66,14 @@ class A { A(int); }; // CHECK4-NOT: warning: // CHECK4-QUIET-NOT: warning: -// CHECK: Suppressed 3 warnings (3 in non-user code) // CHECK: Use -header-filter=.* to display errors from all non-system headers. // CHECK-QUIET-NOT: Suppressed -// CHECK2: Suppressed 1 warnings (1 in non-user code) -// CHECK2: Use -header-filter=.* {{.*}} // CHECK2-QUIET-NOT: Suppressed -// CHECK3: Suppressed 2 warnings (2 in non-user code) // CHECK3: Use -header-filter=.* {{.*}} // CHECK3-QUIET-NOT: Suppressed // CHECK4-NOT: Suppressed {{.*}} warnings // CHECK4-NOT: Use -header-filter=.* {{.*}} // CHECK4-QUIET-NOT: Suppressed -// CHECK6: Suppressed 2 warnings (2 in non-user code) // CHECK6: Use -header-filter=.* {{.*}} int x = 123; diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/system-headers.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/system-headers.cpp index 9fa990b6aac8..a25480e9aa39 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/system-headers.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/system-headers.cpp @@ -11,9 +11,9 @@ // RUN: clang-tidy -help | FileCheck -check-prefix=CHECK-OPT-PRESENT %s // RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='.*' -system-headers=true %s -- -isystem 
%S/Inputs/system-headers 2>&1 | FileCheck -check-prefix=CHECK-SYSTEM-HEADERS %s -// RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='.*' -system-headers=false %s -- -isystem %S/Inputs/system-headers 2>&1 | FileCheck -check-prefix=CHECK-NO-SYSTEM-HEADERS %s +// RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='.*' -system-headers=false %s -- -isystem %S/Inputs/system-headers 2>&1 | FileCheck -check-prefix=CHECK-NO-SYSTEM-HEADERS --allow-empty %s // RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='.*' -config='SystemHeaders: true' %s -- -isystem %S/Inputs/system-headers 2>&1 | FileCheck -check-prefix=CHECK-SYSTEM-HEADERS %s -// RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='.*' -config='SystemHeaders: false' %s -- -isystem %S/Inputs/system-headers 2>&1 | FileCheck -check-prefix=CHECK-NO-SYSTEM-HEADERS %s +// RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='.*' -config='SystemHeaders: false' %s -- -isystem %S/Inputs/system-headers 2>&1 | FileCheck -check-prefix=CHECK-NO-SYSTEM-HEADERS --allow-empty %s #include // CHECK-SYSTEM-HEADERS: system_header.h:1:13: warning: single-argument constructors must be marked explicit diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 604b4c3f714b..b35f4ea42818 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -308,6 +308,9 @@ AST Matchers - Ensure ``hasBitWidth`` doesn't crash on bit widths that are dependent on template parameters. +- Add a boolean member ``IgnoreSystemHeaders`` to ``MatchFinderOptions``. This + allows it to ignore nodes in system headers when traversing the AST. 
+ clang-format ------------ diff --git a/clang/include/clang/ASTMatchers/ASTMatchFinder.h b/clang/include/clang/ASTMatchers/ASTMatchFinder.h index 73cbcf1f2502..2d36e8c4fae1 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchFinder.h +++ b/clang/include/clang/ASTMatchers/ASTMatchFinder.h @@ -135,10 +135,15 @@ public: llvm::StringMap &Records; }; + MatchFinderOptions() {} + /// Enables per-check timers. /// /// It prints a report after match. std::optional CheckProfiling; + + /// Avoids matching declarations in system headers. + bool IgnoreSystemHeaders{false}; }; MatchFinder(MatchFinderOptions Options = MatchFinderOptions()); diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp index d43d1aec71b2..e8a0004c2e18 100644 --- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp +++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp @@ -1344,6 +1344,41 @@ private: return false; } + template static SourceLocation getNodeLocation(const T &Node) { + return Node.getBeginLoc(); + } + + static SourceLocation getNodeLocation(const CXXCtorInitializer &Node) { + return Node.getSourceLocation(); + } + + static SourceLocation getNodeLocation(const TemplateArgumentLoc &Node) { + return Node.getLocation(); + } + + static SourceLocation getNodeLocation(const Attr &Node) { + return Node.getLocation(); + } + + bool isInSystemHeader(SourceLocation Loc) { + const SourceManager &SM = getASTContext().getSourceManager(); + return SM.isInSystemHeader(Loc); + } + + template bool shouldSkipNode(T &Node) { + if (Options.IgnoreSystemHeaders && isInSystemHeader(getNodeLocation(Node))) + return true; + return false; + } + + template bool shouldSkipNode(T *Node) { + return (Node == nullptr) || shouldSkipNode(*Node); + } + + bool shouldSkipNode(QualType &) { return false; } + + bool shouldSkipNode(NestedNameSpecifier &) { return false; } + /// Bucket to record map. /// /// Used to get the appropriate bucket for each matcher. 
@@ -1473,9 +1508,8 @@ bool MatchASTVisitor::objcClassIsDerivedFrom( } bool MatchASTVisitor::TraverseDecl(Decl *DeclNode) { - if (!DeclNode) { + if (shouldSkipNode(DeclNode)) return true; - } bool ScopedTraversal = TraversingASTNodeNotSpelledInSource || DeclNode->isImplicit(); @@ -1503,9 +1537,9 @@ bool MatchASTVisitor::TraverseDecl(Decl *DeclNode) { } bool MatchASTVisitor::TraverseStmt(Stmt *StmtNode, DataRecursionQueue *Queue) { - if (!StmtNode) { + if (shouldSkipNode(StmtNode)) return true; - } + bool ScopedTraversal = TraversingASTNodeNotSpelledInSource || TraversingASTChildrenNotSpelledInSource; @@ -1515,6 +1549,9 @@ bool MatchASTVisitor::TraverseStmt(Stmt *StmtNode, DataRecursionQueue *Queue) { } bool MatchASTVisitor::TraverseType(QualType TypeNode, bool TraverseQualifier) { + if (shouldSkipNode(TypeNode)) + return true; + match(TypeNode); return RecursiveASTVisitor::TraverseType(TypeNode, TraverseQualifier); @@ -1522,6 +1559,8 @@ bool MatchASTVisitor::TraverseType(QualType TypeNode, bool TraverseQualifier) { bool MatchASTVisitor::TraverseTypeLoc(TypeLoc TypeLocNode, bool TraverseQualifier) { + if (shouldSkipNode(TypeLocNode)) + return true; // The RecursiveASTVisitor only visits types if they're not within TypeLocs. // We still want to find those types via matchers, so we match them here. 
Note // that the TypeLocs are structurally a shadow-hierarchy to the expressed @@ -1534,6 +1573,9 @@ bool MatchASTVisitor::TraverseTypeLoc(TypeLoc TypeLocNode, } bool MatchASTVisitor::TraverseNestedNameSpecifier(NestedNameSpecifier NNS) { + if (shouldSkipNode(NNS)) + return true; + match(NNS); return RecursiveASTVisitor::TraverseNestedNameSpecifier(NNS); } @@ -1543,6 +1585,9 @@ bool MatchASTVisitor::TraverseNestedNameSpecifierLoc( if (!NNS) return true; + if (shouldSkipNode(NNS)) + return true; + match(NNS); // We only match the nested name specifier here (as opposed to traversing it) @@ -1555,7 +1600,7 @@ bool MatchASTVisitor::TraverseNestedNameSpecifierLoc( bool MatchASTVisitor::TraverseConstructorInitializer( CXXCtorInitializer *CtorInit) { - if (!CtorInit) + if (shouldSkipNode(CtorInit)) return true; bool ScopedTraversal = TraversingASTNodeNotSpelledInSource || @@ -1573,11 +1618,17 @@ bool MatchASTVisitor::TraverseConstructorInitializer( } bool MatchASTVisitor::TraverseTemplateArgumentLoc(TemplateArgumentLoc Loc) { + if (shouldSkipNode(Loc)) + return true; + match(Loc); return RecursiveASTVisitor::TraverseTemplateArgumentLoc(Loc); } bool MatchASTVisitor::TraverseAttr(Attr *AttrNode) { + if (shouldSkipNode(AttrNode)) + return true; + match(*AttrNode); return RecursiveASTVisitor::TraverseAttr(AttrNode); } From 326d749a368a842e71fd0d4cd7bb97c6d9f52f3e Mon Sep 17 00:00:00 2001 From: v1nh1shungry Date: Sun, 17 Aug 2025 17:42:38 +0800 Subject: [PATCH 095/214] [clang-tidy] Fix `cppcoreguidelines-prefer-member-initializer` false positive for inherited members (#153941) ```cpp struct Base { int m; }; template struct Derived : Base { Derived() { m = 0; } }; ``` would previously generate the following output: ``` :7:15: warning: 'm' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] 7 | Derived() { m = 0; } | ^~~~~~ | : m(0) ``` This patch fixes this false positive. 
Note that before this patch the checker won't give false positive for ```cpp struct Derived : Base { Derived() { m = 0; } }; ``` and the constructor's AST is ``` `-CXXConstructorDecl 0x557df03d1fb0 col:3 Derived 'void ()' implicit-inline |-CXXCtorInitializer 'Base' | `-CXXConstructExpr 0x557df03d2748 'Base' 'void () noexcept' `-CompoundStmt 0x557df03d2898 `-BinaryOperator 0x557df03d2878 'int' lvalue '=' |-MemberExpr 0x557df03d2828 'int' lvalue ->m 0x557df03d1c40 | `-ImplicitCastExpr 0x557df03d2808 'Base *' | `-CXXThisExpr 0x557df03d27f8 'Derived *' implicit this `-IntegerLiteral 0x557df03d2858 'int' 0 ``` so `isAssignmentToMemberOf` would return empty due to https://github.com/llvm/llvm-project/blob/f0967fca04c880e9aabd5be043a85127faabb4c6/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp#L118-L119 Fixes #104400 --- .../PreferMemberInitializerCheck.cpp | 3 +++ clang-tools-extra/docs/ReleaseNotes.rst | 4 ++++ .../cppcoreguidelines/prefer-member-initializer.cpp | 13 +++++++++++++ 3 files changed, 20 insertions(+) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp index 593a4f85d130..79cd4bbcc9a6 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp @@ -191,6 +191,9 @@ void PreferMemberInitializerCheck::check( if (!AssignmentToMember) continue; const FieldDecl *Field = AssignmentToMember->Field; + // Skip if the field is inherited from a base class. 
+ if (Field->getParent() != Class) + continue; const Expr *InitValue = AssignmentToMember->Init; updateAssignmentLevel(Field, InitValue, Ctor, AssignedFields); if (!canAdvanceAssignment(AssignedFields[Field])) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 36703dd32c03..fd81b0d47e82 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -167,6 +167,10 @@ Changes in existing checks an additional matcher that generalizes the copy-and-swap idiom pattern detection. +- Improved :doc:`cppcoreguidelines-prefer-member-initializer + ` check to + avoid false positives on inherited members in class templates. + - Improved :doc:`misc-header-include-cycle ` check performance. diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/prefer-member-initializer.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/prefer-member-initializer.cpp index 7d6164946fc3..e8d7db17f3c6 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/prefer-member-initializer.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/prefer-member-initializer.cpp @@ -650,3 +650,16 @@ struct InitFromBindingDecl { } }; } // namespace GH82970 + +struct A { + int m; +}; + +struct B : A { + B() { m = 0; } +}; + +template +struct C : A { + C() { m = 0; } +}; From 66a2d1b7580aab8ac2c171647c39fd85fe711013 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Sun, 17 Aug 2025 13:25:22 +0300 Subject: [PATCH 096/214] [clang-tidy][NFC] Remove py2 conditions from clang-tidy scripts (#154005) --- clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py index 7cd21afd70f7..d7899e0a18d0 100755 --- a/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py +++ 
b/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py @@ -28,6 +28,7 @@ import glob import json import multiprocessing import os +import queue import re import shutil import subprocess @@ -42,13 +43,6 @@ try: except ImportError: yaml = None -is_py2 = sys.version[0] == "2" - -if is_py2: - import Queue as queue -else: - import queue as queue - def run_tidy(task_queue, lock, timeout, failed_files): watchdog = None From a66d8f62e6a6e64c77322533e4f6f2a2a5884aa2 Mon Sep 17 00:00:00 2001 From: Erik Davis Date: Sun, 17 Aug 2025 04:01:05 -0700 Subject: [PATCH 097/214] [mlir][doc] fixup code block (#153977) This fixes a small typo in the toy tutorial. A code block was not correctly terminated, causing it to run into the subsequent block. --- mlir/docs/Tutorials/Toy/Ch-4.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/docs/Tutorials/Toy/Ch-4.md b/mlir/docs/Tutorials/Toy/Ch-4.md index e9abe36afc4d..621f6a684285 100644 --- a/mlir/docs/Tutorials/Toy/Ch-4.md +++ b/mlir/docs/Tutorials/Toy/Ch-4.md @@ -170,7 +170,7 @@ let arguments = (ins OptionalAttr:$arg_attrs, OptionalAttr:$res_attrs ); - +``` We have already provided the definition in the `extraClassDeclaration` field of the `FuncOp` class: From 65ffa53cb70909be4dbedacd9de9de0725161592 Mon Sep 17 00:00:00 2001 From: mdenson Date: Sun, 17 Aug 2025 08:59:47 -0500 Subject: [PATCH 098/214] [Clang] unrecognized html tag causing undesirable comment lexing (#152944) Simple fix for this particular html tag. A more complete solution should be implemented. 1. Add all html tags to table so they are recognized. Some input on what is desirable/safe would be appreciated 2. 
Change the lex strategy to deal with this in a different manner Fixes #32680 --------- Co-authored-by: Brock Denson --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/CommentHTMLTags.td | 5 +++ clang/test/AST/ast-dump-comment.cpp | 42 ++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b35f4ea42818..ac697e39dc18 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -230,6 +230,7 @@ Bug Fixes to AST Handling - Fix incorrect name qualifiers applied to alias CTAD. (#GH136624) - Fixed ElaboratedTypes appearing within NestedNameSpecifier, which was not a legal representation. This is fixed because ElaboratedTypes don't exist anymore. (#GH43179) (#GH68670) (#GH92757) +- Fix unrecognized html tag causing undesirable comment lexing (#GH152944) - Fix comment lexing of special command names (#GH152943) Miscellaneous Bug Fixes diff --git a/clang/include/clang/AST/CommentHTMLTags.td b/clang/include/clang/AST/CommentHTMLTags.td index a1ce8c6da96c..9b89bc0c811f 100644 --- a/clang/include/clang/AST/CommentHTMLTags.td +++ b/clang/include/clang/AST/CommentHTMLTags.td @@ -51,6 +51,11 @@ def Col : Tag<"col"> { let EndTagForbidden = 1; } def Tr : Tag<"tr"> { let EndTagOptional = 1; } def Th : Tag<"th"> { let EndTagOptional = 1; } def Td : Tag<"td"> { let EndTagOptional = 1; } +def Summary : Tag<"summary">; +def Details : Tag<"details">; +def Mark : Tag<"mark">; +def Figure : Tag<"figure">; +def FigCaption : Tag<"figcaption">; // Define a list of attributes that are not safe to pass through to HTML // output if the input is untrusted. 
diff --git a/clang/test/AST/ast-dump-comment.cpp b/clang/test/AST/ast-dump-comment.cpp index b67f79916d96..b5dbe2e317d8 100644 --- a/clang/test/AST/ast-dump-comment.cpp +++ b/clang/test/AST/ast-dump-comment.cpp @@ -132,8 +132,50 @@ void Test_TemplatedFunctionVariadic(int arg, ...); // CHECK-NEXT: ParagraphComment // CHECK-NEXT: TextComment{{.*}} Text=" More arguments" +/// \param[out] Aaa <summary>Short summary</summary> +int Test_HTMLSummaryTag(int Aaa); +// CHECK: FunctionDecl{{.*}}Test_HTMLSummaryTag +// CHECK: ParamCommandComment{{.*}} [out] explicitly Param="Aaa" +// CHECK-NEXT: ParagraphComment +// CHECK: HTMLStartTagComment{{.*}} Name="summary" +// CHECK-NEXT: TextComment{{.*}} Text="Short summary" +// CHECK-NEXT: HTMLEndTagComment{{.*}} Name="summary" + /// \thread_safe test for underscore in special command int Test_UnderscoreInSpecialCommand; // CHECK: VarDecl{{.*}}Test_UnderscoreInSpecialCommand 'int' // CHECK: InlineCommandComment{{.*}} Name="thread_safe" RenderNormal // CHECK-NEXT: TextComment{{.*}} Text=" test for underscore in special command" + +/// <details> +/// <summary> +/// Summary +/// </summary> +/// <p>Details</p> +/// </details> +/// +/// Some <mark>highlighting</mark> +/// +/// <figure> +/// <img src="pic.jpg"> +/// <figcaption>Figure 1</figcaption> +/// </figure>
+int Test_AdditionalHTMLTags(int Aaa); +// CHECK: FunctionDecl{{.*}}Test_AdditionalHTMLTags 'int (int)' +// CHECK: HTMLStartTagComment{{.*}} Name="details" +// CHECK: HTMLStartTagComment{{.*}} Name="summary" +// CHECK-NEXT: TextComment{{.*}} Text=" Summary" +// CHECK: HTMLEndTagComment{{.*}} Name="summary" +// CHECK: HTMLStartTagComment{{.*}} Name="p" +// CHECK-NEXT: TextComment{{.*}} Text="Details" +// CHECK-NEXT: HTMLEndTagComment{{.*}} Name="p" +// CHECK: HTMLEndTagComment{{.*}} Name="details" +// CHECK: HTMLStartTagComment{{.*}} Name="mark" +// CHECK-NEXT: TextComment{{.*}} Text="highlighting" +// CHECK-NEXT: HTMLEndTagComment{{.*}} Name="mark" +// CHECK: HTMLStartTagComment{{.*}} Name="figure" +// CHECK: HTMLStartTagComment{{.*}} Name="img" Attrs: "src="pic.jpg" +// CHECK: HTMLStartTagComment{{.*}} Name="figcaption" +// CHECK-NEXT: TextComment{{.*}} Text="Figure 1" +// CHECK-NEXT: HTMLEndTagComment{{.*}} Name="figcaption" +// CHECK: HTMLEndTagComment{{.*}} Name="figure" From 71925a90c8b713d8fccbfae496e10c68f80b932b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 17 Aug 2025 08:52:29 -0700 Subject: [PATCH 099/214] [libc] Setup hdrgen for ioctl (#153976) This patch adds some hdrgen yaml for ioctl(). Otherwise the function never actually ends up being available in a full build. This is the last thing that is needed to enable turning on LIBCXX_ENABLE_RANDOM_DEVICE. --- libc/include/sys/ioctl.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libc/include/sys/ioctl.yaml b/libc/include/sys/ioctl.yaml index 5f7b7f333191..7eb66b657664 100644 --- a/libc/include/sys/ioctl.yaml +++ b/libc/include/sys/ioctl.yaml @@ -5,4 +5,12 @@ macros: [] types: [] enums: [] objects: [] -functions: [] +functions: + - name: ioctl + standards: + - Linux + return_type: int + arguments: + - type: int + - type: unsigned long + - type: '...' 
From e1aa41522020a51a99389675d8de3d9c5910e2cf Mon Sep 17 00:00:00 2001 From: Veera <32646674+veera-sivarajan@users.noreply.github.com> Date: Sun, 17 Aug 2025 10:56:34 -0700 Subject: [PATCH 100/214] [mlir][InferIntRangeCommon] Fix Division by Zero Crash (#151637) Fixes #131273 Adds a check to avoid division when max value of denominator is zero. --- mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp | 3 +-- mlir/test/Dialect/Arith/int-range-interface.mlir | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp index 2f47939df5a0..af4ea5ac1cec 100644 --- a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp +++ b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp @@ -290,8 +290,7 @@ static ConstantIntRanges inferDivURange(const ConstantIntRanges &lhs, DivisionFixupFn fixup) { const APInt &lhsMin = lhs.umin(), &lhsMax = lhs.umax(), &rhsMin = rhs.umin(), &rhsMax = rhs.umax(); - - if (!rhsMin.isZero()) { + if (!rhsMin.isZero() && !rhsMax.isZero()) { auto udiv = [&fixup](const APInt &a, const APInt &b) -> std::optional { return fixup(a, b, a.udiv(b)); diff --git a/mlir/test/Dialect/Arith/int-range-interface.mlir b/mlir/test/Dialect/Arith/int-range-interface.mlir index 2128d36f1a28..130782ba9f52 100644 --- a/mlir/test/Dialect/Arith/int-range-interface.mlir +++ b/mlir/test/Dialect/Arith/int-range-interface.mlir @@ -224,6 +224,15 @@ func.func @ceil_divui(%arg0 : index) -> i1 { func.return %7 : i1 } +// CHECK-LABEL: func @ceil_divui_by_zero_issue_131273 +// CHECK-NEXT: return +func.func @ceil_divui_by_zero_issue_131273() { + %0 = test.with_bounds {smax = 0 : i32, smin = -1 : i32, umax = 0 : i32, umin = -1 : i32} : i32 + %c7_i32 = arith.constant 7 : i32 + %1 = arith.ceildivui %c7_i32, %0 : i32 + return +} + // CHECK-LABEL: func @ceil_divsi // CHECK: %[[ret:.*]] = arith.cmpi eq // CHECK: return %[[ret]] From 0561ff6a12e1219af0ea6146c62233b18b82475b Mon Sep 17 
00:00:00 2001 From: Andreas Jonson Date: Sun, 17 Aug 2025 20:24:09 +0200 Subject: [PATCH 101/214] [LVI] Add support for trunc nuw range. (#154021) Proof: https://alive2.llvm.org/ce/z/a5Yjb8 --- llvm/lib/Analysis/LazyValueInfo.cpp | 9 ++++- .../CorrelatedValuePropagation/trunc.ll | 40 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 922f25de54e9..c7b0ca97a8e4 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -927,8 +927,13 @@ LazyValueInfoImpl::solveBlockValueCast(CastInst *CI, BasicBlock *BB) { // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - return ValueLatticeElement::getRange(LHSRange.castOp(CI->getOpcode(), - ResultBitWidth)); + ConstantRange Res = ConstantRange::getEmpty(ResultBitWidth); + if (auto *Trunc = dyn_cast(CI)) + Res = LHSRange.truncate(ResultBitWidth, Trunc->getNoWrapKind()); + else + Res = LHSRange.castOp(CI->getOpcode(), ResultBitWidth); + + return ValueLatticeElement::getRange(Res); } std::optional diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/trunc.ll b/llvm/test/Transforms/CorrelatedValuePropagation/trunc.ll index 9b6604298840..42a89ab0dbc0 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/trunc.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/trunc.ll @@ -106,3 +106,43 @@ define i1 @overdefined_range_negative(i8 %A, i8 %B) { %trunc = trunc i8 %xor to i1 ret i1 %trunc } + +define i1 @trunc_nuw_infere_false_for_icmp_ne_1(i8 %x) { +; CHECK-LABEL: define i1 @trunc_nuw_infere_false_for_icmp_ne_1( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i8 [[X]], 1 +; CHECK-NEXT: br i1 [[ICMP]], label %[[IFTRUE:.*]], label %[[IFFALSE:.*]] +; CHECK: [[IFTRUE]]: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X]] to i1 +; 
CHECK-NEXT: ret i1 false +; CHECK: [[IFFALSE]]: +; CHECK-NEXT: ret i1 true +; + %icmp = icmp ne i8 %x, 1 + br i1 %icmp, label %iftrue, label %iffalse +iftrue: + %trunc = trunc nuw i8 %x to i1 + ret i1 %trunc +iffalse: + ret i1 true +} + +define i1 @neg_trunc_do_not_infere_false_for_icmp_ne_1(i8 %x) { +; CHECK-LABEL: define i1 @neg_trunc_do_not_infere_false_for_icmp_ne_1( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[ICMP:%.*]] = icmp ne i8 [[X]], 1 +; CHECK-NEXT: br i1 [[ICMP]], label %[[IFTRUE:.*]], label %[[IFFALSE:.*]] +; CHECK: [[IFTRUE]]: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[X]] to i1 +; CHECK-NEXT: ret i1 [[TRUNC]] +; CHECK: [[IFFALSE]]: +; CHECK-NEXT: ret i1 true +; + %icmp = icmp ne i8 %x, 1 + br i1 %icmp, label %iftrue, label %iffalse +iftrue: + %trunc = trunc i8 %x to i1 + ret i1 %trunc +iffalse: + ret i1 true +} From 350cb989b8b060083d5ada39abd1652e38ba62dd Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Sun, 17 Aug 2025 12:15:10 -0700 Subject: [PATCH 102/214] [X86] Explicitly widen larger than v4f16 to the legal v8f16 (NFC) (#153839) This patch makes the current behavior explicit to prepare for adding VTs for v[567]f16. Right now these types are EVTs and hence don't fall under getPreferredVectorAction and are simply widened to the next legal power-of-two vector type. For SSE2 this is v8f16. Without the preparatory patch however, the behavior would change after adding these types. getPreferredVectorAction would try to split them because this is the current behavior for any f16 vector type that is not legal. There is a lot more detail at https://github.com/llvm/llvm-project/issues/152150 in particular how splitting these new types leads to an inconsistency between NumRegistersForVT and getTypeAction. The patch ensures that after the new types are added they would continue to be widened rather than split. Once the patch to enable v[567]f16 lands, it will be an NFC for x86. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +++- llvm/test/CodeGen/X86/pr152150.ll | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr152150.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7a816de53dbd..52e0bb8a9b83 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2756,8 +2756,10 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { !Subtarget.hasBWI()) return TypeSplitVector; + // Since v8f16 is legal, widen anything over v4f16. if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && - !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16) + VT.getVectorNumElements() <= 4 && !Subtarget.hasF16C() && + VT.getVectorElementType() == MVT::f16) return TypeSplitVector; if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && diff --git a/llvm/test/CodeGen/X86/pr152150.ll b/llvm/test/CodeGen/X86/pr152150.ll new file mode 100644 index 000000000000..6db3e555028c --- /dev/null +++ b/llvm/test/CodeGen/X86/pr152150.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown-eabi-elf | FileCheck %s + +; CHECK-LABEL: conv2d +define dso_local void @conv2d() { +.preheader: + br label %0 + +0: ; preds = %0, %.preheader + %1 = phi [4 x <7 x half>] [ zeroinitializer, %.preheader ], [ %4, %0 ] + %2 = extractvalue [4 x <7 x half>] %1, 0 + %3 = extractvalue [4 x <7 x half>] %1, 1 + %4 = insertvalue [4 x <7 x half>] poison, <7 x half> poison, 3 + br label %0 +} From 9a692e0f94a99abea781397da5629336c74e8f8e Mon Sep 17 00:00:00 2001 From: owenca Date: Sun, 17 Aug 2025 12:53:57 -0700 Subject: [PATCH 103/214] [clang-format] Don't annotate class property specifiers as StartOfName (#153525) Fixes #153443 --- clang/lib/Format/TokenAnnotator.cpp | 3 +++ clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++++++ 2 files changed, 10 insertions(+) diff --git 
a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 4801d27b1395..88752903914b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2590,6 +2590,9 @@ private: if (!Tok.Previous || Tok.isNot(tok::identifier) || Tok.is(TT_ClassHeadName)) return false; + if (Tok.endsSequence(Keywords.kw_final, TT_ClassHeadName)) + return false; + if ((Style.isJavaScript() || Style.isJava()) && Tok.is(Keywords.kw_extends)) return false; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 7f99655b1fa4..85ccba38ac8c 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -618,6 +618,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsStructs) { EXPECT_TOKEN(Tokens[19], tok::l_brace, TT_StructLBrace); EXPECT_TOKEN(Tokens[20], tok::r_brace, TT_StructRBrace); + Tokens = annotate("class Outer {\n" + " struct Inner final : Base {};\n" + "};"); + ASSERT_EQ(Tokens.size(), 14u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::identifier, TT_Unknown); // Not TT_StartOfName + EXPECT_TOKEN(Tokens[6], tok::colon, TT_InheritanceColon); + constexpr StringRef Code("struct EXPORT StructName {};"); Tokens = annotate(Code); From 5e57a10f502c8fc524bd119a53ddbf643686c7a0 Mon Sep 17 00:00:00 2001 From: owenca Date: Sun, 17 Aug 2025 12:54:23 -0700 Subject: [PATCH 104/214] [clang-format] Allow breaking before bit-field colons (#153529) Fixes #153448 --- clang/lib/Format/TokenAnnotator.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 88752903914b..4be3bfbf41b4 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -6272,7 +6272,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, } if (Right.is(tok::colon) && - 
!Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) { + !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon, + TT_BitFieldColon)) { return false; } if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 96cc650f52a5..6fc008f029b4 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -4050,6 +4050,10 @@ TEST_F(FormatTest, FormatsBitfields) { " uchar : 8;\n" " uchar other;\n" "};"); + verifyFormat("struct foo {\n" + " uint8_t i_am_a_bit_field_this_long\n" + " : struct_with_constexpr::i_am_a_constexpr_lengthhhhh;\n" + "};"); FormatStyle Style = getLLVMStyle(); Style.BitFieldColonSpacing = FormatStyle::BFCS_None; verifyFormat("struct Bitfields {\n" From a21d17f1d7173cdbc25f141595a0be9056760f77 Mon Sep 17 00:00:00 2001 From: owenca Date: Sun, 17 Aug 2025 12:54:48 -0700 Subject: [PATCH 105/214] [clang-format] Fix a bug in breaking before FunctionDeclarationName (#153924) Fixes #153891 --- clang/lib/Format/ContinuationIndenter.cpp | 11 +++++++++-- clang/unittests/Format/FormatTest.cpp | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 9a10403b858f..888d0faf8093 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -629,9 +629,16 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // name. 
!Style.isJavaScript() && Previous.isNot(tok::kw_template) && CurrentState.BreakBeforeParameter) { - for (const auto *Tok = &Previous; Tok; Tok = Tok->Previous) - if (Tok->FirstAfterPPLine || Tok->is(TT_LineComment)) + for (const auto *Tok = &Previous; Tok; Tok = Tok->Previous) { + if (Tok->is(TT_LineComment)) return false; + if (Tok->is(TT_TemplateCloser)) { + Tok = Tok->MatchingParen; + assert(Tok); + } + if (Tok->FirstAfterPPLine) + return false; + } return true; } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 6fc008f029b4..c21ef3d1e594 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8618,7 +8618,7 @@ TEST_F(FormatTest, BreaksFunctionDeclarations) { verifyFormat("extern \"C\" //\n" " void f();"); - FormatStyle Style = getLLVMStyle(); + auto Style = getLLVMStyle(); Style.PointerAlignment = FormatStyle::PAS_Left; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " aaaaaaaaaaaaaaaaaaaaaaaaa* const aaaaaaaaaaaa) {}", @@ -8626,6 +8626,14 @@ TEST_F(FormatTest, BreaksFunctionDeclarations) { verifyFormat("void aaaaaaa(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa*\n" " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) {}", Style); + + Style = getLLVMStyleWithColumns(45); + Style.PenaltyReturnTypeOnItsOwnLine = 400; + verifyFormat("template \n" + "static inline std::pair\n" + "myfunc(const char *buf, const char *&err);", + Style); } TEST_F(FormatTest, DontBreakBeforeQualifiedOperator) { From 6cfedea492c11cd46f03cfad76a638bf73de40f4 Mon Sep 17 00:00:00 2001 From: owenca Date: Sun, 17 Aug 2025 12:56:22 -0700 Subject: [PATCH 106/214] [clang-format] Add SpaceInEmptyBraces option (#153765) Also set it to SIEB_Always for WebKit style. Closes #85525. Closes #93635. 
--- clang/docs/ClangFormatStyleOptions.rst | 48 ++++++++++++++++++--- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Format/Format.h | 47 ++++++++++++++++---- clang/lib/Format/Format.cpp | 24 +++++++++-- clang/lib/Format/TokenAnnotator.cpp | 22 +++++----- clang/lib/Format/UnwrappedLineFormatter.cpp | 3 +- clang/unittests/Format/ConfigParseTest.cpp | 12 +++++- clang/unittests/Format/FormatTest.cpp | 28 +++++++++++- 8 files changed, 155 insertions(+), 30 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 55ba261b7bdc..3ac9e3795cae 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -6486,13 +6486,51 @@ the configuration (without a prefix: ``Auto``). .. _SpaceInEmptyBlock: **SpaceInEmptyBlock** (``Boolean``) :versionbadge:`clang-format 10` :ref:`¶ ` - If ``true``, spaces will be inserted into ``{}``. + This option is **deprecated**. See ``Block`` of ``SpaceInEmptyBraces``. + +.. _SpaceInEmptyBraces: + +**SpaceInEmptyBraces** (``SpaceInEmptyBracesStyle``) :versionbadge:`clang-format 22` :ref:`¶ ` + Specifies when to insert a space in empty braces. + + .. note:: + + This option doesn't apply to initializer braces if + ``Cpp11BracedListStyle`` is set to ``true``. + + Possible values: + + * ``SIEB_Always`` (in configuration: ``Always``) + Always insert a space in empty braces. + + .. code-block:: c++ + + void f() { } + class Unit { }; + auto a = [] { }; + int x{ }; + + * ``SIEB_Block`` (in configuration: ``Block``) + Only insert a space in empty blocks. + + .. code-block:: c++ + + void f() { } + class Unit { }; + auto a = [] { }; + int x{}; + + * ``SIEB_Never`` (in configuration: ``Never``) + Never insert a space in empty braces. + + .. code-block:: c++ + + void f() {} + class Unit {}; + auto a = [] {}; + int x{}; - .. code-block:: c++ - true: false: - void f() { } vs. void f() {} - while (true) { } while (true) {} .. 
_SpaceInEmptyParentheses: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ac697e39dc18..e04cc326b8a0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -314,6 +314,7 @@ AST Matchers clang-format ------------ +- Add ``SpaceInEmptyBraces`` option and set it to ``Always`` for WebKit style. libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 31582a40de86..5dfdb2359461 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -4813,14 +4813,45 @@ struct FormatStyle { /// \version 7 bool SpaceBeforeRangeBasedForLoopColon; - /// If ``true``, spaces will be inserted into ``{}``. - /// \code - /// true: false: - /// void f() { } vs. void f() {} - /// while (true) { } while (true) {} - /// \endcode + /// This option is **deprecated**. See ``Block`` of ``SpaceInEmptyBraces``. /// \version 10 - bool SpaceInEmptyBlock; + // bool SpaceInEmptyBlock; + + /// Style of when to insert a space in empty braces. + enum SpaceInEmptyBracesStyle : int8_t { + /// Always insert a space in empty braces. + /// \code + /// void f() { } + /// class Unit { }; + /// auto a = [] { }; + /// int x{ }; + /// \endcode + SIEB_Always, + /// Only insert a space in empty blocks. + /// \code + /// void f() { } + /// class Unit { }; + /// auto a = [] { }; + /// int x{}; + /// \endcode + SIEB_Block, + /// Never insert a space in empty braces. + /// \code + /// void f() {} + /// class Unit {}; + /// auto a = [] {}; + /// int x{}; + /// \endcode + SIEB_Never + }; + + /// Specifies when to insert a space in empty braces. + /// \note + /// This option doesn't apply to initializer braces if + /// ``Cpp11BracedListStyle`` is set to ``true``. + /// \endnote + /// \version 22 + SpaceInEmptyBracesStyle SpaceInEmptyBraces; /// If ``true``, spaces may be inserted into ``()``. /// This option is **deprecated**. 
See ``InEmptyParentheses`` of @@ -5494,7 +5525,7 @@ struct FormatStyle { SpaceBeforeRangeBasedForLoopColon == R.SpaceBeforeRangeBasedForLoopColon && SpaceBeforeSquareBrackets == R.SpaceBeforeSquareBrackets && - SpaceInEmptyBlock == R.SpaceInEmptyBlock && + SpaceInEmptyBraces == R.SpaceInEmptyBraces && SpacesBeforeTrailingComments == R.SpacesBeforeTrailingComments && SpacesInAngles == R.SpacesInAngles && SpacesInContainerLiterals == R.SpacesInContainerLiterals && diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 063780721423..e3b22cdabacc 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -763,6 +763,15 @@ struct ScalarEnumerationTraits { } }; +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &IO, FormatStyle::SpaceInEmptyBracesStyle &Value) { + IO.enumCase(Value, "Always", FormatStyle::SIEB_Always); + IO.enumCase(Value, "Block", FormatStyle::SIEB_Block); + IO.enumCase(Value, "Never", FormatStyle::SIEB_Never); + } +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, FormatStyle::SpacesInAnglesStyle &Value) { IO.enumCase(Value, "Never", FormatStyle::SIAS_Never); @@ -931,6 +940,7 @@ template <> struct MappingTraits { bool DeriveLineEnding = true; bool UseCRLF = false; + bool SpaceInEmptyBlock = false; bool SpaceInEmptyParentheses = false; bool SpacesInConditionalStatement = false; bool SpacesInCStyleCastParentheses = false; @@ -960,6 +970,7 @@ template <> struct MappingTraits { IO.mapOptional("PointerBindsToType", Style.PointerAlignment); IO.mapOptional("SpaceAfterControlStatementKeyword", Style.SpaceBeforeParens); + IO.mapOptional("SpaceInEmptyBlock", SpaceInEmptyBlock); IO.mapOptional("SpaceInEmptyParentheses", SpaceInEmptyParentheses); IO.mapOptional("SpacesInConditionalStatement", SpacesInConditionalStatement); @@ -1193,7 +1204,7 @@ template <> struct MappingTraits { Style.SpaceBeforeRangeBasedForLoopColon); IO.mapOptional("SpaceBeforeSquareBrackets", 
Style.SpaceBeforeSquareBrackets); - IO.mapOptional("SpaceInEmptyBlock", Style.SpaceInEmptyBlock); + IO.mapOptional("SpaceInEmptyBraces", Style.SpaceInEmptyBraces); IO.mapOptional("SpacesBeforeTrailingComments", Style.SpacesBeforeTrailingComments); IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); @@ -1276,6 +1287,13 @@ template <> struct MappingTraits { Style.LineEnding = FormatStyle::LE_DeriveCRLF; } + // If SpaceInEmptyBlock was specified but SpaceInEmptyBraces was not, + // initialize the latter from the former for backward compatibility. + if (SpaceInEmptyBlock && + Style.SpaceInEmptyBraces == FormatStyle::SIEB_Never) { + Style.SpaceInEmptyBraces = FormatStyle::SIEB_Block; + } + if (Style.SpacesInParens != FormatStyle::SIPO_Custom && (SpacesInParentheses || SpaceInEmptyParentheses || SpacesInConditionalStatement || SpacesInCStyleCastParentheses)) { @@ -1677,7 +1695,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.SpaceBeforeParensOptions.AfterIfMacros = true; LLVMStyle.SpaceBeforeRangeBasedForLoopColon = true; LLVMStyle.SpaceBeforeSquareBrackets = false; - LLVMStyle.SpaceInEmptyBlock = false; + LLVMStyle.SpaceInEmptyBraces = FormatStyle::SIEB_Never; LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.SpacesInAngles = FormatStyle::SIAS_Never; LLVMStyle.SpacesInContainerLiterals = true; @@ -1984,7 +2002,7 @@ FormatStyle getWebKitStyle() { Style.ObjCSpaceAfterProperty = true; Style.PointerAlignment = FormatStyle::PAS_Left; Style.SpaceBeforeCpp11BracedList = true; - Style.SpaceInEmptyBlock = true; + Style.SpaceInEmptyBraces = FormatStyle::SIEB_Always; return Style; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 4be3bfbf41b4..a220de54f46b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4516,16 +4516,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return Left.is(tok::hash); if (Left.isOneOf(tok::hashhash, 
tok::hash)) return Right.is(tok::hash); - if (Left.is(BK_Block) && Right.is(tok::r_brace) && - Right.MatchingParen == &Left && Line.Children.empty()) { - return Style.SpaceInEmptyBlock; - } if (Style.SpacesInParens == FormatStyle::SIPO_Custom) { - if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) || - (Left.is(tok::l_brace) && Left.isNot(BK_Block) && - Right.is(tok::r_brace) && Right.isNot(BK_Block))) { + if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) return Style.SpacesInParensOptions.InEmptyParentheses; - } if (Style.SpacesInParensOptions.ExceptDoubleParentheses && Left.is(tok::r_paren) && Right.is(tok::r_paren)) { auto *InnerLParen = Left.MatchingParen; @@ -4803,8 +4796,6 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Right.is(TT_ArraySubscriptLSquare))) { return false; } - if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) - return !Left.Children.empty(); // No spaces in "{}". if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) || (Right.is(tok::r_brace) && Right.MatchingParen && Right.MatchingParen->isNot(BK_Block))) { @@ -4986,6 +4977,17 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(tok::star) && Right.is(tok::comment)) return true; + if (Left.is(tok::l_brace) && Right.is(tok::r_brace) && + Left.Children.empty()) { + if (Left.is(BK_Block)) + return Style.SpaceInEmptyBraces != FormatStyle::SIEB_Never; + if (Style.Cpp11BracedListStyle) { + return Style.SpacesInParens == FormatStyle::SIPO_Custom && + Style.SpacesInParensOptions.InEmptyParentheses; + } + return Style.SpaceInEmptyBraces == FormatStyle::SIEB_Always; + } + const auto *BeforeLeft = Left.Previous; if (IsCpp) { diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 0adf7ee9ed54..c938ff3965f9 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -864,7 +864,8 @@ private: if (ShouldMerge()) { // We merge empty blocks even 
if the line exceeds the column limit. Tok->SpacesRequiredBefore = - (Style.SpaceInEmptyBlock || Line.Last->is(tok::comment)) ? 1 : 0; + Style.SpaceInEmptyBraces != FormatStyle::SIEB_Never || + Line.Last->is(tok::comment); Tok->CanBreakBefore = true; return 1; } else if (Limit != 0 && !Line.startsWithNamespace() && diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 9de3cca71630..7c993c0f8fd3 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -200,7 +200,6 @@ TEST(ConfigParseTest, ParsesConfigurationBools) { CHECK_PARSE_BOOL(RemoveSemicolon); CHECK_PARSE_BOOL(SkipMacroDefinitionBody); CHECK_PARSE_BOOL(SpacesInSquareBrackets); - CHECK_PARSE_BOOL(SpaceInEmptyBlock); CHECK_PARSE_BOOL(SpacesInContainerLiterals); CHECK_PARSE_BOOL(SpaceAfterCStyleCast); CHECK_PARSE_BOOL(SpaceAfterTemplateKeyword); @@ -688,6 +687,17 @@ TEST(ConfigParseTest, ParsesConfiguration) { SpaceBeforeParens, FormatStyle::SBPO_ControlStatementsExceptControlMacros); + Style.SpaceInEmptyBraces = FormatStyle::SIEB_Never; + CHECK_PARSE("SpaceInEmptyBraces: Always", SpaceInEmptyBraces, + FormatStyle::SIEB_Always); + CHECK_PARSE("SpaceInEmptyBraces: Block", SpaceInEmptyBraces, + FormatStyle::SIEB_Block); + CHECK_PARSE("SpaceInEmptyBraces: Never", SpaceInEmptyBraces, + FormatStyle::SIEB_Never); + // For backward compatibility: + CHECK_PARSE("SpaceInEmptyBlock: true", SpaceInEmptyBraces, + FormatStyle::SIEB_Block); + // For backward compatibility: Style.SpacesInParens = FormatStyle::SIPO_Never; Style.SpacesInParensOptions = {}; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index c21ef3d1e594..83c664c3b81f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -7059,7 +7059,7 @@ TEST_F(FormatTest, PutEmptyBlocksIntoOneLine) { verifyFormat("enum E {};"); verifyFormat("enum E {}"); FormatStyle Style = 
getLLVMStyle(); - Style.SpaceInEmptyBlock = true; + Style.SpaceInEmptyBraces = FormatStyle::SIEB_Block; verifyFormat("void f() { }", "void f() {}", Style); Style.AllowShortBlocksOnASingleLine = FormatStyle::SBS_Empty; verifyFormat("{ }", Style); @@ -7087,7 +7087,7 @@ TEST_F(FormatTest, PutEmptyBlocksIntoOneLine) { Style); Style = getLLVMStyle(FormatStyle::LK_CSharp); - Style.SpaceInEmptyBlock = true; + Style.SpaceInEmptyBraces = FormatStyle::SIEB_Block; verifyFormat("Event += () => { };", Style); } @@ -25596,6 +25596,30 @@ TEST_F(FormatTest, SpacesInConditionalStatement) { verifyFormat("MYIF( a )\n return;\nelse\n return;", Spaces); } +TEST_F(FormatTest, SpaceInEmptyBraces) { + constexpr StringRef Code("void f() {}\n" + "class Unit {};\n" + "auto a = [] {};\n" + "int x{};"); + verifyFormat(Code); + + auto Style = getWebKitStyle(); + EXPECT_EQ(Style.SpaceInEmptyBraces, FormatStyle::SIEB_Always); + + verifyFormat("void f() { }\n" + "class Unit { };\n" + "auto a = [] { };\n" + "int x { };", + Code, Style); + + Style.SpaceInEmptyBraces = FormatStyle::SIEB_Block; + verifyFormat("void f() { }\n" + "class Unit { };\n" + "auto a = [] { };\n" + "int x {};", + Code, Style); +} + TEST_F(FormatTest, AlternativeOperators) { // Test case for ensuring alternate operators are not // combined with their right most neighbour. From a10773c8646d482e8747ca37d5a51523505ffbb7 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 23:13:48 +0300 Subject: [PATCH 107/214] [TableGen][DecoderEmitter] Remove EncodingIDAndOpcode struct (NFC) (#154028) Most of the time we don't need instruction opcode. There is no need to carry it around all the time, we can easily get it by other means. Rename affected variables accordingly. Part of an effort to simplify DecoderEmitter code. 
--- llvm/utils/TableGen/DecoderEmitter.cpp | 123 +++++++++++-------------- 1 file changed, 55 insertions(+), 68 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 11d06d1613e8..b6d4363c7e4d 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -215,16 +215,6 @@ struct EncodingAndInst { : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} }; -struct EncodingIDAndOpcode { - unsigned EncodingID; - unsigned Opcode; - - EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} - EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) - : EncodingID(EncodingID), Opcode(Opcode) {} -}; - -using EncodingIDsVec = std::vector; using NamespacesHwModesMap = std::map>; class DecoderEmitter { @@ -235,11 +225,13 @@ public: DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace) : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {} + const CodeGenTarget &getTarget() const { return Target; } + // Emit the decoder state machine table. Returns a mask of MCD decoder ops // that were emitted. unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table, unsigned BitWidth, StringRef Namespace, - const EncodingIDsVec &EncodingIDs) const; + ArrayRef EncodingIDs) const; void emitInstrLenTable(formatted_raw_ostream &OS, ArrayRef InstrLen) const; void emitPredicateFunction(formatted_raw_ostream &OS, @@ -416,10 +408,10 @@ protected: unsigned NumBits; // number of bits to filter // Map of well-known segment value to the set of uid's with that value. - std::map> FilteredInstructions; + std::map> FilteredIDs; // Set of uid's with non-constant segment values. - std::vector VariableInstructions; + std::vector VariableIDs; // Map of well-known segment value to its delegate. 
std::map> FilterChooserMap; @@ -435,9 +427,9 @@ public: unsigned getNumFiltered() const { return NumFiltered; } - EncodingIDAndOpcode getSingletonOpc() const { + unsigned getSingletonEncodingID() const { assert(NumFiltered == 1); - return FilteredInstructions.begin()->second.front(); + return FilteredIDs.begin()->second.front(); } // Return the filter chooser for the group of instructions without constant @@ -498,9 +490,7 @@ protected: ArrayRef AllInstructions; // Vector of uid's for this filter chooser to work on. - // The first member of the pair is the opcode id being decoded, the second is - // the opcode id that should be emitted. - ArrayRef Opcodes; + ArrayRef EncodingIDs; // Lookup table for the operand decoding of instructions. const std::map> &Operands; @@ -528,22 +518,20 @@ protected: }; public: - FilterChooser(ArrayRef Insts, - ArrayRef IDs, + FilterChooser(ArrayRef Insts, ArrayRef EncodingIDs, const std::map> &Ops, unsigned BW, const DecoderEmitter *E) - : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), + : AllInstructions(Insts), EncodingIDs(EncodingIDs), Operands(Ops), FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr), BitWidth(BW), Emitter(E) { doFilter(); } - FilterChooser(ArrayRef Insts, - ArrayRef IDs, + FilterChooser(ArrayRef Insts, ArrayRef EncodingIDs, const std::map> &Ops, const std::vector &ParentFilterBitValues, const FilterChooser &parent) - : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), + : AllInstructions(Insts), EncodingIDs(EncodingIDs), Operands(Ops), FilterBitValues(ParentFilterBitValues), Parent(&parent), BitWidth(parent.BitWidth), Emitter(parent.Emitter) { doFilter(); @@ -608,7 +596,7 @@ protected: // Emits table entries to decode the singleton. void emitSingletonTableEntry(DecoderTableInfo &TableInfo, - EncodingIDAndOpcode Opc) const; + unsigned EncodingID) const; // Emits code to decode the singleton, and then to decode the rest. 
void emitSingletonTableEntry(DecoderTableInfo &TableInfo, @@ -656,8 +644,8 @@ public: Filter::Filter(Filter &&f) : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), - FilteredInstructions(std::move(f.FilteredInstructions)), - VariableInstructions(std::move(f.VariableInstructions)), + FilteredIDs(std::move(f.FilteredIDs)), + VariableIDs(std::move(f.VariableIDs)), FilterChooserMap(std::move(f.FilterChooserMap)), NumFiltered(f.NumFiltered) {} @@ -667,9 +655,9 @@ Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits) NumFiltered = 0; - for (const auto &OpcPair : Owner.Opcodes) { + for (unsigned EncodingID : Owner.EncodingIDs) { // Populates the insn given the uid. - insn_t Insn = Owner.insnWithID(OpcPair.EncodingID); + insn_t Insn = Owner.insnWithID(EncodingID); // Scans the segment for possibly well-specified encoding bits. std::optional Field = fieldFromInsn(Insn, StartBit, NumBits); @@ -677,16 +665,16 @@ Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits) if (Field) { // The encoding bits are well-known. Lets add the uid of the // instruction into the bucket keyed off the constant field value. - FilteredInstructions[*Field].push_back(OpcPair); + FilteredIDs[*Field].push_back(EncodingID); ++NumFiltered; } else { // Some of the encoding bit(s) are unspecified. This contributes to // one additional member of "Variable" instructions. - VariableInstructions.push_back(OpcPair); + VariableIDs.push_back(EncodingID); } } - assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && + assert((FilteredIDs.size() + VariableIDs.size() > 0) && "Filter returns no instruction categories"); } @@ -700,7 +688,7 @@ void Filter::recurse() { // Starts by inheriting our parent filter chooser's filter bit values. 
std::vector BitValueArray(Owner.FilterBitValues); - if (!VariableInstructions.empty()) { + if (!VariableIDs.empty()) { for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNFILTERED; @@ -708,9 +696,8 @@ void Filter::recurse() { // group of instructions whose segment values are variable. FilterChooserMap.try_emplace( NO_FIXED_SEGMENTS_SENTINEL, - std::make_unique(Owner.AllInstructions, - VariableInstructions, Owner.Operands, - BitValueArray, Owner)); + std::make_unique(Owner.AllInstructions, VariableIDs, + Owner.Operands, BitValueArray, Owner)); } // No need to recurse for a singleton filtered instruction. @@ -721,7 +708,7 @@ void Filter::recurse() { } // Otherwise, create sub choosers. - for (const auto &Inst : FilteredInstructions) { + for (const auto &Inst : FilteredIDs) { // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) BitValueArray[StartBit + bitIndex] = Inst.first & (1ULL << bitIndex) @@ -810,7 +797,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { // Returns the number of fanout produced by the filter. More fanout implies // the filter distinguishes more categories of instructions. unsigned Filter::usefulness() const { - return FilteredInstructions.size() + VariableInstructions.empty(); + return FilteredIDs.size() + VariableIDs.empty(); } ////////////////////////////////// @@ -824,14 +811,16 @@ unsigned Filter::usefulness() const { unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, unsigned BitWidth, StringRef Namespace, - const EncodingIDsVec &EncodingIDs) const { + ArrayRef EncodingIDs) const { // We'll need to be able to map from a decoded opcode into the corresponding // EncodingID for this specific combination of BitWidth and Namespace. This // is used below to index into NumberedEncodings. 
DenseMap OpcodeToEncodingID; OpcodeToEncodingID.reserve(EncodingIDs.size()); - for (const auto &EI : EncodingIDs) - OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; + for (unsigned EncodingID : EncodingIDs) { + const Record *InstDef = NumberedEncodings[EncodingID].Inst->TheDef; + OpcodeToEncodingID[Target.getInstrIntValue(InstDef)] = EncodingID; + } OS << "static const uint8_t DecoderTable" << Namespace << BitWidth << "[] = {\n"; @@ -1419,14 +1408,14 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, // Emits table entries to decode the singleton. void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, - EncodingIDAndOpcode Opc) const { - insn_t Insn = insnWithID(Opc.EncodingID); + unsigned EncodingID) const { + insn_t Insn = insnWithID(EncodingID); // Look for islands of undecoded bits of the singleton. std::vector Islands = getIslands(Insn); // Emit the predicate table entry if one is needed. - emitPredicateTableEntry(TableInfo, Opc.EncodingID); + emitPredicateTableEntry(TableInfo, EncodingID); // Check any additional encoding fields needed. for (const Island &Ilnd : reverse(Islands)) { @@ -1451,10 +1440,10 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, } // Check for soft failure of the match. - emitSoftFailTableEntry(TableInfo, Opc.EncodingID); + emitSoftFailTableEntry(TableInfo, EncodingID); auto [DIdx, HasCompleteDecoder] = - getDecoderIndex(TableInfo.Decoders, Opc.EncodingID); + getDecoderIndex(TableInfo.Decoders, EncodingID); // Produce OPC_Decode or OPC_TryDecode opcode based on the information // whether the instruction decoder is complete or not. 
If it is complete @@ -1471,7 +1460,8 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, : MCD::OPC_TryDecode); TableInfo.Table.push_back(DecoderOp); NumEncodingsSupported++; - TableInfo.Table.insertULEB128(Opc.Opcode); + const Record *InstDef = AllInstructions[EncodingID].Inst->TheDef; + TableInfo.Table.insertULEB128(Emitter->getTarget().getInstrIntValue(InstDef)); TableInfo.Table.insertULEB128(DIdx); if (DecoderOp == MCD::OPC_TryDecode) { @@ -1483,12 +1473,10 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, // Emits table entries to decode the singleton, and then to decode the rest. void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, const Filter &Best) const { - EncodingIDAndOpcode Opc = Best.getSingletonOpc(); - // complex singletons need predicate checks from the first singleton // to refer forward to the variable filterchooser that follows. TableInfo.pushScope(); - emitSingletonTableEntry(TableInfo, Opc); + emitSingletonTableEntry(TableInfo, Best.getSingletonEncodingID()); TableInfo.popScope(); Best.getVariableFC().emitTableEntries(TableInfo); @@ -1516,15 +1504,15 @@ void FilterChooser::reportRegion(std::vector> &Filters, // recursively descends down the decoding tree. bool FilterChooser::filterProcessor(ArrayRef BitAttrs, bool AllowMixed, bool Greedy) { - assert(Opcodes.size() >= 2 && "Nothing to filter"); + assert(EncodingIDs.size() >= 2 && "Nothing to filter"); // Heuristics. See also doFilter()'s "Heuristics" comment when num of // instructions is 3. if (AllowMixed && !Greedy) { - assert(Opcodes.size() == 3); + assert(EncodingIDs.size() == 3); - for (const auto &Opcode : Opcodes) { - insn_t Insn = insnWithID(Opcode.EncodingID); + for (unsigned EncodingID : EncodingIDs) { + insn_t Insn = insnWithID(EncodingID); // Look for islands of undecoded bits of any instruction. 
std::vector Islands = getIslands(Insn); @@ -1681,10 +1669,10 @@ bool FilterChooser::filterProcessor(ArrayRef BitAttrs, // the instructions. A conflict of instructions may occur, in which case we // dump the conflict set to the standard error. void FilterChooser::doFilter() { - assert(!Opcodes.empty() && "FilterChooser created with no instructions"); + assert(!EncodingIDs.empty() && "FilterChooser created with no instructions"); // No filter needed. - if (Opcodes.size() < 2) + if (EncodingIDs.size() < 2) return; // We maintain BIT_WIDTH copies of the bitAttrs automaton. @@ -1712,8 +1700,8 @@ void FilterChooser::doFilter() { if (FilterBitValues[BitIndex].isSet()) BitAttrs[BitIndex] = ATTR_FILTERED; - for (const EncodingIDAndOpcode &OpcPair : Opcodes) { - insn_t EncodingBits = insnWithID(OpcPair.EncodingID); + for (unsigned EncodingID : EncodingIDs) { + insn_t EncodingBits = insnWithID(EncodingID); for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { switch (BitAttrs[BitIndex]) { @@ -1750,7 +1738,7 @@ void FilterChooser::doFilter() { // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a // well-known encoding pattern. In such case, we backtrack and scan for the // the very first consecutive ATTR_ALL_SET region and assign a filter to it. - if (Opcodes.size() == 3 && + if (EncodingIDs.size() == 3 && filterProcessor(BitAttrs, /*AllowMixed=*/true, /*Greedy=*/false)) return; @@ -1766,8 +1754,8 @@ void FilterChooser::doFilter() { dumpStack(errs(), Indent); // Dump encodings. 
- for (EncodingIDAndOpcode Opcode : Opcodes) { - const EncodingAndInst &Enc = AllInstructions[Opcode.EncodingID]; + for (unsigned EncodingID : EncodingIDs) { + const EncodingAndInst &Enc = AllInstructions[EncodingID]; errs() << Indent; dumpBits(errs(), getBitsField(*Enc.EncodingDef, "Inst"), BitWidth); errs() << " " << Enc << '\n'; @@ -1778,11 +1766,11 @@ void FilterChooser::doFilter() { // emitTableEntries - Emit state machine entries to decode our share of // instructions. void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { - if (Opcodes.size() == 1) { + if (EncodingIDs.size() == 1) { // There is only one instruction in the set, which is great! // Call emitSingletonDecoder() to see whether there are any remaining // encodings bits. - emitSingletonTableEntry(TableInfo, Opcodes[0]); + emitSingletonTableEntry(TableInfo, EncodingIDs[0]); return; } @@ -2528,8 +2516,8 @@ namespace { NumberedAlias, &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); - std::map, std::vector> - OpcMap; + // Map of (namespace, size) tuple to encoding IDs. + std::map, std::vector> EncMap; std::map> Operands; std::vector InstrLen; bool IsVarLenInst = Target.hasVariableLengthEncodings(); @@ -2568,8 +2556,7 @@ namespace { EncodingDef->getValueAsString("DecoderNamespace").str(); if (!NumberedEncoding.HwModeName.empty()) DecoderNamespace += "_" + NumberedEncoding.HwModeName.str(); - OpcMap[{DecoderNamespace, Size}].emplace_back( - NEI, Target.getInstrIntValue(Def)); + EncMap[{DecoderNamespace, Size}].push_back(NEI); } else { NumEncodingsOmitted++; } @@ -2577,7 +2564,7 @@ namespace { DecoderTableInfo TableInfo; unsigned OpcodeMask = 0; - for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) { + for (const auto &[NSAndByteSize, EncodingIDs] : EncMap) { const std::string &DecoderNamespace = NSAndByteSize.first; const unsigned BitWidth = 8 * NSAndByteSize.second; // Emit the decoder for this namespace+width combination. 
From 6947fb455638593567e1754cf3ef04e1b379aa00 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sun, 17 Aug 2025 23:50:23 +0300 Subject: [PATCH 108/214] [TableGen] Use structured binding in one place (NFC) --- llvm/utils/TableGen/DecoderEmitter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index b6d4363c7e4d..b01a79e37ce4 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -708,18 +708,18 @@ void Filter::recurse() { } // Otherwise, create sub choosers. - for (const auto &Inst : FilteredIDs) { + for (const auto &[FilterVal, EncodingIDs] : FilteredIDs) { // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) - BitValueArray[StartBit + bitIndex] = Inst.first & (1ULL << bitIndex) + BitValueArray[StartBit + bitIndex] = FilterVal & (1ULL << bitIndex) ? BitValue::BIT_TRUE : BitValue::BIT_FALSE; // Delegates to an inferior filter chooser for further processing on this // category of instructions. FilterChooserMap.try_emplace( - Inst.first, - std::make_unique(Owner.AllInstructions, Inst.second, + FilterVal, + std::make_unique(Owner.AllInstructions, EncodingIDs, Owner.Operands, BitValueArray, Owner)); } } From 5892a2beec4eaf77922211a9aec70faa0ae287db Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 17 Aug 2025 21:51:13 +0100 Subject: [PATCH 109/214] [VPlan] Remove dead code from GetBroadCastInstr (NFCI). All relevant places should already explicitly materialize broadcasts. 
Remove dead code from VPTransformState::get --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 3682bd0e1720..724a38e56530 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -295,27 +295,11 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { if (hasVectorValue(Def)) return Data.VPV2Vector[Def]; - auto GetBroadcastInstrs = [this, Def](Value *V) { - bool SafeToHoist = - !Def->hasDefiningRecipe() || - VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(), - Plan->getVectorPreheader()); - + auto GetBroadcastInstrs = [this](Value *V) { if (VF.isScalar()) return V; - // Place the code for broadcasting invariant variables in the new preheader. - IRBuilder<>::InsertPointGuard Guard(Builder); - if (SafeToHoist) { - BasicBlock *LoopVectorPreHeader = - CFG.VPBB2IRBB[Plan->getVectorPreheader()]; - if (LoopVectorPreHeader) - Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - } - - // Place the code for broadcasting invariant variables in the new preheader. // Broadcast the scalar into all locations in the vector. 
Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); - return Shuf; }; From 40833eea21ebe40f0e6321d70780207214908124 Mon Sep 17 00:00:00 2001 From: Mohamed Emad Date: Mon, 18 Aug 2025 00:04:47 +0300 Subject: [PATCH 110/214] Reland "[libc][math][c23] Implement C23 math function asinpif16" (#152690) #146226 with fixing asinpi MPFR number function and make it work when mpfr < `4.2.0` --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/headers/math/index.rst | 2 +- libc/include/math.yaml | 7 ++ libc/src/math/CMakeLists.txt | 2 + libc/src/math/asinpif16.h | 21 ++++ libc/src/math/generic/CMakeLists.txt | 19 +++ libc/src/math/generic/asinpif16.cpp | 127 ++++++++++++++++++++ libc/test/src/math/CMakeLists.txt | 11 ++ libc/test/src/math/asinpif16_test.cpp | 40 ++++++ libc/test/src/math/smoke/CMakeLists.txt | 12 ++ libc/test/src/math/smoke/asinpif16_test.cpp | 86 +++++++++++++ libc/utils/MPFRWrapper/MPCommon.cpp | 15 +++ libc/utils/MPFRWrapper/MPCommon.h | 1 + libc/utils/MPFRWrapper/MPFRUtils.cpp | 2 + libc/utils/MPFRWrapper/MPFRUtils.h | 1 + 16 files changed, 347 insertions(+), 1 deletion(-) create mode 100644 libc/src/math/asinpif16.h create mode 100644 libc/src/math/generic/asinpif16.cpp create mode 100644 libc/test/src/math/asinpif16_test.cpp create mode 100644 libc/test/src/math/smoke/asinpif16_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index e71dc2ee0d02..1bc5df9d45a9 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -660,6 +660,7 @@ if(LIBC_TYPES_HAS_FLOAT16) list(APPEND TARGET_LIBM_ENTRYPOINTS # math.h C23 _Float16 entrypoints # libc.src.math.acoshf16 + libc.src.math.asinpif16 libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 1ee10e6d3cad..1fc9a2b901c1 
100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -704,6 +704,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.acospif16 libc.src.math.asinf16 libc.src.math.asinhf16 + libc.src.math.asinpif16 libc.src.math.atanf16 libc.src.math.atanhf16 libc.src.math.canonicalizef16 diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst index add34d0e877f..de2849d1418d 100644 --- a/libc/docs/headers/math/index.rst +++ b/libc/docs/headers/math/index.rst @@ -268,7 +268,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | asinh | |check| | | | |check| | | 7.12.5.2 | F.10.2.2 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| asinpi | | | | | | 7.12.4.9 | F.10.1.9 | +| asinpi | | | | |check| | | 7.12.4.9 | F.10.1.9 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | atan | |check| | 1 ULP | | |check| | | 7.12.4.3 | F.10.1.3 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/include/math.yaml b/libc/include/math.yaml index e8ac7ee5033d..4e398676bf91 100644 --- a/libc/include/math.yaml +++ b/libc/include/math.yaml @@ -79,6 +79,13 @@ functions: arguments: - type: _Float16 guard: LIBC_TYPES_HAS_FLOAT16 + - name: asinpif16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: _Float16 + guard: LIBC_TYPES_HAS_FLOAT16 - name: atan standards: - stdc diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 
8db5901afa9c..187bc92e5c2c 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -58,6 +58,8 @@ add_math_entrypoint_object(asinh) add_math_entrypoint_object(asinhf) add_math_entrypoint_object(asinhf16) +add_math_entrypoint_object(asinpif16) + add_math_entrypoint_object(atan) add_math_entrypoint_object(atanf) add_math_entrypoint_object(atanf16) diff --git a/libc/src/math/asinpif16.h b/libc/src/math/asinpif16.h new file mode 100644 index 000000000000..b97166af63f5 --- /dev/null +++ b/libc/src/math/asinpif16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for asinpif16 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_ASINPIF16_H +#define LLVM_LIBC_SRC_MATH_ASINPIF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 asinpif16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_ASINPIF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 65e483926844..22aa3cfedfbc 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4035,6 +4035,25 @@ add_entrypoint_object( libc.src.__support.math.asinhf16 ) +add_entrypoint_object( + asinpif16 + SRCS + asinpif16.cpp + HDRS + ../asinpif16.h + DEPENDS + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + 
libc.src.__support.macros.optimization +) + add_entrypoint_object( atanhf SRCS diff --git a/libc/src/math/generic/asinpif16.cpp b/libc/src/math/generic/asinpif16.cpp new file mode 100644 index 000000000000..aabc0863ba52 --- /dev/null +++ b/libc/src/math/generic/asinpif16.cpp @@ -0,0 +1,127 @@ +//===-- Half-precision asinpif16(x) function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception. +// +//===----------------------------------------------------------------------===// + +#include "src/math/asinpif16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float16, asinpif16, (float16 x)) { + using FPBits = fputil::FPBits; + + FPBits xbits(x); + bool is_neg = xbits.is_neg(); + double x_abs = fputil::cast(xbits.abs().get_val()); + + auto signed_result = [is_neg](auto r) -> auto { return is_neg ? 
-r : r; }; + + if (LIBC_UNLIKELY(x_abs > 1.0)) { + // asinpif16(NaN) = NaN + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + + // 1 < |x| <= +/-inf + fputil::raise_except_if_required(FE_INVALID); + fputil::set_errno_if_required(EDOM); + + return FPBits::quiet_nan().get_val(); + } + + // the coefficients for the polynomial approximation of asin(x)/pi in the + // range [0, 0.5] extracted using python-sympy + // + // Python code to generate the coefficients: + // > from sympy import * + // > import math + // > x = symbols('x') + // > print(series(asin(x)/math.pi, x, 0, 21)) + // + // OUTPUT: + // + // 0.318309886183791*x + 0.0530516476972984*x**3 + 0.0238732414637843*x**5 + + // 0.0142102627760621*x**7 + 0.00967087327815336*x**9 + + // 0.00712127941391293*x**11 + 0.00552355646848375*x**13 + + // 0.00444514782463692*x**15 + 0.00367705242846804*x**17 + + // 0.00310721681820837*x**19 + O(x**21) + // + // it's very accurate in the range [0, 0.5] and has a maximum error of + // 0.0000000000000001 in the range [0, 0.5]. 
+ constexpr double POLY_COEFFS[] = { + 0x1.45f306dc9c889p-2, // x^1 + 0x1.b2995e7b7b5fdp-5, // x^3 + 0x1.8723a1d588a36p-6, // x^5 + 0x1.d1a452f20430dp-7, // x^7 + 0x1.3ce52a3a09f61p-7, // x^9 + 0x1.d2b33e303d375p-8, // x^11 + 0x1.69fde663c674fp-8, // x^13 + 0x1.235134885f19bp-8, // x^15 + }; + // polynomial evaluation using horner's method + // work only for |x| in [0, 0.5] + auto asinpi_polyeval = [](double x) -> double { + return x * fputil::polyeval(x * x, POLY_COEFFS[0], POLY_COEFFS[1], + POLY_COEFFS[2], POLY_COEFFS[3], POLY_COEFFS[4], + POLY_COEFFS[5], POLY_COEFFS[6], POLY_COEFFS[7]); + }; + + // if |x| <= 0.5: + if (LIBC_UNLIKELY(x_abs <= 0.5)) { + // Use polynomial approximation of asin(x)/pi in the range [0, 0.5] + double result = asinpi_polyeval(fputil::cast(x)); + return fputil::cast(result); + } + + // If |x| > 0.5, we need to use the range reduction method: + // y = asin(x) => x = sin(y) + // because: sin(a) = cos(pi/2 - a) + // therefore: + // x = cos(pi/2 - y) + // let z = pi/2 - y, + // x = cos(z) + // because: cos(2a) = 1 - 2 * sin^2(a), z = 2a, a = z/2 + // therefore: + // cos(z) = 1 - 2 * sin^2(z/2) + // sin(z/2) = sqrt((1 - cos(z))/2) + // sin(z/2) = sqrt((1 - x)/2) + // let u = (1 - x)/2 + // then: + // sin(z/2) = sqrt(u) + // z/2 = asin(sqrt(u)) + // z = 2 * asin(sqrt(u)) + // pi/2 - y = 2 * asin(sqrt(u)) + // y = pi/2 - 2 * asin(sqrt(u)) + // y/pi = 1/2 - 2 * asin(sqrt(u))/pi + // + // Finally, we can write: + // asinpi(x) = 1/2 - 2 * asinpi(sqrt(u)) + // where u = (1 - x) /2 + // = 0.5 - 0.5 * x + // = multiply_add(-0.5, x, 0.5) + + double u = fputil::multiply_add(-0.5, x_abs, 0.5); + double asinpi_sqrt_u = asinpi_polyeval(fputil::sqrt(u)); + double result = fputil::multiply_add(-2.0, asinpi_sqrt_u, 0.5); + + return fputil::cast(signed_result(result)); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 11bbf670c98d..e15df147c3c3 100644 --- 
a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -2282,6 +2282,17 @@ add_fp_unittest( libc.src.math.asinf16 ) +add_fp_unittest( + asinpif16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + asinpif16_test.cpp + DEPENDS + libc.src.math.asinpif16 +) + add_fp_unittest( acosf_test NEED_MPFR diff --git a/libc/test/src/math/asinpif16_test.cpp b/libc/test/src/math/asinpif16_test.cpp new file mode 100644 index 000000000000..3718f39fd06a --- /dev/null +++ b/libc/test/src/math/asinpif16_test.cpp @@ -0,0 +1,40 @@ +//===-- Exhaustive test for asinpif16 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/asinpif16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcAsinpif16Test = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +// Range: [0, Inf] +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7c00U; + +// Range: [-Inf, 0] +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xfc00U; + +TEST_F(LlvmLibcAsinpif16Test, PositiveRange) { + for (uint16_t v = POS_START; v <= POS_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Asinpi, x, + LIBC_NAMESPACE::asinpif16(x), 0.5); + } +} + +TEST_F(LlvmLibcAsinpif16Test, NegativeRange) { + for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Asinpi, x, + LIBC_NAMESPACE::asinpif16(x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt 
b/libc/test/src/math/smoke/CMakeLists.txt index 00881bd27f24..b800f7aba98d 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -4216,6 +4216,18 @@ add_fp_unittest( libc.src.math.asinhf16 ) +add_fp_unittest( + asinpif16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + asinpif16_test.cpp + DEPENDS + libc.src.math.asinpif16 + libc.src.errno.errno +) + add_fp_unittest( acoshf_test SUITE diff --git a/libc/test/src/math/smoke/asinpif16_test.cpp b/libc/test/src/math/smoke/asinpif16_test.cpp new file mode 100644 index 000000000000..5303eed8f5da --- /dev/null +++ b/libc/test/src/math/smoke/asinpif16_test.cpp @@ -0,0 +1,86 @@ +//===-- Unittests for asinpif16 -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/libc_errno.h" +#include "src/math/asinpif16.h" +#include "test/UnitTest/FPMatcher.h" + +using LlvmLibcAsinpif16Test = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcAsinpif16Test, SpecialNumbers) { + // zero + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::asinpif16(zero)); + + // +/-1 + EXPECT_FP_EQ(0.5f16, LIBC_NAMESPACE::asinpif16(1.0)); + EXPECT_FP_EQ(-0.5f16, LIBC_NAMESPACE::asinpif16(-1.0)); + + // NaN inputs + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(FPBits::quiet_nan().get_val())); + + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(FPBits::signaling_nan().get_val())); + + // infinity inputs -> should return NaN + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), LIBC_NAMESPACE::asinpif16(inf)); + EXPECT_MATH_ERRNO(EDOM); + + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(neg_inf)); + 
EXPECT_MATH_ERRNO(EDOM); +} + +TEST_F(LlvmLibcAsinpif16Test, OutOfRange) { + // Test values > 1 + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(1.5f16)); + EXPECT_MATH_ERRNO(EDOM); + + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(2.0f16)); + EXPECT_MATH_ERRNO(EDOM); + + // Test values < -1 + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(-1.5f16)); + EXPECT_MATH_ERRNO(EDOM); + + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(-2.0f16)); + EXPECT_MATH_ERRNO(EDOM); + + // Test maximum normal value (should be > 1 for float16) + libc_errno = 0; + EXPECT_FP_EQ(FPBits::quiet_nan().get_val(), + LIBC_NAMESPACE::asinpif16(FPBits::max_normal().get_val())); + EXPECT_MATH_ERRNO(EDOM); +} + +TEST_F(LlvmLibcAsinpif16Test, SymmetryProperty) { + // Test that asinpi(-x) = -asinpi(x) + constexpr float16 TEST_VALS[] = {0.1f16, 0.25f16, 0.5f16, 0.75f16, + 0.9f16, 0.99f16, 1.0f16}; + + for (float16 x : TEST_VALS) { + FPBits neg_x_bits(x); + neg_x_bits.set_sign(Sign::NEG); + float16 neg_x = neg_x_bits.get_val(); + + float16 pos_result = LIBC_NAMESPACE::asinpif16(x); + float16 neg_result = LIBC_NAMESPACE::asinpif16(neg_x); + + EXPECT_FP_EQ(pos_result, FPBits(neg_result).abs().get_val()); + } +} diff --git a/libc/utils/MPFRWrapper/MPCommon.cpp b/libc/utils/MPFRWrapper/MPCommon.cpp index 07339a06fff8..77039d4bf7df 100644 --- a/libc/utils/MPFRWrapper/MPCommon.cpp +++ b/libc/utils/MPFRWrapper/MPCommon.cpp @@ -105,6 +105,21 @@ MPFRNumber MPFRNumber::asinh() const { return result; } +MPFRNumber MPFRNumber::asinpi() const { + MPFRNumber result(*this); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4, 2, 0) + mpfr_asinpi(result.value, value, mpfr_rounding); + return result; +#else + MPFRNumber value_asin(0.0, 1280); + mpfr_asin(value_asin.value, value, MPFR_RNDN); + MPFRNumber value_pi(0.0, 1280); + mpfr_const_pi(value_pi.value, 
MPFR_RNDN); + mpfr_div(result.value, value_asin.value, value_pi.value, mpfr_rounding); + return result; +#endif +} + MPFRNumber MPFRNumber::atan() const { MPFRNumber result(*this); mpfr_atan(result.value, value, mpfr_rounding); diff --git a/libc/utils/MPFRWrapper/MPCommon.h b/libc/utils/MPFRWrapper/MPCommon.h index 8bcc69c247a3..47d6293c06af 100644 --- a/libc/utils/MPFRWrapper/MPCommon.h +++ b/libc/utils/MPFRWrapper/MPCommon.h @@ -189,6 +189,7 @@ public: MPFRNumber add(const MPFRNumber &b) const; MPFRNumber asin() const; MPFRNumber asinh() const; + MPFRNumber asinpi() const; MPFRNumber atan() const; MPFRNumber atan2(const MPFRNumber &b); MPFRNumber atanh() const; diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 3ab129a1a6fc..ff3bebb1c500 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -39,6 +39,8 @@ unary_operation(Operation op, InputType input, unsigned int precision, return mpfrInput.asin(); case Operation::Asinh: return mpfrInput.asinh(); + case Operation::Asinpi: + return mpfrInput.asinpi(); case Operation::Atan: return mpfrInput.atan(); case Operation::Atanh: diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 45468c6cb19a..e805607328f6 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -30,6 +30,7 @@ enum class Operation : int { Acospi, Asin, Asinh, + Asinpi, Atan, Atanh, Cbrt, From 7610b1372955da55e3dc4e2eb1440f0304a56ac8 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Mon, 18 Aug 2025 05:07:24 +0800 Subject: [PATCH 111/214] [MLIR] Split ExecutionEngine Initialization out of ctor into an explicit method call (#153524) Retry landing https://github.com/llvm/llvm-project/pull/153373 ## Major changes from previous attempt - remove the test in CAPI because no existing tests in CAPI deal with sanitizer exemptions - update `mlir/docs/Dialects/GPU.md` to reflect the new behavior: load GPU 
binary in global ctors, instead of loading them at call site. - skip the test on Aarch64 since we have an issue with initialization there --------- Co-authored-by: Mehdi Amini --- mlir/docs/Dialects/GPU.md | 17 ++- mlir/include/mlir-c/ExecutionEngine.h | 7 ++ .../mlir/ExecutionEngine/ExecutionEngine.h | 9 ++ .../Bindings/Python/ExecutionEngineModule.cpp | 13 +- .../CAPI/ExecutionEngine/ExecutionEngine.cpp | 9 +- mlir/lib/ExecutionEngine/ExecutionEngine.cpp | 20 ++-- mlir/lib/ExecutionEngine/JitRunner.cpp | 2 + .../mlir/_mlir_libs/_mlirExecutionEngine.pyi | 1 + mlir/test/CAPI/CMakeLists.txt | 7 ++ mlir/test/CAPI/global_constructors.c | 113 ++++++++++++++++++ mlir/test/CMakeLists.txt | 1 + mlir/test/lit.cfg.py | 1 + mlir/test/python/global_constructors.py | 72 +++++++++++ mlir/unittests/ExecutionEngine/Invoke.cpp | 51 ++++++++ 14 files changed, 310 insertions(+), 13 deletions(-) create mode 100644 mlir/test/CAPI/global_constructors.c create mode 100644 mlir/test/python/global_constructors.py diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md index 94b053daa161..8d4d2ca3e574 100644 --- a/mlir/docs/Dialects/GPU.md +++ b/mlir/docs/Dialects/GPU.md @@ -193,10 +193,25 @@ llvm.func @foo() { // mlir-translate --mlir-to-llvmir: @binary_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8 @binary_func_kernel_name = private unnamed_addr constant [7 x i8] c"func\00", align 1 +@binary_module = internal global ptr null +@llvm.global_ctors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_load, ptr null}] +@llvm.global_dtors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_unload, ptr null}] +define internal void @binary_load() section ".text.startup" { +entry: + %0 = call ptr @mgpuModuleLoad(ptr @binary_bin_cst) + store ptr %0, ptr @binary_module + ... +} +define internal void @binary_unload() section ".text.startup" { +entry: + %0 = load ptr, ptr @binary_module, align 8 + call void @mgpuModuleUnload(ptr %0) + ... +} ... 
define void @foo() { ... - %module = call ptr @mgpuModuleLoad(ptr @binary_bin_cst) + %module = load ptr, ptr @binary_module, align 8 %kernel = call ptr @mgpuModuleGetFunction(ptr %module, ptr @binary_func_kernel_name) call void @mgpuLaunchKernel(ptr %kernel, ...) ; Launch the kernel ... diff --git a/mlir/include/mlir-c/ExecutionEngine.h b/mlir/include/mlir-c/ExecutionEngine.h index 99cddc5c2598..1a58d68533f2 100644 --- a/mlir/include/mlir-c/ExecutionEngine.h +++ b/mlir/include/mlir-c/ExecutionEngine.h @@ -46,6 +46,13 @@ MLIR_CAPI_EXPORTED MlirExecutionEngine mlirExecutionEngineCreate( MlirModule op, int optLevel, int numPaths, const MlirStringRef *sharedLibPaths, bool enableObjectDump); +/// Initialize the ExecutionEngine. Global constructors specified by +/// `llvm.mlir.global_ctors` will be run. One common scenario is that kernel +/// binary compiled from `gpu.module` gets loaded during initialization. Make +/// sure all symbols are resolvable before initialization by calling +/// `mlirExecutionEngineRegisterSymbol` or including shared libraries. +MLIR_CAPI_EXPORTED void mlirExecutionEngineInitialize(MlirExecutionEngine jit); + /// Destroy an ExecutionEngine instance. MLIR_CAPI_EXPORTED void mlirExecutionEngineDestroy(MlirExecutionEngine jit); diff --git a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h index 96ccebcd5685..5bd71d68d253 100644 --- a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h +++ b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h @@ -227,6 +227,13 @@ public: llvm::function_ref symbolMap); + /// Initialize the ExecutionEngine. Global constructors specified by + /// `llvm.mlir.global_ctors` will be run. One common scenario is that kernel + /// binary compiled from `gpu.module` gets loaded during initialization. Make + /// sure all symbols are resolvable before initialization by calling + /// `registerSymbols` or including shared libraries. 
+ void initialize(); + private: /// Ordering of llvmContext and jit is important for destruction purposes: the /// jit must be destroyed before the context. @@ -250,6 +257,8 @@ private: /// Destroy functions in the libraries loaded by the ExecutionEngine that are /// called when this ExecutionEngine is destructed. SmallVector destroyFns; + + bool isInitialized = false; }; } // namespace mlir diff --git a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp index 81dada355362..4885d62c56e6 100644 --- a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp +++ b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "mlir-c/ExecutionEngine.h" -#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "mlir/Bindings/Python/Nanobind.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" namespace nb = nanobind; using namespace mlir; @@ -124,6 +124,17 @@ NB_MODULE(_mlirExecutionEngine, m) { }, nb::arg("name"), nb::arg("callback"), "Register `callback` as the runtime symbol `name`.") + .def( + "initialize", + [](PyExecutionEngine &executionEngine) { + mlirExecutionEngineInitialize(executionEngine.get()); + }, + "Initialize the ExecutionEngine. Global constructors specified by " + "`llvm.mlir.global_ctors` will be run. One common scenario is that " + "kernel binary compiled from `gpu.module` gets loaded during " + "initialization. 
Make sure all symbols are resolvable before " + "initialization by calling `register_runtime` or including " + "shared libraries.") .def( "dump_to_object_file", [](PyExecutionEngine &executionEngine, const std::string &fileName) { diff --git a/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp index 306cebd236be..2dbb993b1640 100644 --- a/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp @@ -68,6 +68,10 @@ mlirExecutionEngineCreate(MlirModule op, int optLevel, int numPaths, return wrap(jitOrError->release()); } +extern "C" void mlirExecutionEngineInitialize(MlirExecutionEngine jit) { + unwrap(jit)->initialize(); +} + extern "C" void mlirExecutionEngineDestroy(MlirExecutionEngine jit) { delete (unwrap(jit)); } @@ -106,9 +110,8 @@ extern "C" void mlirExecutionEngineRegisterSymbol(MlirExecutionEngine jit, void *sym) { unwrap(jit)->registerSymbols([&](llvm::orc::MangleAndInterner interner) { llvm::orc::SymbolMap symbolMap; - symbolMap[interner(unwrap(name))] = - { llvm::orc::ExecutorAddr::fromPtr(sym), - llvm::JITSymbolFlags::Exported }; + symbolMap[interner(unwrap(name))] = {llvm::orc::ExecutorAddr::fromPtr(sym), + llvm::JITSymbolFlags::Exported}; return symbolMap; }); } diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index f704fbfbe8ff..52162a43aeae 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -106,7 +106,7 @@ void ExecutionEngine::dumpToObjectFile(StringRef filename) { } // Compilation is lazy and it doesn't populate object cache unless requested. // In case object dump is requested before cache is populated, we need to - // force compilation manually. + // force compilation manually. 
if (cache->isEmpty()) { for (std::string &functionName : functionNames) { auto result = lookupPacked(functionName); @@ -400,13 +400,6 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options, return symbolMap; }; engine->registerSymbols(runtimeSymbolMap); - - // Execute the global constructors from the module being processed. - // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a - // crash for AArch64 see related issue #71963. - if (!engine->jit->getTargetTriple().isAArch64()) - cantFail(engine->jit->initialize(engine->jit->getMainJITDylib())); - return std::move(engine); } @@ -442,6 +435,7 @@ Expected ExecutionEngine::lookup(StringRef name) const { Error ExecutionEngine::invokePacked(StringRef name, MutableArrayRef args) { + initialize(); auto expectedFPtr = lookupPacked(name); if (!expectedFPtr) return expectedFPtr.takeError(); @@ -451,3 +445,13 @@ Error ExecutionEngine::invokePacked(StringRef name, return Error::success(); } + +void ExecutionEngine::initialize() { + if (isInitialized) + return; + // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a + // crash for AArch64 see related issue #71963. 
+ if (!jit->getTargetTriple().isAArch64()) + cantFail(jit->initialize(jit->getMainJITDylib())); + isInitialized = true; +} diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index 2107df37d199..0ada4cc96570 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -202,6 +202,8 @@ compileAndExecute(Options &options, Operation *module, StringRef entryPoint, auto engine = std::move(*expectedEngine); + engine->initialize(); + auto expectedFPtr = engine->lookupPacked(entryPoint); if (!expectedFPtr) return expectedFPtr.takeError(); diff --git a/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi b/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi index 58d453d2b2d3..4b82c7848929 100644 --- a/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi +++ b/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi @@ -19,5 +19,6 @@ class ExecutionEngine: def dump_to_object_file(self, file_name: str) -> None: ... def raw_lookup(self, func_name: str) -> int: ... def raw_register_runtime(self, name: str, callback: object) -> None: ... + def initialize(self) -> None: ... @property def _CAPIPtr(self) -> object: ... 
diff --git a/mlir/test/CAPI/CMakeLists.txt b/mlir/test/CAPI/CMakeLists.txt index a7f9eb9b4efe..d45142510a49 100644 --- a/mlir/test/CAPI/CMakeLists.txt +++ b/mlir/test/CAPI/CMakeLists.txt @@ -30,6 +30,13 @@ if(MLIR_ENABLE_EXECUTION_ENGINE) MLIRCAPIConversion MLIRCAPIExecutionEngine MLIRCAPIRegisterEverything +) + _add_capi_test_executable(mlir-capi-global-constructors-test + global_constructors.c + LINK_LIBS PRIVATE + MLIRCAPIConversion + MLIRCAPIExecutionEngine + MLIRCAPIRegisterEverything ) endif() diff --git a/mlir/test/CAPI/global_constructors.c b/mlir/test/CAPI/global_constructors.c new file mode 100644 index 000000000000..bd2fe1416f0d --- /dev/null +++ b/mlir/test/CAPI/global_constructors.c @@ -0,0 +1,113 @@ +//===- global_constructors.c - Test JIT with the global constructors ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: target=aarch64{{.*}}, target=arm64{{.*}} +/* RUN: mlir-capi-global-constructors-test 2>&1 | FileCheck %s + */ +/* REQUIRES: host-supports-jit + */ + +#include "mlir-c/Conversion.h" +#include "mlir-c/ExecutionEngine.h" +#include "mlir-c/IR.h" +#include "mlir-c/RegisterEverything.h" + +#include +#include +#include +#include +#include + +static void registerAllUpstreamDialects(MlirContext ctx) { + MlirDialectRegistry registry = mlirDialectRegistryCreate(); + mlirRegisterAllDialects(registry); + mlirContextAppendDialectRegistry(ctx, registry); + mlirDialectRegistryDestroy(registry); +} + +void lowerModuleToLLVM(MlirContext ctx, MlirModule module) { + MlirPassManager pm = mlirPassManagerCreate(ctx); + MlirOpPassManager opm = mlirPassManagerGetNestedUnder( + pm, mlirStringRefCreateFromCString("func.func")); + mlirPassManagerAddOwnedPass(pm, 
mlirCreateConversionConvertFuncToLLVMPass()); + mlirOpPassManagerAddOwnedPass( + opm, mlirCreateConversionArithToLLVMConversionPass()); + MlirLogicalResult status = + mlirPassManagerRunOnOp(pm, mlirModuleGetOperation(module)); + if (mlirLogicalResultIsFailure(status)) { + fprintf(stderr, "Unexpected failure running pass pipeline\n"); + exit(2); + } + mlirPassManagerDestroy(pm); +} + +// Helper variable to track callback invocations +static int initCnt = 0; + +// Callback function that will be called during JIT initialization +static void initCallback(void) { initCnt += 1; } + +// CHECK-LABEL: Running test 'testGlobalCtorJitCallback' +void testGlobalCtorJitCallback(void) { + MlirContext ctx = mlirContextCreate(); + registerAllUpstreamDialects(ctx); + + // Create module with global constructor that calls our callback + MlirModule module = mlirModuleCreateParse( + ctx, mlirStringRefCreateFromCString( + // clang-format off +"module { \n" +" llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero] \n" +" llvm.func @ctor() { \n" +" func.call @init_callback() : () -> () \n" +" llvm.return \n" +" } \n" +" func.func private @init_callback() attributes { llvm.emit_c_interface } \n" +"} \n" + // clang-format on + )); + + lowerModuleToLLVM(ctx, module); + mlirRegisterAllLLVMTranslations(ctx); + + // Create execution engine; global ctors run only on explicit initialization + MlirExecutionEngine jit = mlirExecutionEngineCreate( + module, /*optLevel=*/2, /*numPaths=*/0, /*sharedLibPaths=*/NULL, + /*enableObjectDump=*/false); + + if (mlirExecutionEngineIsNull(jit)) { + fprintf(stderr, "Execution engine creation failed"); + exit(2); + } + + // Register callback symbol before initialization + mlirExecutionEngineRegisterSymbol( + jit, mlirStringRefCreateFromCString("_mlir_ciface_init_callback"), + (void *)(uintptr_t)initCallback); + + mlirExecutionEngineInitialize(jit); + + // CHECK: Init count: 1 + printf("Init count: %d\n", initCnt); + + mlirExecutionEngineDestroy(jit); + 
mlirModuleDestroy(module); + mlirContextDestroy(ctx); +} + +int main(void) { + +#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) +#define TEST(test) \ + printf("Running test '" STRINGIFY(test) "'\n"); \ + test(); + TEST(testGlobalCtorJitCallback); + return 0; +} diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index 016d5e6f6e91..7736723ce2ae 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -141,6 +141,7 @@ if(LLVM_ENABLE_PIC AND TARGET ${LLVM_NATIVE_ARCH}) llc mlir_async_runtime mlir-capi-execution-engine-test + mlir-capi-global-constructors-test mlir_c_runner_utils mlir_runner_utils mlir_float16_utils diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index ba7eeeed8ef3..5e9347d784b3 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -190,6 +190,7 @@ tools = [ "mlir-translate", "mlir-lsp-server", "mlir-capi-execution-engine-test", + "mlir-capi-global-constructors-test", "mlir-capi-ir-test", "mlir-capi-irdl-test", "mlir-capi-llvm-test", diff --git a/mlir/test/python/global_constructors.py b/mlir/test/python/global_constructors.py new file mode 100644 index 000000000000..5020c00344a3 --- /dev/null +++ b/mlir/test/python/global_constructors.py @@ -0,0 +1,72 @@ +# UNSUPPORTED: target=aarch64{{.*}}, target=arm64{{.*}} +# RUN: %PYTHON %s 2>&1 | FileCheck %s +# REQUIRES: host-supports-jit +import gc, sys, os, tempfile +from mlir.ir import * +from mlir.passmanager import * +from mlir.execution_engine import * +from mlir.runtime import * + + +# Log everything to stderr and flush so that we have a unified stream to match +# errors/info emitted by MLIR to stderr. 
+def log(*args): + print(*args, file=sys.stderr) + sys.stderr.flush() + + +def run(f): + log("\nTEST:", f.__name__) + f() + gc.collect() + assert Context._get_live_count() == 0 + + +def lowerToLLVM(module): + pm = PassManager.parse( + "builtin.module(convert-func-to-llvm,reconcile-unrealized-casts)" + ) + pm.run(module.operation) + return module + + +# Test JIT callback in global constructor +# CHECK-LABEL: TEST: testJITCallbackInGlobalCtor +def testJITCallbackInGlobalCtor(): + init_cnt = 0 + + @ctypes.CFUNCTYPE(None) + def initCallback(): + nonlocal init_cnt + init_cnt += 1 + + with Context(): + module = Module.parse( + r""" +llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero] +llvm.func @ctor() { + func.call @init_callback() : () -> () + llvm.return +} +func.func private @init_callback() attributes { llvm.emit_c_interface } + """ + ) + + # Setup execution engine + execution_engine = ExecutionEngine(lowerToLLVM(module)) + + # Validate initialization hasn't run yet + assert init_cnt == 0 + + # # Register callback + execution_engine.register_runtime("init_callback", initCallback) + + # # Initialize and verify + execution_engine.initialize() + assert init_cnt == 1 + # # Second initialization should be no-op + execution_engine.initialize() + assert init_cnt == 1 + + +run(testJITCallbackInGlobalCtor) diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp index 312b10f28143..b9a46c5ce942 100644 --- a/mlir/unittests/ExecutionEngine/Invoke.cpp +++ b/mlir/unittests/ExecutionEngine/Invoke.cpp @@ -322,4 +322,55 @@ TEST(NativeMemRefJit, MAYBE_JITCallback) { ASSERT_EQ(elt, coefficient * count++); } +static int initCnt = 0; +// A helper function that will be called during the JIT's initialization. 
+static void initCallback() { initCnt += 1; } + +TEST(MLIRExecutionEngine, MAYBE_JITCallbackInGlobalCtor) { + auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost(); + ASSERT_TRUE(!!tmBuilderOrError); + if (tmBuilderOrError->getTargetTriple().isAArch64()) { + GTEST_SKIP() << "Skipping global ctor initialization test on Aarch64 " + "because of bug #71963"; + return; + } + std::string moduleStr = R"mlir( + llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero] + llvm.func @ctor() { + func.call @init_callback() : () -> () + llvm.return + } + func.func private @init_callback() attributes { llvm.emit_c_interface } + )mlir"; + + DialectRegistry registry; + registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); + registerLLVMDialectTranslation(registry); + MLIRContext context(registry); + auto module = parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + ASSERT_TRUE(succeeded(lowerToLLVMDialect(*module))); + ExecutionEngineOptions jitOptions; + auto jitOrError = ExecutionEngine::create(*module, jitOptions); + ASSERT_TRUE(!!jitOrError); + // validate initialization is not run on construction + ASSERT_EQ(initCnt, 0); + auto jit = std::move(jitOrError.get()); + // Define any extra symbols so they're available at initialization. 
+ jit->registerSymbols([&](llvm::orc::MangleAndInterner interner) { + llvm::orc::SymbolMap symbolMap; + symbolMap[interner("_mlir_ciface_init_callback")] = { + llvm::orc::ExecutorAddr::fromPtr(initCallback), + llvm::JITSymbolFlags::Exported}; + return symbolMap; + }); + jit->initialize(); + // validate the side effect of initialization + ASSERT_EQ(initCnt, 1); + // next initialization should be noop + jit->initialize(); + ASSERT_EQ(initCnt, 1); +} + #endif // _WIN32 From dff8dac9dca2b1bca3814e6240c41d9cbda42b71 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Mon, 18 Aug 2025 00:18:32 +0300 Subject: [PATCH 112/214] [clang-tidy][docs] Add description of "clang-diagnostic-error" (#153870) This helps better distinguish warnings that could be disabled via `.clang-tidy` config (like `clang-diagnostic-literal-conversion`) from errors that could not be suppressed at all (like `clang-diagnostic-error`) because it's a hard compiler error. --- clang-tools-extra/docs/clang-tidy/index.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst index e8ce903fcb07..e0cf5ef720b0 100644 --- a/clang-tools-extra/docs/clang-tidy/index.rst +++ b/clang-tools-extra/docs/clang-tidy/index.rst @@ -111,6 +111,13 @@ Diagnostics which have a corresponding warning option, are named ``-Wliteral-conversion`` will be reported with check name ``clang-diagnostic-literal-conversion``. +Clang compiler errors (such as syntax errors, semantic errors, or other failures +that prevent Clang from compiling the code) are reported with the check name +``clang-diagnostic-error``. These represent fundamental compilation failures that +must be fixed before :program:`clang-tidy` can perform its analysis. Unlike other +diagnostics, ``clang-diagnostic-error`` cannot be disabled, as :program:`clang-tidy` +requires valid code to function. 
+ The ``-fix`` flag instructs :program:`clang-tidy` to fix found errors if supported by corresponding checks. From 12769aa7283b385a0695372ff13063189a9d1686 Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Sun, 17 Aug 2025 15:17:04 -0700 Subject: [PATCH 113/214] [Offload] Introduce ATTACH map-type support for pointer attachment. (#149036) This patch introduces libomptarget support for the ATTACH map-type, which can be used to implement OpenMP-compliant conditional pointer attachment, based on whether the pointer/pointee is newly mapped on a given construct. For example, for the following: ```c int *p; #pragma omp target enter data map(p[1:10]) ``` The following maps can be emitted by clang: ``` (A) &p[0], &p[1], 10 * sizeof(p[1]), TO | FROM &p, &p[1], sizeof(p), ATTACH ``` Without this map-type, these two possible maps could be emitted by clang: ``` (B) &p[0], &p[1], 10 * sizeof(p[1]), TO | FROM (C) &p, &p[1], 10 * sizeof(p[1]), TO | FROM | PTR_AND_OBJ ``` (B) does not perform any pointer attachment, while (C) also maps the pointer p, which are both incorrect. In terms of implementation, maps with the ATTACH map-type are handled after all other maps have been processed, as it requires knowledge of which new allocations happened as part of the construct. As per OpenMP 5.0, an attachment should happen only when either the pointer or the pointee was newly mapped while handling the construct. Maps with ATTACH map-type-bit do not increase/decrease the ref-count. With OpenMP 6.1, `attach(always/never)` can be used to force/prevent attachment. For `attach(always)`, the compiler will insert the ALWAYS map-type, which would let libomptarget bypass the check about one of the pointer/pointee being new. With `attach(never)`, the ATTACH map will not be emitted at all. The size argument of the ATTACH map-type can specify values greater than `sizeof(void*)` which can be used to support pointer attachment on Fortran descriptors. 
Note that this also requires shadow-pointer tracking to also support them. That has not been implemented in this patch. This was worked upon in coordination with Ravi Narayanaswamy, who has since retired. Happy retirement, Ravi! --------- Co-authored-by: Alex Duran --- offload/include/OpenMP/Mapping.h | 40 ++- offload/include/device.h | 4 + offload/include/omptarget.h | 3 + offload/libomptarget/device.cpp | 4 + offload/libomptarget/interface.cpp | 19 +- offload/libomptarget/omptarget.cpp | 438 ++++++++++++++++++++++++++--- 6 files changed, 471 insertions(+), 37 deletions(-) diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h index b9f5c1658293..93c1e56905ae 100644 --- a/offload/include/OpenMP/Mapping.h +++ b/offload/include/OpenMP/Mapping.h @@ -417,12 +417,42 @@ struct MapperComponentsTy { typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t, void *); +/// Structure to store information about a single ATTACH map entry. +struct AttachMapInfo { + void *PointerBase; + void *PointeeBegin; + int64_t PointerSize; + int64_t MapType; + map_var_info_t Pointername; + + AttachMapInfo(void *PointerBase, void *PointeeBegin, int64_t Size, + int64_t Type, map_var_info_t Name) + : PointerBase(PointerBase), PointeeBegin(PointeeBegin), PointerSize(Size), + MapType(Type), Pointername(Name) {} +}; + +/// Structure to track ATTACH entries and new allocations across recursive calls +/// (for handling mappers) to targetDataBegin for a given construct. +struct AttachInfoTy { + /// ATTACH map entries for deferred processing. + llvm::SmallVector AttachEntries; + + /// Key: host pointer, Value: allocation size. 
+ llvm::DenseMap NewAllocations; + + AttachInfoTy() = default; + + // Delete copy constructor and copy assignment operator to prevent copying + AttachInfoTy(const AttachInfoTy &) = delete; + AttachInfoTy &operator=(const AttachInfoTy &) = delete; +}; + // Function pointer type for targetData* functions (targetDataBegin, // targetDataEnd and targetDataUpdate). typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **, void **, int64_t *, int64_t *, map_var_info_t *, void **, AsyncInfoTy &, - bool); + AttachInfoTy *, bool); void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device, bool toStdOut = false); @@ -431,20 +461,26 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, + AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false); int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - bool FromMapper = false); + AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false); int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, + AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false); +// Process deferred ATTACH map entries collected during targetDataBegin. 
+int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, + AsyncInfoTy &AsyncInfo); + struct MappingInfoTy { MappingInfoTy(DeviceTy &Device) : Device(Device) {} diff --git a/offload/include/device.h b/offload/include/device.h index f4b10abbaa3f..1e85bb1876c8 100644 --- a/offload/include/device.h +++ b/offload/include/device.h @@ -98,6 +98,10 @@ struct DeviceTy { int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo); + // Insert a data fence between previous data operations and the following + // operations if necessary for the device. + int32_t dataFence(AsyncInfoTy &AsyncInfo); + /// Notify the plugin about a new mapping starting at the host address /// \p HstPtr and \p Size bytes. int32_t notifyDataMapped(void *HstPtr, int64_t Size); diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 625bbaa0db85..8fd722bb1502 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -77,6 +77,9 @@ enum tgt_map_type { // the structured region // This is an OpenMP extension for the sake of OpenACC support. OMP_TGT_MAPTYPE_OMPX_HOLD = 0x2000, + // Attach pointer and pointee, after processing all other maps. + // Applicable to map-entering directives. Does not change ref-count. 
+ OMP_TGT_MAPTYPE_ATTACH = 0x4000, // descriptor for non-contiguous target-update OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000, // member of struct, member given by [16 MSBs] - 1 diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index f88e30ae9e76..6585286bf428 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -191,6 +191,10 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, DstPtr, Size, AsyncInfo); } +int32_t DeviceTy::dataFence(AsyncInfoTy &AsyncInfo) { + return RTL->data_fence(RTLDeviceID, AsyncInfo); +} + int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n", DPxPTR(HstPtr), Size); diff --git a/offload/libomptarget/interface.cpp b/offload/libomptarget/interface.cpp index e9b148d8a260..fe1828976590 100644 --- a/offload/libomptarget/interface.cpp +++ b/offload/libomptarget/interface.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #ifdef OMPT_SUPPORT using namespace llvm::omp::target::ompt; @@ -165,12 +166,24 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, OMPT_GET_RETURN_ADDRESS);) int Rc = OFFLOAD_SUCCESS; + + // Only allocate AttachInfo for targetDataBegin + std::unique_ptr AttachInfo; + if (TargetDataFunction == targetDataBegin) + AttachInfo = std::make_unique(); + Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo, - false /*FromMapper=*/); + AttachInfo.get(), /*FromMapper=*/false); - if (Rc == OFFLOAD_SUCCESS) - Rc = AsyncInfo.synchronize(); + if (Rc == OFFLOAD_SUCCESS) { + // Process deferred ATTACH entries BEFORE synchronization + if (AttachInfo && !AttachInfo->AttachEntries.empty()) + Rc = processAttachEntries(*DeviceOrErr, *AttachInfo, AsyncInfo); + + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); + } handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); } diff 
--git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 5b25d955dd32..32e89cc75efc 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -293,7 +293,8 @@ void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) { int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames, void *ArgMapper, AsyncInfoTy &AsyncInfo, - TargetDataFuncPtrTy TargetDataFunction) { + TargetDataFuncPtrTy TargetDataFunction, + AttachInfoTy *AttachInfo = nullptr) { DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)); // The mapper function fills up Components. @@ -324,17 +325,178 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, MapperArgsBase.data(), MapperArgs.data(), MapperArgSizes.data(), MapperArgTypes.data(), MapperArgNames.data(), /*arg_mappers*/ nullptr, - AsyncInfo, /*FromMapper=*/true); + AsyncInfo, AttachInfo, /*FromMapper=*/true); return Rc; } +/// Utility function to perform a pointer attachment operation. +/// +/// For something like: +/// ```cpp +/// int *p; +/// ... +/// #pragma omp target enter data map(to:p[10:10]) +/// ``` +/// +/// for which the attachment operation gets represented using: +/// ``` +/// &p, &p[10], sizeof(p), ATTACH +/// ``` +/// +/// (Hst|Tgt)PtrAddr represents &p +/// (Hst|Tgt)PteeBase represents &p[0] +/// (Hst|Tgt)PteeBegin represents &p[10] +/// +/// This function first computes the expected TgtPteeBase using: +/// `