llvm-project/llvm/test/CodeGen/AArch64/cgdata-merge-no-params.ll
Kyungwoo Lee fe69a20cc1 Reland [CGData][GMF] Skip No Params (#116548)
This update follows up on change #112671 and is mostly a NFC, with the following exceptions:
  - Introduced `-global-merging-skip-no-params` to bypass merging when no parameters are required.
  - Parameter count is now calculated based on the unique hash count.
  - Added `-global-merging-inst-overhead` to adjust the instruction overhead, reflecting the machine instruction size.
  - Costs and benefits are now computed using the double data type. Since the finalization process occurs offline, this should not significantly impact build time.
  - Moved a sorting operation outside of the loop.

This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.
2024-11-25 13:55:02 -08:00

40 lines
1.5 KiB
LLVM

; This test verifies whether two identical functions, f1 and f2, can be merged
; locally using the global merge function.
; The functions, f1.Tgm and f2.Tgm, will be folded by the linker through
; Identical Code Folding (ICF).
; While identical functions can already be folded by the linker, creating this
; canonical form can be beneficial in downstream passes. This merging process
; can be controlled by the -global-merging-skip-no-params option.
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=true < %s | FileCheck %s --implicit-check-not=".Tgm"
; MERGE: _f1.Tgm
; MERGE: _f2.Tgm
@g = external local_unnamed_addr global [0 x i32], align 4
@g1 = external global i32, align 4
@g2 = external global i32, align 4
define i32 @f1(i32 %a) {
entry:
%idxprom = sext i32 %a to i64
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%1 = load volatile i32, i32* @g1, align 4
%mul = mul nsw i32 %1, %0
%add = add nsw i32 %mul, 1
ret i32 %add
}
define i32 @f2(i32 %a) {
entry:
%idxprom = sext i32 %a to i64
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%1 = load volatile i32, i32* @g1, align 4
%mul = mul nsw i32 %1, %0
%add = add nsw i32 %mul, 1
ret i32 %add
}