Koakuma 118d4234ac
[SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target
On 64-bit targets, prefer using RDPC over CALL to get the value of %pc.
This is faster on modern processors (Niagara T1 and newer) and avoids
polluting the processor's predictor state.

The old behavior of using a fake CALL is retained when tuning for
classic UltraSPARC processors, since RDPC is much slower there.

A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT
loads, and about 7% speedup on INSERT/UPDATE loads.

Reviewed By: @s-barannikov

Github PR: https://github.com/llvm/llvm-project/pull/78280
2024-01-16 22:46:39 +07:00

52 lines
1.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
;; SPARC32, and SPARC64 when tuning for classic UltraSPARCs, implement GETPCX
;; with a fake `call`.
;; All other SPARC64 targets implement it with `rd %pc, %o7`.
;; The tests need to live in separate files because the `tune-cpu`
;; attribute apparently applies to the entire file at once.
;; External i32 global read by testCall; it has no definition in this file.
@value = external global i32
;; testCall loads the i32 stored in @value and returns it. For both the 32-bit
;; and the 64-bit run, the generated checks below expect the GOT address to be
;; formed by a `call .Ltmp1` followed by adding %o7 to the
;; _GLOBAL_OFFSET_TABLE_ offset.
define i32 @testCall() nounwind #0 {
; SPARC-LABEL: testCall:
; SPARC: ! %bb.0:
; SPARC-NEXT: save %sp, -96, %sp
; SPARC-NEXT: .Ltmp0:
; SPARC-NEXT: call .Ltmp1
; SPARC-NEXT: .Ltmp2:
; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
; SPARC-NEXT: .Ltmp1:
; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
; SPARC-NEXT: add %i0, %o7, %i0
; SPARC-NEXT: sethi %hi(value), %i1
; SPARC-NEXT: add %i1, %lo(value), %i1
; SPARC-NEXT: ld [%i0+%i1], %i0
; SPARC-NEXT: ld [%i0], %i0
; SPARC-NEXT: ret
; SPARC-NEXT: restore
;
; SPARC64-LABEL: testCall:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -128, %sp
; SPARC64-NEXT: .Ltmp0:
; SPARC64-NEXT: call .Ltmp1
; SPARC64-NEXT: .Ltmp2:
; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
; SPARC64-NEXT: .Ltmp1:
; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
; SPARC64-NEXT: add %i0, %o7, %i0
; SPARC64-NEXT: sethi %hi(value), %i1
; SPARC64-NEXT: add %i1, %lo(value), %i1
; SPARC64-NEXT: ldx [%i0+%i1], %i0
; SPARC64-NEXT: ld [%i0], %i0
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore
;; The function's only memory access: read the external global @value.
%1 = load i32, ptr @value
ret i32 %1
}
;; Attribute group attached to testCall: tune for "ultrasparc". The checks in
;; this file expect the fake `call` sequence under this tuning.
attributes #0 = { "tune-cpu"="ultrasparc" }