
This patch utilizes the -maix-small-local-exec-tls option to produce a faster, non-TOC-based access sequence for the local-exec TLS model, specifically for the case where the offset from the TLS variable is non-zero.

In particular, this patch produces either a single:
- addi/la with a displacement off of R13 plus a non-zero offset, for when an address is calculated, or
- load or store off of R13 plus a non-zero offset, for when an address is calculated and used for further access,

where R13 is the thread pointer.

In order to produce a single addi or load/store off of the thread pointer with a non-zero offset, this patch also adds the necessary support in the assembly printer when printing these instructions. Specifically:
- The non-zero offset is added to the TLS variable address when the address of the TLS variable + its offset is less than 32KB.
- Otherwise, when the address of the TLS variable + its offset is 32KB or greater, the non-zero offset is added and a multiple of 64KB is subtracted from the TLS address.

This handling in the assembly printer is necessary to ensure that the TLS address + the non-zero offset lies within [-32768, 32768), so that the total displacement can fit within the addi/load/store instructions.

This patch is meant to be a follow-up to 3f46e5453d9310b15d974e876f6132e3cf50c4b1 (where the optimization occurs for when the offset is zero).
171 lines
7.3 KiB
LLVM
171 lines
7.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3

; Configuration 1: 64-bit AIX with +aix-small-local-exec-tls and no explicit
; code model on the command line.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN:   -mtriple powerpc64-ibm-aix-xcoff -mattr=+aix-small-local-exec-tls < %s \
; RUN:   | FileCheck %s --check-prefix=SMALL-LOCAL-EXEC-SMALLCM64

; Configuration 2: same target and feature, but with --code-model=large.
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN:   -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
; RUN:   -mattr=+aix-small-local-exec-tls < %s | FileCheck %s \
; RUN:   --check-prefix=SMALL-LOCAL-EXEC-LARGECM64

; Externally visible local-exec TLS byte, initialized to 1.
@ThreadLocalVarInit = thread_local(localexec) global i8 1, align 1

; Ordinary (non-TLS) global byte; the *Init2 load tests add it to a TLS value.
@VarInit = local_unnamed_addr global i8 87, align 1

; Internal-linkage counterpart of @ThreadLocalVarInit.
@IThreadLocalVarInit = internal thread_local(localexec) global i8 1, align 1

; Intrinsic through which every test in this file obtains a TLS variable's
; address.
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1

; Local-exec TLS byte array; AddrTest1 addresses element 1 of it so that the
; access carries a non-zero offset from the TLS variable.
@c = thread_local(localexec) global [87 x i8] zeroinitializer, align 1

; Address computation with a non-zero offset: returns the address of element 1
; of the local-exec TLS array @c. Both RUN configurations expect a single `la`
; off r13 (the thread pointer) with the +1 folded into the displacement.
define nonnull ptr @AddrTest1() local_unnamed_addr #0 {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: la r3, c[TL]@le+1(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: AddrTest1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: la r3, c[TL]@le+1(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @c)
  %arrayidx = getelementptr inbounds [87 x i8], ptr %0, i64 0, i64 1
  ret ptr %arrayidx
}

; Byte store to the internal local-exec TLS variable. Both RUN configurations
; expect one `stb` whose displacement is the variable's @le value off r13,
; with no separate address materialization beforehand.
define void @storeITLInit(i8 noundef zeroext %x) {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeITLInit:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeITLInit:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
  store i8 %x, ptr %0, align 1
  ret void
}

; Byte store to the externally visible local-exec TLS variable. Mirrors
; storeITLInit: both RUN configurations expect a single `stb` off r13.
define void @storeTLInit(i8 noundef zeroext %x) {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: storeTLInit:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: storeTLInit:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, ThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
  store i8 %x, ptr %0, align 1
  ret void
}

; Byte load from the internal local-exec TLS variable. Both RUN configurations
; expect a single `lbz` whose displacement is the variable's @le value off r13.
define zeroext i8 @loadITLInit() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
  %1 = load i8, ptr %0, align 1
  ret i8 %1
}

; Loads the internal local-exec TLS byte and the non-TLS global @VarInit and
; returns their i8 sum. The TLS byte is expected to be read with a single `lbz`
; off r13 in both RUN configurations, while @VarInit is still reached through
; its L..C0 entry off r2: one `ld` in the first configuration, an `addis`/`ld`
; pair with --code-model=large.
define zeroext i8 @loadITLInit2() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadITLInit2:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, L..C0(r2) # @VarInit
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, 0(r4)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r4, r3
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadITLInit2:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r4, L..C0@u(r2)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, L..C0@l(r4)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, 0(r4)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r4, r3
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
  %1 = load i8, ptr %0, align 1
  %2 = load i8, ptr @VarInit, align 1
  %add = add i8 %2, %1
  ret i8 %add
}

; Byte load from the externally visible local-exec TLS variable. Mirrors
; loadITLInit: both RUN configurations expect a single `lbz` off r13.
define zeroext i8 @loadTLInit() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
  %1 = load i8, ptr %0, align 1
  ret i8 %1
}

; Loads the externally visible local-exec TLS byte and the non-TLS global
; @VarInit and returns their i8 sum. Mirrors loadITLInit2: the TLS byte is
; expected to be read with a single `lbz` off r13, while @VarInit is reached
; through its L..C0 entry off r2 (one `ld`, or `addis`/`ld` with
; --code-model=large).
define zeroext i8 @loadTLInit2() {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadTLInit2:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, L..C0(r2) # @VarInit
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r4, 0(r4)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r3, r4, r3
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: clrldi r3, r3, 56
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadTLInit2:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r4, L..C0@u(r2)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, ThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r4, L..C0@l(r4)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r4, 0(r4)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r4, r3
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: clrldi r3, r3, 56
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @ThreadLocalVarInit)
  %1 = load i8, ptr %0, align 1
  %2 = load i8, ptr @VarInit, align 1
  %add = add i8 %2, %1
  ret i8 %add
}

; Read-modify-write of the internal local-exec TLS byte: load it, add 9, store
; it back through the same pointer. Both RUN configurations expect the load
; and the store to each address the variable directly off r13.
; Note: the %x parameter is not used by the body.
define void @loadStore1(i8 noundef zeroext %x) {
; SMALL-LOCAL-EXEC-SMALLCM64-LABEL: loadStore1:
; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r3, r3, 9
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr
;
; SMALL-LOCAL-EXEC-LARGECM64-LABEL: loadStore1:
; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: lbz r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r3, r3, 9
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stb r3, IThreadLocalVarInit[TL]@le(r13)
; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr
entry:
  %0 = tail call align 1 ptr @llvm.threadlocal.address.p0(ptr align 1 @IThreadLocalVarInit)
  %1 = load i8, ptr %0, align 1
  %add = add i8 %1, 9
  store i8 %add, ptr %0, align 1
  ret void
}