llvm-project/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
Kai Nacke 5403c59c60 [PPC] Opaque pointer migration, part 2.
The LIT test cases were migrated with the script provided by
Nikita Popov. Due to the size of the change it is split into
several parts.

Reviewed By: nemanja, nikic

Differential Revision: https://reviews.llvm.org/D135474
2022-10-11 17:24:06 +00:00

403 lines
14 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: < %s | FileCheck %s
; On the future CPU with PC-relative addressing enabled, it is possible for
; the linker to optimize GOT-indirect accesses. In order for the linker to do
; this, the compiler needs to add a hint using the R_PPC64_PCREL_OPT
; relocation. This test checks that the compiler adds the R_PPC64_PCREL_OPT
; relocation correctly.
;
; In the expected output the hint shows up as a local .LpcrelN label emitted
; right after a GOT-indirect pld, plus a later .reloc directive that refers
; back to that label and sits directly before the dependent memory access.

; External globals accessed by the functions below. Every access in the
; expected output goes through the GOT (the @got@pcrel operands).
; NOTE(review): @input32 and @ArrayIn are the only globals declared without
; local_unnamed_addr. Both have their address used as a value further down
; (AddrTaken32 returns @input32, UseAddr passes @ArrayIn to @getAddr), which
; is presumably why - confirm.
@input8 = external local_unnamed_addr global i8, align 1
@output8 = external local_unnamed_addr global i8, align 1
@input16 = external local_unnamed_addr global i16, align 2
@output16 = external local_unnamed_addr global i16, align 2
@input32 = external global i32, align 4
@output32 = external local_unnamed_addr global i32, align 4
@input64 = external local_unnamed_addr global i64, align 8
@output64 = external local_unnamed_addr global i64, align 8
@input128 = external local_unnamed_addr global i128, align 16
@output128 = external local_unnamed_addr global i128, align 16
@inputf32 = external local_unnamed_addr global float, align 4
@outputf32 = external local_unnamed_addr global float, align 4
@inputf64 = external local_unnamed_addr global double, align 8
@outputf64 = external local_unnamed_addr global double, align 8
@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
@ArrayIn = external global [10 x i32], align 4
@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
@IntPtrIn = external local_unnamed_addr global ptr, align 8
@IntPtrOut = external local_unnamed_addr global ptr, align 8
@FuncPtrIn = external local_unnamed_addr global ptr, align 8
@FuncPtrOut = external local_unnamed_addr global ptr, align 8
define dso_local void @ReadWrite8() local_unnamed_addr #0 {
; Copies one i8 from @input8 to @output8.
; Expected output: only the first pld (address of @input8) is followed by a
; .Lpcrel label, and the matching .reloc is placed directly before the lbz.
; In this test the stb r3, 0(r4) cannot be optimized because it
; uses the register r3 and that register is defined by lbz r3, 0(r3)
; which is defined between the pld and the stb.
; CHECK-LABEL: ReadWrite8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input8@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel0:
; CHECK-NEXT: pld r4, output8@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; CHECK-NEXT: lbz r3, 0(r3)
; CHECK-NEXT: stb r3, 0(r4)
; CHECK-NEXT: blr
entry:
%0 = load i8, ptr @input8, align 1
store i8 %0, ptr @output8, align 1
ret void
}
define dso_local void @ReadWrite16() local_unnamed_addr #0 {
; Copies one i16 from @input16 to @output16.
; Expected output: only the first pld (address of @input16) is followed by a
; .Lpcrel label, and the matching .reloc is placed directly before the lhz.
; In this test the sth r3, 0(r4) cannot be optimized because it
; uses the register r3 and that register is defined by lhz r3, 0(r3)
; which is defined between the pld and the sth.
; CHECK-LABEL: ReadWrite16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input16@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel1:
; CHECK-NEXT: pld r4, output16@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
; CHECK-NEXT: lhz r3, 0(r3)
; CHECK-NEXT: sth r3, 0(r4)
; CHECK-NEXT: blr
entry:
%0 = load i16, ptr @input16, align 2
store i16 %0, ptr @output16, align 2
ret void
}
define dso_local void @ReadWrite32() local_unnamed_addr #0 {
; Copies one i32 from @input32 to @output32.
; Expected output: the load (lwz) is paired with the first pld through
; .Lpcrel2 and the .reloc in front of it. The second pld has no label: the
; stw stores r3, which is written by the lwz located between that pld and
; the stw (the same situation as the stb in ReadWrite8).
; CHECK-LABEL: ReadWrite32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel2:
; CHECK-NEXT: pld r4, output32@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
; CHECK-NEXT: lwz r3, 0(r3)
; CHECK-NEXT: stw r3, 0(r4)
; CHECK-NEXT: blr
entry:
%0 = load i32, ptr @input32, align 4
store i32 %0, ptr @output32, align 4
ret void
}
define dso_local void @ReadWrite64() local_unnamed_addr #0 {
; Copies one i64 from @input64 to @output64.
; Expected output: the load (ld) is paired with the first pld through
; .Lpcrel3 and the .reloc in front of it. The second pld has no label: the
; std stores r3, which is written by the ld located between that pld and
; the std (the same situation as the stb in ReadWrite8).
; CHECK-LABEL: ReadWrite64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input64@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel3:
; CHECK-NEXT: pld r4, output64@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
%0 = load i64, ptr @input64, align 8
store i64 %0, ptr @output64, align 8
ret void
}
; FIXME: we should always convert X-Form instructions that use
; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; NOTE(review): the expected output of the function below already uses the
; displacement forms lxv/stxv, each preceded by a .reloc, so this FIXME may
; be stale for this test - confirm before removing it.
define dso_local void @ReadWrite128() local_unnamed_addr #0 {
; Copies one i128 from @input128 to @output128.
; Expected output: the value travels through vs0. Both pld instructions are
; followed by a .Lpcrel label (4 and 5), and a .reloc is placed directly
; before the lxv and before the stxv respectively. r3 is reused for the
; second address once the lxv has consumed the first one.
; CHECK-LABEL: ReadWrite128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input128@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel4:
; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: pld r3, output128@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel5:
; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
; CHECK-NEXT: stxv vs0, 0(r3)
; CHECK-NEXT: blr
entry:
%0 = load i128, ptr @input128, align 16
store i128 %0, ptr @output128, align 16
ret void
}
define dso_local void @ReadWritef32() local_unnamed_addr #0 {
; Loads a float from @inputf32, adds a constant and stores the sum to
; @outputf32.
; Expected output: the constant is materialized with xxspltidp; its
; immediate 1078103900 is 0x40428F5C, the single-precision bit pattern of
; the addend (about 3.04). That instruction sits between the .Lpcrel6 label
; and the .reloc, which is still placed directly before the lfs. The pld
; for @outputf32 has no label, so no relocation is expected for the stfs.
; CHECK-LABEL: ReadWritef32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputf32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel6:
; CHECK-NEXT: xxspltidp vs1, 1078103900
; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
; CHECK-NEXT: lfs f0, 0(r3)
; CHECK-NEXT: pld r3, outputf32@got@pcrel(0), 1
; CHECK-NEXT: xsaddsp f0, f0, f1
; CHECK-NEXT: stfs f0, 0(r3)
; CHECK-NEXT: blr
entry:
%0 = load float, ptr @inputf32, align 4
%add = fadd float %0, 0x400851EB80000000
store float %add, ptr @outputf32, align 4
ret void
}
define dso_local void @ReadWritef64() local_unnamed_addr #0 {
; Loads a double from @inputf64, adds 6.8 and stores the sum to @outputf64.
; Expected output: the constant is built with two xxsplti32dx instructions
; whose immediates 1075524403 (0x401B3333) and 858993459 (0x33333333) are
; the two 32-bit halves of the double 6.8 (0x401B333333333333). Both sit
; between the .Lpcrel7 label and the .reloc, which is still placed directly
; before the lfd. The pld for @outputf64 has no label, so no relocation is
; expected for the stfd.
; CHECK-LABEL: ReadWritef64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputf64@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel7:
; CHECK-NEXT: xxsplti32dx vs1, 0, 1075524403
; CHECK-NEXT: xxsplti32dx vs1, 1, 858993459
; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: pld r3, outputf64@got@pcrel(0), 1
; CHECK-NEXT: xsadddp f0, f0, f1
; CHECK-NEXT: stfd f0, 0(r3)
; CHECK-NEXT: blr
entry:
%0 = load double, ptr @inputf64, align 8
%add = fadd double %0, 6.800000e+00
store double %add, ptr @outputf64, align 8
ret void
}
; FIXME: we should always convert X-Form instructions that use
; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; NOTE(review): the expected output of the function below already uses the
; displacement forms lxv/stxv, and the lxv is preceded by a .reloc, so this
; FIXME may be stale for this test - confirm before removing it.
define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
; Loads a <4 x i32> from @inputVi32, replaces element 1 with 45 and stores
; the result to @outputVi32.
; Expected output: 45 is loaded into r4 (li) between the .Lpcrel8 label and
; the .reloc, which is placed directly before the lxv. The element is then
; inserted with vinsw. The pld for @outputVi32 has no label, so no
; relocation is expected for the stxv.
; CHECK-LABEL: ReadWriteVi32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel8:
; CHECK-NEXT: li r4, 45
; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: pld r3, outputVi32@got@pcrel(0), 1
; CHECK-NEXT: vinsw v2, r4, 8
; CHECK-NEXT: stxv v2, 0(r3)
; CHECK-NEXT: blr
entry:
%0 = load <4 x i32>, ptr @inputVi32, align 16
%vecins = insertelement <4 x i32> %0, i32 45, i32 1
store <4 x i32> %vecins, ptr @outputVi32, align 16
ret void
}
define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
; Copies one <2 x i64> from @inputVi64 to @outputVi64 without modifying it.
; Expected output: the value travels through vs0. Both pld instructions are
; followed by a .Lpcrel label (9 and 10), and a .reloc is placed directly
; before the lxv and before the stxv respectively.
; CHECK-LABEL: ReadWriteVi64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputVi64@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel9:
; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: pld r3, outputVi64@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel10:
; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
; CHECK-NEXT: stxv vs0, 0(r3)
; CHECK-NEXT: blr
entry:
%0 = load <2 x i64>, ptr @inputVi64, align 16
store <2 x i64> %0, ptr @outputVi64, align 16
ret void
}
define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
; Reads element 7 of @ArrayIn, adds 42 and writes the sum to element 2 of
; @ArrayOut.
; Expected output: the element indices appear as byte displacements on the
; accesses (28 = 7 * 4 on the lwz, 8 = 2 * 4 on the stw). Only the pld for
; @ArrayIn is followed by a label (.Lpcrel11); its .reloc is placed directly
; before the lwz, so the relocation is expected with a non-zero displacement.
; CHECK-LABEL: ReadWriteArray:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel11:
; CHECK-NEXT: pld r4, ArrayOut@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
; CHECK-NEXT: lwz r3, 28(r3)
; CHECK-NEXT: addi r3, r3, 42
; CHECK-NEXT: stw r3, 8(r4)
; CHECK-NEXT: blr
entry:
%0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 7), align 4
%add = add nsw i32 %0, 42
store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayOut, i64 0, i64 2), align 4
ret void
}
define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
; Reads element 3 of @ArrayIn, adds 8 and writes the sum to element 6 of the
; same array.
; Expected output: a single pld provides the address in r3 for both the lwz
; (offset 12) and the stw (offset 24). No .Lpcrel label or .reloc is
; expected here.
; NOTE(review): presumably the hint is omitted because the pld result has
; more than one user - confirm against the pass that emits the relocation.
; CHECK-LABEL: ReadWriteSameArray:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT: lwz r4, 12(r3)
; CHECK-NEXT: addi r4, r4, 8
; CHECK-NEXT: stw r4, 24(r3)
; CHECK-NEXT: blr
entry:
%0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 3), align 4
%add = add nsw i32 %0, 8
store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 6), align 4
ret void
}
define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
; Loads the pointer stored in @IntPtrIn, sums the i32 elements at indices 54
; and 12 behind it, then stores the sum to index 34 behind the pointer
; loaded from @IntPtrOut.
; Expected output: both pld instructions are followed by a label (.Lpcrel12
; and .Lpcrel13), and each .reloc is placed directly before the ld that
; loads the corresponding pointer value. The element accesses through the
; loaded pointers (offsets 216, 48 and 136) are ordinary lwz/stw
; instructions with no relocation of their own.
; CHECK-LABEL: ReadWriteIntPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, IntPtrIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel12:
; CHECK-NEXT: pld r4, IntPtrOut@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel13:
; CHECK-NEXT: .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
; CHECK-NEXT: ld r4, 0(r4)
; CHECK-NEXT: lwz r5, 216(r3)
; CHECK-NEXT: lwz r3, 48(r3)
; CHECK-NEXT: add r3, r3, r5
; CHECK-NEXT: stw r3, 136(r4)
; CHECK-NEXT: blr
entry:
%0 = load ptr, ptr @IntPtrIn, align 8
%arrayidx = getelementptr inbounds i32, ptr %0, i64 54
%1 = load i32, ptr %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32, ptr %0, i64 12
%2 = load i32, ptr %arrayidx1, align 4
%add = add nsw i32 %2, %1
%3 = load ptr, ptr @IntPtrOut, align 8
%arrayidx2 = getelementptr inbounds i32, ptr %3, i64 34
store i32 %add, ptr %arrayidx2, align 4
ret void
}
define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
; Copies the contents of @FuncPtrIn to @FuncPtrOut. Both globals are
; declared as ptr, but the copy is done here as an i64 load and store.
; Expected output: the same shape as ReadWrite64 - the ld is paired with the
; first pld through .Lpcrel14, and the second pld has no label.
; CHECK-LABEL: ReadWriteFuncPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel14:
; CHECK-NEXT: pld r4, FuncPtrOut@got@pcrel(0), 1
; CHECK-NEXT: .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: std r3, 0(r4)
; CHECK-NEXT: blr
entry:
%0 = load i64, ptr @FuncPtrIn, align 8
store i64 %0, ptr @FuncPtrOut, align 8
ret void
}
define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
; Stores the address of @Callee into @FuncPtrOut.
; Expected output: two pld instructions (one per symbol) followed by the
; std. No .Lpcrel label or .reloc is expected: the pld of Callee yields
; the value being stored rather than an address that is dereferenced, and
; it defines r4 between the pld of FuncPtrOut and the std.
; CHECK-LABEL: FuncPtrCopy:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, FuncPtrOut@got@pcrel(0), 1
; CHECK-NEXT: pld r4, Callee@got@pcrel(0), 1
; CHECK-NEXT: std r4, 0(r3)
; CHECK-NEXT: blr
entry:
store ptr @Callee, ptr @FuncPtrOut, align 8
ret void
}
; External function with a variadic prototype. Its address is taken in
; FuncPtrCopy and it is called with no arguments in VecMultiUse.
declare void @Callee(...)
define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
; Loads the function pointer stored in @FuncPtrIn and tail-calls it.
; Expected output: the pld is followed by .Lpcrel15 and the .reloc is placed
; directly before the ld that loads the pointer into r12. The call itself is
; emitted as mtctr/bctr, followed by the TC_RETURNr8 comment line.
; CHECK-LABEL: FuncPtrCall:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, FuncPtrIn@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel15:
; CHECK-NEXT: .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
; CHECK-NEXT: ld r12, 0(r3)
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
%0 = load ptr, ptr @FuncPtrIn, align 8
tail call void %0()
ret void
}
define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
; Loads the whole <4 x i32> @inputVi32 in the IR but returns only element 1.
; Expected output: the vector load is narrowed to a single scalar lwa at
; byte offset 4, and that lwa is paired with the pld through .Lpcrel16 and
; the .reloc in front of it.
; CHECK-LABEL: ReadVecElement:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, inputVi32@got@pcrel(0), 1
; CHECK-NEXT: .Lpcrel16:
; CHECK-NEXT: .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
; CHECK-NEXT: lwa r3, 4(r3)
; CHECK-NEXT: blr
entry:
%0 = load <4 x i32>, ptr @inputVi32, align 16
%vecext = extractelement <4 x i32> %0, i32 1
ret i32 %vecext
}
define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
; Loads @inputVi32 three times, with a call to @Callee between consecutive
; loads, and returns the sum of element 1 of the first load, element 2 of
; the second load and element 0 of the third load.
; Expected output: one pld places the address in r30, which is spilled and
; reloaded in the prologue/epilogue and reused by three scalar lwz loads
; (offsets 4, 8 and 0) around the two calls. No .Lpcrel label or .reloc is
; expected here.
; NOTE(review): presumably the hint is omitted because the pld result has
; several users - confirm against the pass that emits the relocation.
; CHECK-LABEL: VecMultiUse:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: pld r30, inputVi32@got@pcrel(0), 1
; CHECK-NEXT: lwz r29, 4(r30)
; CHECK-NEXT: bl Callee@notoc
; CHECK-NEXT: lwz r3, 8(r30)
; CHECK-NEXT: add r29, r3, r29
; CHECK-NEXT: bl Callee@notoc
; CHECK-NEXT: lwz r3, 0(r30)
; CHECK-NEXT: add r3, r29, r3
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
%0 = load <4 x i32>, ptr @inputVi32, align 16
tail call void @Callee()
%1 = load <4 x i32>, ptr @inputVi32, align 16
%2 = extractelement <4 x i32> %1, i32 2
%3 = extractelement <4 x i32> %0, i64 1
%4 = add nsw i32 %2, %3
tail call void @Callee()
%5 = load <4 x i32>, ptr @inputVi32, align 16
%vecext2 = extractelement <4 x i32> %5, i32 0
%add3 = add nsw i32 %4, %vecext2
ret i32 %add3
}
define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
; Returns ArrayIn[4] + %a + getAddr(@ArrayIn).
; Expected output: the pld result in r4 is used twice - as the base of the
; lwz at offset 16 and, copied into r3 with mr, as the argument of the call
; to getAddr. No .Lpcrel label or .reloc is expected here.
; NOTE(review): presumably the hint is omitted because the address itself
; is also needed as a value - confirm against the pass that emits the
; relocation.
; CHECK-LABEL: UseAddr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: pld r4, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT: lwz r5, 16(r4)
; CHECK-NEXT: add r30, r5, r3
; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: bl getAddr@notoc
; CHECK-NEXT: add r3, r30, r3
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
%0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 4), align 4
%add = add nsw i32 %0, %a
%call = tail call signext i32 @getAddr(ptr @ArrayIn)
%add1 = add nsw i32 %add, %call
ret i32 %add1
}
; External function that receives the address of @ArrayIn in UseAddr.
declare signext i32 @getAddr(ptr) local_unnamed_addr
define dso_local nonnull ptr @AddrTaken32() local_unnamed_addr #0 {
; Returns the address of @input32 without accessing the memory behind it.
; Expected output: a lone pld followed by blr, with no .Lpcrel label or
; .reloc, since there is no load or store to pair the pld with.
; CHECK-LABEL: AddrTaken32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, input32@got@pcrel(0), 1
; CHECK-NEXT: blr
entry:
ret ptr @input32
}
; Attribute group #0 is attached to every function defined in this file.
attributes #0 = { nounwind }