
Previously by default, when not using `--icf=`, lld would not deduplicate string literals. This reveals reliance on undefined behavior where string literal addresses are compared instead of using string equality checks. While ideally you would be able to easily identify and eliminate the reliance on this UB, this can be difficult, especially for third party code, and increases the friction and risk of users migrating to lld. This flips the default to deduplicate strings unless `--no-deduplicate-strings` is passed, matching ld64's behavior. Differential Revision: https://reviews.llvm.org/D140517
153 lines
4.4 KiB
ArmAsm
153 lines
4.4 KiB
ArmAsm
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo1.s -o %t/foo1.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo2.s -o %t/foo2.o

## First link passes --icf=all; its disassembly, symbol table and rebase / bind
## tables are matched against both the CHECK and the LITERALS prefixes.
# RUN: %lld -dylib --icf=all -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo
# RUN: llvm-objdump --no-print-imm-hex --macho --rebase --bind --syms -d %t/foo | FileCheck %s --check-prefixes=CHECK,LITERALS

## Second link passes no ICF flag; its output is matched against the LITERALS
## prefix only.
# RUN: %lld -dylib -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo
# RUN: llvm-objdump --no-print-imm-hex --macho --rebase --bind --syms -d %t/foo | FileCheck %s --check-prefix=LITERALS

## _foo1 / _foo2 and _named_foo1 / _named_foo2 must show up as adjacent labels
## in front of a single instruction that references _named_cfstring, while each
## *_utf16 function keeps an instruction of its own.
# CHECK: (__TEXT,__text) section
# CHECK-NEXT: _foo1:
# CHECK-NEXT: _foo2:
# CHECK-NEXT: movq _named_cfstring(%rip), %rax
# CHECK-NEXT: _foo1_utf16:
# CHECK-NEXT: movq [[#]](%rip), %rax
# CHECK-NEXT: _named_foo1:
# CHECK-NEXT: _named_foo2:
# CHECK-NEXT: movq _named_cfstring(%rip), %rax
# CHECK-NEXT: _foo2_utf16:
# CHECK-NEXT: movq [[#]](%rip), %rax

## _foo1 and _foo2 must be listed with the same symbol value.
# CHECK: SYMBOL TABLE:
# CHECK-DAG: [[#%.16x,FOO:]] g F __TEXT,__text _foo1
# CHECK-DAG: [[#FOO]] g F __TEXT,__text _foo2

## Make sure we don't emit redundant bind / rebase opcodes for folded sections.
## Each table must contain exactly three __cfstring entries (presumably one for
## the shared UTF-8 CFString and one per UTF-16 CFString -- confirm).
# LITERALS: Rebase table:
# LITERALS-NEXT: segment section address type
# LITERALS-NEXT: __DATA_CONST __cfstring {{.*}} pointer
# LITERALS-NEXT: __DATA_CONST __cfstring {{.*}} pointer
# LITERALS-NEXT: __DATA_CONST __cfstring {{.*}} pointer
# LITERALS-EMPTY:
# LITERALS-NEXT: Bind table:
# LITERALS-NEXT: segment section address type addend dylib symbol
# LITERALS-NEXT: __DATA_CONST __cfstring {{.*}} pointer 0 CoreFoundation ___CFConstantStringClassReference
# LITERALS-NEXT: __DATA_CONST __cfstring {{.*}} pointer 0 CoreFoundation ___CFConstantStringClassReference
# LITERALS-NEXT: __DATA_CONST __cfstring {{.*}} pointer 0 CoreFoundation ___CFConstantStringClassReference
# LITERALS-EMPTY:

#--- foo1.s
## x86-64 Mach-O input in AT&T syntax (assembled by the llvm-mc step above with
## -triple=x86_64-apple-darwin). Defines _foo1, _foo1_utf16 and _named_foo1;
## each one loads from one of the CFString records declared below.

.cstring
L_.str.0:
  .asciz "bar"
## This string is at a different offset than the corresponding "foo" string in
## foo2.s. Make sure that we treat references to either string as equivalent.
L_.str:
  .asciz "foo"

## Every CFString record in this file has the same shape: class reference,
## a 32-bit word tagged with the encoding, 4 bytes of padding, a pointer to the
## character data, and the string length.
.section __DATA,__cfstring
.p2align 3
L__unnamed_cfstring_:
  .quad ___CFConstantStringClassReference
  .long 1992 ## utf-8
  .space 4
  .quad L_.str
  .quad 3 ## strlen

## Same contents as the record above, but labelled with a regular symbol
## instead of an `L`-prefixed one.
_named_cfstring:
  .quad ___CFConstantStringClassReference
  .long 1992 ## utf-8
  .space 4
  .quad L_.str
  .quad 3 ## strlen

## NOTE(review): foo2.s places a `.p2align 1` in front of its l_.ustr; this
## file does not. Confirm whether the difference is intentional.
.section __TEXT,__ustring
l_.ustr:
  .short 102 ## f
  .short 111 ## o
  .short 0 ## \0
  .short 111 ## o
  .short 0 ## \0

## FIXME: We should be able to deduplicate UTF-16 CFStrings too.
## Note that this string contains a null byte in the middle -- any dedup code
## we add should take care to handle this correctly.
## Technically, UTF-8 should support encoding null bytes too, but since we
## atomize the __cstring section at every null byte, this isn't supported. ld64
## doesn't support it either, and clang seems to always emit a UTF-16 CFString
## if it needs to contain a null, so I think we're good here.
.section __DATA,__cfstring
.p2align 3
L__unnamed_cfstring_.2:
  .quad ___CFConstantStringClassReference
  .long 2000 ## utf-16
  .space 4
  .quad l_.ustr
  .quad 4 ## strlen

.text
.globl _foo1, _foo1_utf16, _named_foo1
## Each function is a single RIP-relative load from one CFString record.
_foo1:
  movq L__unnamed_cfstring_(%rip), %rax

_foo1_utf16:
  movq L__unnamed_cfstring_.2(%rip), %rax

_named_foo1:
  movq _named_cfstring(%rip), %rax

.subsections_via_symbols

#--- foo2.s
## x86-64 Mach-O input in AT&T syntax (assembled by the llvm-mc step above with
## -triple=x86_64-apple-darwin). Defines _foo2, _foo2_utf16 and _named_foo2;
## each one loads from one of the CFString records declared below.

## Unlike foo1.s there is no "bar" string in front of "foo" here.
.cstring
L_.str:
  .asciz "foo"

## Every CFString record in this file has the same shape: class reference,
## a 32-bit word tagged with the encoding, 4 bytes of padding, a pointer to the
## character data, and the string length.
.section __DATA,__cfstring
.p2align 3
L__unnamed_cfstring_:
  .quad ___CFConstantStringClassReference
  .long 1992 ## utf-8
  .space 4
  .quad L_.str
  .quad 3 ## strlen

## Same contents as the record above, but labelled with a regular symbol
## instead of an `L`-prefixed one.
_named_cfstring:
  .quad ___CFConstantStringClassReference
  .long 1992 ## utf-8
  .space 4
  .quad L_.str
  .quad 3 ## strlen

.section __TEXT,__ustring
.p2align 1
l_.ustr:
  .short 102 ## f
  .short 111 ## o
  .short 0 ## \0
  .short 111 ## o
  .short 0 ## \0

## UTF-16 CFString whose character data contains an embedded null.
.section __DATA,__cfstring
.p2align 3
L__unnamed_cfstring_.2:
  .quad ___CFConstantStringClassReference
  .long 2000 ## utf-16
  .space 4
  .quad l_.ustr
  .quad 4 ## strlen

.text
.globl _foo2, _foo2_utf16, _named_foo2
## Each function is a single RIP-relative load from one CFString record.
_foo2:
  movq L__unnamed_cfstring_(%rip), %rax

_foo2_utf16:
  movq L__unnamed_cfstring_.2(%rip), %rax

_named_foo2:
  movq _named_cfstring(%rip), %rax

.subsections_via_symbols