Skip to content

Commit d64efe4

Browse files
authored
[lld-macho] Remove symbols to __mod_init_func with -init_offsets (#97156)
When `-fixup_chains`/`-init_offsets` is used, a different section, `__init_offsets`, is synthesized from `__mod_init_func`. If there are any symbols defined inside `__mod_init_func`, they are added to the symbol table unconditionally while processing the input files. Later, when querying these symbols' addresses (when constructing the symtab or exports trie), we crash with a null deref, as there is no output section assigned to them. Just making the symbols point to `__init_offsets` is a bad idea, as the new section stores 32-bit integers instead of 64-bit pointers; accessing the symbols would not do what the programmer intended. We should entirely omit them from the output. This is what ld64 and ld-prime do. This patch uses the same mechanism as dead-stripping to mark these symbols as not needed in the output. There might be nicer fixes than this workaround; this is discussed in #97155. Fixes #79894 (comment). Fixes #94716.
1 parent 2da0055 commit d64efe4

File tree

4 files changed

+43
-2
lines changed

4 files changed

+43
-2
lines changed

lld/MachO/Driver.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -1393,6 +1393,12 @@ static void handleExplicitExports() {
13931393
}
13941394
}
13951395

1396+
static void eraseInitializerSymbols() {
1397+
for (ConcatInputSection *isec : in.initOffsets->inputs())
1398+
for (Defined *sym : isec->symbols)
1399+
sym->used = false;
1400+
}
1401+
13961402
namespace lld {
13971403
namespace macho {
13981404
bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
@@ -1971,6 +1977,11 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
19711977
if (config->deadStrip)
19721978
markLive();
19731979

1980+
// Ensure that no symbols point inside __mod_init_func sections if they are
1981+
// removed due to -init_offsets. This must run after dead stripping.
1982+
if (config->emitInitOffsets)
1983+
eraseInitializerSymbols();
1984+
19741985
// Categories are not subject to dead-strip. The __objc_catlist section is
19751986
// marked as NO_DEAD_STRIP and that propagates into all category data.
19761987
if (args.hasArg(OPT_check_category_conflicts))

lld/MachO/Writer.cpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,17 @@ void Writer::treatSpecialUndefineds() {
640640

641641
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
642642
const lld::macho::Reloc &r) {
643-
assert(sym->isLive());
643+
if (!sym->isLive()) {
644+
if (Defined *defined = dyn_cast<Defined>(sym)) {
645+
if (config->emitInitOffsets &&
646+
defined->isec()->getName() == section_names::moduleInitFunc)
647+
fatal(isec->getLocation(r.offset) + ": cannot reference " +
648+
sym->getName() +
649+
" defined in __mod_init_func when -init_offsets is used");
650+
}
651+
assert(false && "referenced symbol must be live");
652+
}
653+
644654
const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
645655

646656
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {

lld/test/MachO/init-offsets.s

+5-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out
1313
# RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt
1414
# RUN: od -An -txI %t/section.bin >> %t/dump.txt
15-
# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt
15+
# RUN: FileCheck --check-prefix=CONTENT --implicit-check-not=_init_ptr %s < %t/dump.txt
1616

1717
## This test checks that:
1818
## - __mod_init_func is replaced by __init_offsets.
@@ -21,6 +21,7 @@
2121
## command line, and in the order they show up within __mod_init_func.
2222
## - for undefined and dylib symbols, stubs are created, and the offsets point to those.
2323
## - offsets are relative to __TEXT's address, they aren't an absolute virtual address.
24+
## - symbols defined within __mod_init_func are ignored.
2425

2526
# FLAGS: sectname __init_offsets
2627
# FLAGS-NEXT: segname __TEXT
@@ -48,13 +49,15 @@
4849

4950
#--- first.s
5051
.globl _first_init, ___isnan, _main
52+
.globl _init_ptr_1
5153
.text
5254
_first_init:
5355
ret
5456
_main:
5557
ret
5658

5759
.section __DATA,__mod_init_func,mod_init_funcs
60+
_init_ptr_1:
5861
.quad _first_init
5962
.quad ___isnan
6063

@@ -68,6 +71,7 @@ _second_init:
6871

6972
.section __DATA,__mod_init_func,mod_init_funcs
7073
.quad _undefined
74+
_init_ptr_2:
7175
.quad _second_init
7276

7377
.subsections_via_symbols

lld/test/MachO/invalid/init-offsets.s

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# REQUIRES: x86
2+
3+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
4+
# RUN: not %lld -lSystem -init_offsets %t.o -o /dev/null 2>&1 | FileCheck %s
5+
6+
# CHECK: error: {{.*}}init-offsets.s.tmp.o:(symbol _main+0x3): cannot reference _init_slot defined in __mod_init_func when -init_offsets is used
7+
8+
.globl _main
9+
.text
10+
_main:
11+
leaq _init_slot(%rip), %rax
12+
13+
.section __DATA,__mod_init_func,mod_init_funcs
14+
_init_slot:
15+
.quad _main
16+

0 commit comments

Comments
 (0)