-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AArch64,ELF] Allow implicit $d/$x at section beginning #99718
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
49f040d
db2a337
5ab5885
052ae84
76b781a
268ca0d
ba13751
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# REQUIRES: aarch64 | ||
# RUN: llvm-mc -filetype=obj -triple=aarch64 -implicit-mapsyms %s -o %t.o | ||
# RUN: ld.lld %t.o -z keep-text-section-prefix -o %t | ||
# RUN: llvm-objdump -d --no-print-imm-hex --show-all-symbols %t | FileCheck %s | ||
|
||
# CHECK: <_start>: | ||
# CHECK-NEXT: nop | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: <$d>: | ||
# CHECK-NEXT: .word 0x0000002a | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: <$x>: | ||
# CHECK-NEXT: nop | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: Disassembly of section .text.hot: | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: <.text.hot>: | ||
# CHECK-NEXT: nop | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: <$d>: | ||
# CHECK-NEXT: .word 0x0000002a | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: <$d>: | ||
# CHECK-NEXT: <$x>: | ||
# CHECK-NEXT: udf #42 | ||
# CHECK-EMPTY: | ||
# CHECK-NEXT: <$x>: | ||
# CHECK-NEXT: nop | ||
|
||
## Trailing data followed by a section starting with an instruction. | ||
.section .text.1,"ax" | ||
.globl _start | ||
_start: | ||
nop | ||
.long 42 | ||
.section .text.2,"ax" | ||
nop | ||
|
||
## Trailing data followed by a section starting with a data directive. | ||
.section .text.hot.1,"ax" | ||
nop | ||
.long 42 | ||
.section .text.hot.2,"ax" | ||
.long 42 | ||
nop |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,14 +24,15 @@ | |
#include "llvm/MC/MCAssembler.h" | ||
#include "llvm/MC/MCCodeEmitter.h" | ||
#include "llvm/MC/MCContext.h" | ||
#include "llvm/MC/MCELFObjectWriter.h" | ||
#include "llvm/MC/MCELFStreamer.h" | ||
#include "llvm/MC/MCExpr.h" | ||
#include "llvm/MC/MCInst.h" | ||
#include "llvm/MC/MCObjectWriter.h" | ||
#include "llvm/MC/MCSectionELF.h" | ||
#include "llvm/MC/MCStreamer.h" | ||
#include "llvm/MC/MCSubtargetInfo.h" | ||
#include "llvm/MC/MCSymbolELF.h" | ||
#include "llvm/MC/MCTargetOptions.h" | ||
#include "llvm/MC/MCWinCOFFStreamer.h" | ||
#include "llvm/Support/Casting.h" | ||
#include "llvm/Support/FormattedStream.h" | ||
|
@@ -176,19 +177,29 @@ void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) { | |
/// by MachO. Beware! | ||
class AArch64ELFStreamer : public MCELFStreamer { | ||
public: | ||
friend AArch64TargetELFStreamer; | ||
AArch64ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, | ||
std::unique_ptr<MCObjectWriter> OW, | ||
std::unique_ptr<MCCodeEmitter> Emitter) | ||
: MCELFStreamer(Context, std::move(TAB), std::move(OW), | ||
std::move(Emitter)), | ||
LastEMS(EMS_None) {} | ||
LastEMS(EMS_None) { | ||
auto *TO = getContext().getTargetOptions(); | ||
ImplicitMapSyms = TO && TO->ImplicitMapSyms; | ||
} | ||
|
||
void changeSection(MCSection *Section, uint32_t Subsection = 0) override { | ||
// We have to keep track of the mapping symbol state of any sections we | ||
// use. Each one should start off as EMS_None, which is provided as the | ||
// default constructor by DenseMap::lookup. | ||
// Save the mapping symbol state for potential reuse when revisiting the | ||
// section. When ImplicitMapSyms is true, the initial state is | ||
// EMS_A64 for text sections and EMS_Data for the others. | ||
LastMappingSymbols[getCurrentSection().first] = LastEMS; | ||
LastEMS = LastMappingSymbols.lookup(Section); | ||
auto It = LastMappingSymbols.find(Section); | ||
if (It != LastMappingSymbols.end()) | ||
LastEMS = It->second; | ||
else if (ImplicitMapSyms) | ||
LastEMS = Section->isText() ? EMS_A64 : EMS_Data; | ||
else | ||
LastEMS = EMS_None; | ||
|
||
MCELFStreamer::changeSection(Section, Subsection); | ||
} | ||
|
@@ -269,13 +280,15 @@ class AArch64ELFStreamer : public MCELFStreamer { | |
LastEMS = EMS_A64; | ||
} | ||
|
||
void emitMappingSymbol(StringRef Name) { | ||
MCSymbol *emitMappingSymbol(StringRef Name) { | ||
auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name)); | ||
emitLabel(Symbol); | ||
return Symbol; | ||
MaskRay marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; | ||
ElfMappingSymbol LastEMS; | ||
bool ImplicitMapSyms; | ||
}; | ||
} // end anonymous namespace | ||
|
||
|
@@ -297,6 +310,58 @@ void AArch64TargetELFStreamer::finish() { | |
AArch64ELFStreamer &S = getStreamer(); | ||
MCContext &Ctx = S.getContext(); | ||
auto &Asm = S.getAssembler(); | ||
|
||
// If ImplicitMapSyms is specified, ensure that text sections end with | ||
// the A64 state while non-text sections end with the data state. When | ||
// sections are combined by the linker, the subsequent section will start with | ||
// the right state. The ending mapping symbol is added right after the last | ||
// symbol relative to the section. When a dumb linker combines (.text.0; .word | ||
// 0) and (.text.1; .word 0), the ending $x of .text.0 precedes the $d of | ||
// .text.1, even if they have the same address. | ||
if (S.ImplicitMapSyms) { | ||
auto &Syms = Asm.getSymbols(); | ||
const size_t NumSyms = Syms.size(); | ||
DenseMap<MCSection *, std::pair<size_t, MCSymbol *>> EndMapSym; | ||
for (MCSection &Sec : Asm) { | ||
S.switchSection(&Sec); | ||
if (S.LastEMS == (Sec.isText() ? AArch64ELFStreamer::EMS_Data | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this can only happen if someone creates a named section and forgets to put "x" in the flags. It is a bit disappointing that addition of an instruction doesn't implicitly set the "x" flag on a section. It looks like both GNU as and LLVM don't do this though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. GNU assembler doesn't warn about instructions in data sections and LLVM follow suits: I'd hope that instructions in data sections lead to a warning. |
||
: AArch64ELFStreamer::EMS_A64)) | ||
EndMapSym.insert( | ||
{&Sec, {NumSyms, S.emitMappingSymbol(Sec.isText() ? "$x" : "$d")}}); | ||
} | ||
if (Syms.size() != NumSyms) { | ||
SmallVector<const MCSymbol *, 0> NewSyms; | ||
DenseMap<MCSection *, size_t> Cnt; | ||
Syms.truncate(NumSyms); | ||
// Find the last symbol index for each candidate section. | ||
for (auto [I, Sym] : llvm::enumerate(Syms)) { | ||
if (!Sym->isInSection()) | ||
continue; | ||
auto It = EndMapSym.find(&Sym->getSection()); | ||
if (It != EndMapSym.end()) | ||
It->second.first = I; | ||
} | ||
SmallVector<size_t, 0> Idx; | ||
for (auto [I, Sym] : llvm::enumerate(Syms)) { | ||
NewSyms.push_back(Sym); | ||
if (!Sym->isInSection()) | ||
continue; | ||
auto It = EndMapSym.find(&Sym->getSection()); | ||
// If `Sym` is the last symbol relative to the section, add the ending | ||
// mapping symbol after `Sym`. | ||
if (It != EndMapSym.end() && I == It->second.first) { | ||
NewSyms.push_back(It->second.second); | ||
Idx.push_back(I); | ||
} | ||
} | ||
Syms = std::move(NewSyms); | ||
// F.second holds the number of symbols added before the FILE symbol. | ||
// Take into account the inserted mapping symbols. | ||
for (auto &F : S.getWriter().getFileNames()) | ||
F.second += llvm::lower_bound(Idx, F.second) - Idx.begin(); | ||
} | ||
} | ||
|
||
MCSectionELF *MemtagSec = nullptr; | ||
for (const MCSymbol &Symbol : Asm.symbols()) { | ||
const auto &Sym = cast<MCSymbolELF>(Symbol); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,10 @@ | ||
// RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-objdump -t - | FileCheck %s --match-full-lines | ||
// RUN: llvm-mc -triple=aarch64 -filetype=obj -implicit-mapsyms %s | llvm-objdump -t - | FileCheck %s --check-prefix=CHECK1 --match-full-lines | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not a comment on this test. I think it would be worth a test, probably in lld, or possibly via yaml2obj to test what the disassembler does when it gets two mapping symbols at the same address. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added lld/test/ELF/aarch64-mapsyms-implicit.s |
||
|
||
/// The test covers many state transitions. Let's use the first state and the last state to describe a section. | ||
/// .text goes through cd -> dd -> cc -> dd. | ||
/// .data goes through dd -> dc -> cd. | ||
.file "0.s" | ||
.section .text1,"ax" | ||
add w0, w0, w0 | ||
|
||
|
@@ -12,29 +17,61 @@ add w0, w0, w0 | |
.popsection | ||
|
||
.text | ||
add w1, w1, w1 | ||
.word 42 | ||
|
||
.section .text1,"ax" | ||
add w1, w1, w1 | ||
|
||
.text | ||
add w1, w1, w1 | ||
|
||
.section .data,"aw" | ||
.word 42 | ||
add w0, w0, w0 | ||
|
||
.text | ||
.word 42 | ||
|
||
## .rodata and subsequent symbols should be after the FILE symbol of "1.s". | ||
.file "1.s" | ||
.section .rodata,"a" | ||
.word 42 | ||
add w0, w0, w0 | ||
|
||
.section .data,"aw" | ||
add w0, w0, w0 | ||
.word 42 | ||
|
||
.text | ||
|
||
.ident "clang" | ||
.section ".note.GNU-stack","",@progbits | ||
|
||
// CHECK: SYMBOL TABLE: | ||
// CHECK-NEXT: 0000000000000000 l .text1 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000004 l .text 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000000 l .data 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000008 l .text 0000000000000000 $x | ||
// CHECK-NEXT: 000000000000000c l .text 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000000 l .rodata 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000004 l .rodata 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000000 l .comment 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000000 l df *ABS* 0000000000000000 0.s | ||
// CHECK-NEXT: 0000000000000000 l .text1 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000004 l .text 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000000 l .data 0000000000000000 $d | ||
// CHECK-NEXT: 000000000000000c l .text 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000008 l .data 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000010 l .text 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000000 l df *ABS* 0000000000000000 1.s | ||
// CHECK-NEXT: 0000000000000000 l .rodata 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000004 l .rodata 0000000000000000 $x | ||
// CHECK-NEXT: 0000000000000010 l .data 0000000000000000 $d | ||
// CHECK-NEXT: 0000000000000000 l .comment 0000000000000000 $d | ||
// CHECK-NOT: {{.}} | ||
|
||
// CHECK1: SYMBOL TABLE: | ||
// CHECK1-NEXT: 0000000000000000 l df *ABS* 0000000000000000 0.s | ||
// CHECK1-NEXT: 0000000000000004 l .text 0000000000000000 $d | ||
// CHECK1-NEXT: 000000000000000c l .text 0000000000000000 $x | ||
// CHECK1-NEXT: 0000000000000008 l .data 0000000000000000 $x | ||
// CHECK1-NEXT: 0000000000000010 l .text 0000000000000000 $d | ||
// CHECK1-NEXT: 0000000000000014 l .text 0000000000000000 $x | ||
// CHECK1-NEXT: 0000000000000000 l df *ABS* 0000000000000000 1.s | ||
// CHECK1-NEXT: 0000000000000004 l .rodata 0000000000000000 $x | ||
// CHECK1-NEXT: 0000000000000008 l .rodata 0000000000000000 $d | ||
// CHECK1-NEXT: 0000000000000010 l .data 0000000000000000 $d | ||
// CHECK1-NOT: {{.}} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This points at an issue in llvm-objdump. It could be sorting symbols at the same address by name.
Looking at
llvm-readelf --symbols
for 0x201134 I can see $x before $d as we would expect, however llvm-objdump has put the $d before $x.This is unfortunate as it means every trailing
$x
is going to result in an incorrect disassembly when there's a leading$d
.