2
0

MachObjectWriter.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953
  1. //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/MC/MCMachObjectWriter.h"
  10. #include "llvm/ADT/StringMap.h"
  11. #include "llvm/ADT/Twine.h"
  12. #include "llvm/MC/MCAsmBackend.h"
  13. #include "llvm/MC/MCAsmLayout.h"
  14. #include "llvm/MC/MCAssembler.h"
  15. #include "llvm/MC/MCExpr.h"
  16. #include "llvm/MC/MCFixupKindInfo.h"
  17. #include "llvm/MC/MCObjectWriter.h"
  18. #include "llvm/MC/MCSectionMachO.h"
  19. #include "llvm/MC/MCSymbolMachO.h"
  20. #include "llvm/MC/MCValue.h"
  21. #include "llvm/Support/Debug.h"
  22. #include "llvm/Support/ErrorHandling.h"
  23. #include "llvm/Support/MachO.h"
  24. #include "llvm/Support/raw_ostream.h"
  25. #include <vector>
  26. using namespace llvm;
  27. #define DEBUG_TYPE "mc"
  28. void MachObjectWriter::reset() {
  29. Relocations.clear();
  30. IndirectSymBase.clear();
  31. StringTable.clear();
  32. LocalSymbolData.clear();
  33. ExternalSymbolData.clear();
  34. UndefinedSymbolData.clear();
  35. MCObjectWriter::reset();
  36. }
  37. bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
  38. // Undefined symbols are always extern.
  39. if (S.isUndefined())
  40. return true;
  41. // References to weak definitions require external relocation entries; the
  42. // definition may not always be the one in the same object file.
  43. if (cast<MCSymbolMachO>(S).isWeakDefinition())
  44. return true;
  45. // Otherwise, we can use an internal relocation.
  46. return false;
  47. }
  48. bool MachObjectWriter::
  49. MachSymbolData::operator<(const MachSymbolData &RHS) const {
  50. return Symbol->getName() < RHS.Symbol->getName();
  51. }
  52. bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
  53. const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
  54. (MCFixupKind) Kind);
  55. return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
  56. }
  57. uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
  58. const MCAsmLayout &Layout) const {
  59. return getSectionAddress(Fragment->getParent()) +
  60. Layout.getFragmentOffset(Fragment);
  61. }
  62. uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
  63. const MCAsmLayout &Layout) const {
  64. // If this is a variable, then recursively evaluate now.
  65. if (S.isVariable()) {
  66. if (const MCConstantExpr *C =
  67. dyn_cast<const MCConstantExpr>(S.getVariableValue()))
  68. return C->getValue();
  69. MCValue Target;
  70. if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
  71. report_fatal_error("unable to evaluate offset for variable '" +
  72. S.getName() + "'");
  73. // Verify that any used symbols are defined.
  74. if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
  75. report_fatal_error("unable to evaluate offset to undefined symbol '" +
  76. Target.getSymA()->getSymbol().getName() + "'");
  77. if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
  78. report_fatal_error("unable to evaluate offset to undefined symbol '" +
  79. Target.getSymB()->getSymbol().getName() + "'");
  80. uint64_t Address = Target.getConstant();
  81. if (Target.getSymA())
  82. Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
  83. if (Target.getSymB())
  84. Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
  85. return Address;
  86. }
  87. return getSectionAddress(S.getFragment()->getParent()) +
  88. Layout.getSymbolOffset(S);
  89. }
  90. uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
  91. const MCAsmLayout &Layout) const {
  92. uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
  93. unsigned Next = Sec->getLayoutOrder() + 1;
  94. if (Next >= Layout.getSectionOrder().size())
  95. return 0;
  96. const MCSection &NextSec = *Layout.getSectionOrder()[Next];
  97. if (NextSec.isVirtualSection())
  98. return 0;
  99. return OffsetToAlignment(EndAddr, NextSec.getAlignment());
  100. }
  101. void MachObjectWriter::writeHeader(unsigned NumLoadCommands,
  102. unsigned LoadCommandsSize,
  103. bool SubsectionsViaSymbols) {
  104. uint32_t Flags = 0;
  105. if (SubsectionsViaSymbols)
  106. Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
  107. // struct mach_header (28 bytes) or
  108. // struct mach_header_64 (32 bytes)
  109. uint64_t Start = OS.tell();
  110. (void) Start;
  111. write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
  112. write32(TargetObjectWriter->getCPUType());
  113. write32(TargetObjectWriter->getCPUSubtype());
  114. write32(MachO::MH_OBJECT);
  115. write32(NumLoadCommands);
  116. write32(LoadCommandsSize);
  117. write32(Flags);
  118. if (is64Bit())
  119. write32(0); // reserved
  120. assert(OS.tell() - Start ==
  121. (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header)));
  122. }
  123. /// writeSegmentLoadCommand - Write a segment load command.
  124. ///
  125. /// \param NumSections The number of sections in this segment.
  126. /// \param SectionDataSize The total size of the sections.
  127. void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections,
  128. uint64_t VMSize,
  129. uint64_t SectionDataStartOffset,
  130. uint64_t SectionDataSize) {
  131. // struct segment_command (56 bytes) or
  132. // struct segment_command_64 (72 bytes)
  133. uint64_t Start = OS.tell();
  134. (void) Start;
  135. unsigned SegmentLoadCommandSize =
  136. is64Bit() ? sizeof(MachO::segment_command_64):
  137. sizeof(MachO::segment_command);
  138. write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
  139. write32(SegmentLoadCommandSize +
  140. NumSections * (is64Bit() ? sizeof(MachO::section_64) :
  141. sizeof(MachO::section)));
  142. writeBytes("", 16);
  143. if (is64Bit()) {
  144. write64(0); // vmaddr
  145. write64(VMSize); // vmsize
  146. write64(SectionDataStartOffset); // file offset
  147. write64(SectionDataSize); // file size
  148. } else {
  149. write32(0); // vmaddr
  150. write32(VMSize); // vmsize
  151. write32(SectionDataStartOffset); // file offset
  152. write32(SectionDataSize); // file size
  153. }
  154. // maxprot
  155. write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
  156. // initprot
  157. write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
  158. write32(NumSections);
  159. write32(0); // flags
  160. assert(OS.tell() - Start == SegmentLoadCommandSize);
  161. }
  162. void MachObjectWriter::writeSection(const MCAssembler &Asm,
  163. const MCAsmLayout &Layout,
  164. const MCSection &Sec, uint64_t FileOffset,
  165. uint64_t RelocationsStart,
  166. unsigned NumRelocations) {
  167. uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
  168. const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
  169. // The offset is unused for virtual sections.
  170. if (Section.isVirtualSection()) {
  171. assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
  172. FileOffset = 0;
  173. }
  174. // struct section (68 bytes) or
  175. // struct section_64 (80 bytes)
  176. uint64_t Start = OS.tell();
  177. (void) Start;
  178. writeBytes(Section.getSectionName(), 16);
  179. writeBytes(Section.getSegmentName(), 16);
  180. if (is64Bit()) {
  181. write64(getSectionAddress(&Sec)); // address
  182. write64(SectionSize); // size
  183. } else {
  184. write32(getSectionAddress(&Sec)); // address
  185. write32(SectionSize); // size
  186. }
  187. write32(FileOffset);
  188. unsigned Flags = Section.getTypeAndAttributes();
  189. if (Section.hasInstructions())
  190. Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
  191. assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
  192. write32(Log2_32(Section.getAlignment()));
  193. write32(NumRelocations ? RelocationsStart : 0);
  194. write32(NumRelocations);
  195. write32(Flags);
  196. write32(IndirectSymBase.lookup(&Sec)); // reserved1
  197. write32(Section.getStubSize()); // reserved2
  198. if (is64Bit())
  199. write32(0); // reserved3
  200. assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) :
  201. sizeof(MachO::section)));
  202. }
  203. void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
  204. uint32_t NumSymbols,
  205. uint32_t StringTableOffset,
  206. uint32_t StringTableSize) {
  207. // struct symtab_command (24 bytes)
  208. uint64_t Start = OS.tell();
  209. (void) Start;
  210. write32(MachO::LC_SYMTAB);
  211. write32(sizeof(MachO::symtab_command));
  212. write32(SymbolOffset);
  213. write32(NumSymbols);
  214. write32(StringTableOffset);
  215. write32(StringTableSize);
  216. assert(OS.tell() - Start == sizeof(MachO::symtab_command));
  217. }
  218. void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
  219. uint32_t NumLocalSymbols,
  220. uint32_t FirstExternalSymbol,
  221. uint32_t NumExternalSymbols,
  222. uint32_t FirstUndefinedSymbol,
  223. uint32_t NumUndefinedSymbols,
  224. uint32_t IndirectSymbolOffset,
  225. uint32_t NumIndirectSymbols) {
  226. // struct dysymtab_command (80 bytes)
  227. uint64_t Start = OS.tell();
  228. (void) Start;
  229. write32(MachO::LC_DYSYMTAB);
  230. write32(sizeof(MachO::dysymtab_command));
  231. write32(FirstLocalSymbol);
  232. write32(NumLocalSymbols);
  233. write32(FirstExternalSymbol);
  234. write32(NumExternalSymbols);
  235. write32(FirstUndefinedSymbol);
  236. write32(NumUndefinedSymbols);
  237. write32(0); // tocoff
  238. write32(0); // ntoc
  239. write32(0); // modtaboff
  240. write32(0); // nmodtab
  241. write32(0); // extrefsymoff
  242. write32(0); // nextrefsyms
  243. write32(IndirectSymbolOffset);
  244. write32(NumIndirectSymbols);
  245. write32(0); // extreloff
  246. write32(0); // nextrel
  247. write32(0); // locreloff
  248. write32(0); // nlocrel
  249. assert(OS.tell() - Start == sizeof(MachO::dysymtab_command));
  250. }
  251. MachObjectWriter::MachSymbolData *
  252. MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
  253. for (auto *SymbolData :
  254. {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
  255. for (MachSymbolData &Entry : *SymbolData)
  256. if (Entry.Symbol == &Sym)
  257. return &Entry;
  258. return nullptr;
  259. }
  260. const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
  261. const MCSymbol *S = &Sym;
  262. while (S->isVariable()) {
  263. const MCExpr *Value = S->getVariableValue();
  264. const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
  265. if (!Ref)
  266. return *S;
  267. S = &Ref->getSymbol();
  268. }
  269. return *S;
  270. }
  271. void MachObjectWriter::writeNlist(MachSymbolData &MSD,
  272. const MCAsmLayout &Layout) {
  273. const MCSymbol *Symbol = MSD.Symbol;
  274. const MCSymbol &Data = *Symbol;
  275. const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
  276. uint8_t SectionIndex = MSD.SectionIndex;
  277. uint8_t Type = 0;
  278. uint64_t Address = 0;
  279. bool IsAlias = Symbol != AliasedSymbol;
  280. const MCSymbol &OrigSymbol = *Symbol;
  281. MachSymbolData *AliaseeInfo;
  282. if (IsAlias) {
  283. AliaseeInfo = findSymbolData(*AliasedSymbol);
  284. if (AliaseeInfo)
  285. SectionIndex = AliaseeInfo->SectionIndex;
  286. Symbol = AliasedSymbol;
  287. // FIXME: Should this update Data as well? Do we need OrigSymbol at all?
  288. }
  289. // Set the N_TYPE bits. See <mach-o/nlist.h>.
  290. //
  291. // FIXME: Are the prebound or indirect fields possible here?
  292. if (IsAlias && Symbol->isUndefined())
  293. Type = MachO::N_INDR;
  294. else if (Symbol->isUndefined())
  295. Type = MachO::N_UNDF;
  296. else if (Symbol->isAbsolute())
  297. Type = MachO::N_ABS;
  298. else
  299. Type = MachO::N_SECT;
  300. // FIXME: Set STAB bits.
  301. if (Data.isPrivateExtern())
  302. Type |= MachO::N_PEXT;
  303. // Set external bit.
  304. if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
  305. Type |= MachO::N_EXT;
  306. // Compute the symbol address.
  307. if (IsAlias && Symbol->isUndefined())
  308. Address = AliaseeInfo->StringIndex;
  309. else if (Symbol->isDefined())
  310. Address = getSymbolAddress(OrigSymbol, Layout);
  311. else if (Symbol->isCommon()) {
  312. // Common symbols are encoded with the size in the address
  313. // field, and their alignment in the flags.
  314. Address = Symbol->getCommonSize();
  315. }
  316. // struct nlist (12 bytes)
  317. write32(MSD.StringIndex);
  318. write8(Type);
  319. write8(SectionIndex);
  320. // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
  321. // value.
  322. write16(cast<MCSymbolMachO>(Symbol)->getEncodedFlags());
  323. if (is64Bit())
  324. write64(Address);
  325. else
  326. write32(Address);
  327. }
  328. void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
  329. uint32_t DataOffset,
  330. uint32_t DataSize) {
  331. uint64_t Start = OS.tell();
  332. (void) Start;
  333. write32(Type);
  334. write32(sizeof(MachO::linkedit_data_command));
  335. write32(DataOffset);
  336. write32(DataSize);
  337. assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command));
  338. }
  339. static unsigned ComputeLinkerOptionsLoadCommandSize(
  340. const std::vector<std::string> &Options, bool is64Bit)
  341. {
  342. unsigned Size = sizeof(MachO::linker_option_command);
  343. for (const std::string &Option : Options)
  344. Size += Option.size() + 1;
  345. return RoundUpToAlignment(Size, is64Bit ? 8 : 4);
  346. }
  347. void MachObjectWriter::writeLinkerOptionsLoadCommand(
  348. const std::vector<std::string> &Options)
  349. {
  350. unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
  351. uint64_t Start = OS.tell();
  352. (void) Start;
  353. write32(MachO::LC_LINKER_OPTION);
  354. write32(Size);
  355. write32(Options.size());
  356. uint64_t BytesWritten = sizeof(MachO::linker_option_command);
  357. for (const std::string &Option : Options) {
  358. // Write each string, including the null byte.
  359. writeBytes(Option.c_str(), Option.size() + 1);
  360. BytesWritten += Option.size() + 1;
  361. }
  362. // Pad to a multiple of the pointer size.
  363. writeBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
  364. assert(OS.tell() - Start == Size);
  365. }
  366. void MachObjectWriter::recordRelocation(MCAssembler &Asm,
  367. const MCAsmLayout &Layout,
  368. const MCFragment *Fragment,
  369. const MCFixup &Fixup, MCValue Target,
  370. bool &IsPCRel, uint64_t &FixedValue) {
  371. TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
  372. Target, FixedValue);
  373. }
  374. void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
  375. // This is the point where 'as' creates actual symbols for indirect symbols
  376. // (in the following two passes). It would be easier for us to do this sooner
  377. // when we see the attribute, but that makes getting the order in the symbol
  378. // table much more complicated than it is worth.
  379. //
  380. // FIXME: Revisit this when the dust settles.
  381. // Report errors for use of .indirect_symbol not in a symbol pointer section
  382. // or stub section.
  383. for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
  384. ie = Asm.indirect_symbol_end(); it != ie; ++it) {
  385. const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
  386. if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
  387. Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
  388. Section.getType() != MachO::S_SYMBOL_STUBS) {
  389. MCSymbol &Symbol = *it->Symbol;
  390. report_fatal_error("indirect symbol '" + Symbol.getName() +
  391. "' not in a symbol pointer or stub section");
  392. }
  393. }
  394. // Bind non-lazy symbol pointers first.
  395. unsigned IndirectIndex = 0;
  396. for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
  397. ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
  398. const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
  399. if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS)
  400. continue;
  401. // Initialize the section indirect symbol base, if necessary.
  402. IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
  403. Asm.registerSymbol(*it->Symbol);
  404. }
  405. // Then lazy symbol pointers and symbol stubs.
  406. IndirectIndex = 0;
  407. for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
  408. ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
  409. const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
  410. if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
  411. Section.getType() != MachO::S_SYMBOL_STUBS)
  412. continue;
  413. // Initialize the section indirect symbol base, if necessary.
  414. IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
  415. // Set the symbol type to undefined lazy, but only on construction.
  416. //
  417. // FIXME: Do not hardcode.
  418. bool Created;
  419. Asm.registerSymbol(*it->Symbol, &Created);
  420. if (Created)
  421. cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
  422. }
  423. }
  424. /// computeSymbolTable - Compute the symbol table data
  425. void MachObjectWriter::computeSymbolTable(
  426. MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
  427. std::vector<MachSymbolData> &ExternalSymbolData,
  428. std::vector<MachSymbolData> &UndefinedSymbolData) {
  429. // Build section lookup table.
  430. DenseMap<const MCSection*, uint8_t> SectionIndexMap;
  431. unsigned Index = 1;
  432. for (MCAssembler::iterator it = Asm.begin(),
  433. ie = Asm.end(); it != ie; ++it, ++Index)
  434. SectionIndexMap[&*it] = Index;
  435. assert(Index <= 256 && "Too many sections!");
  436. // Build the string table.
  437. for (const MCSymbol &Symbol : Asm.symbols()) {
  438. if (!Asm.isSymbolLinkerVisible(Symbol))
  439. continue;
  440. StringTable.add(Symbol.getName());
  441. }
  442. StringTable.finalize(StringTableBuilder::MachO);
  443. // Build the symbol arrays but only for non-local symbols.
  444. //
  445. // The particular order that we collect and then sort the symbols is chosen to
  446. // match 'as'. Even though it doesn't matter for correctness, this is
  447. // important for letting us diff .o files.
  448. for (const MCSymbol &Symbol : Asm.symbols()) {
  449. // Ignore non-linker visible symbols.
  450. if (!Asm.isSymbolLinkerVisible(Symbol))
  451. continue;
  452. if (!Symbol.isExternal() && !Symbol.isUndefined())
  453. continue;
  454. MachSymbolData MSD;
  455. MSD.Symbol = &Symbol;
  456. MSD.StringIndex = StringTable.getOffset(Symbol.getName());
  457. if (Symbol.isUndefined()) {
  458. MSD.SectionIndex = 0;
  459. UndefinedSymbolData.push_back(MSD);
  460. } else if (Symbol.isAbsolute()) {
  461. MSD.SectionIndex = 0;
  462. ExternalSymbolData.push_back(MSD);
  463. } else {
  464. MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
  465. assert(MSD.SectionIndex && "Invalid section index!");
  466. ExternalSymbolData.push_back(MSD);
  467. }
  468. }
  469. // Now add the data for local symbols.
  470. for (const MCSymbol &Symbol : Asm.symbols()) {
  471. // Ignore non-linker visible symbols.
  472. if (!Asm.isSymbolLinkerVisible(Symbol))
  473. continue;
  474. if (Symbol.isExternal() || Symbol.isUndefined())
  475. continue;
  476. MachSymbolData MSD;
  477. MSD.Symbol = &Symbol;
  478. MSD.StringIndex = StringTable.getOffset(Symbol.getName());
  479. if (Symbol.isAbsolute()) {
  480. MSD.SectionIndex = 0;
  481. LocalSymbolData.push_back(MSD);
  482. } else {
  483. MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
  484. assert(MSD.SectionIndex && "Invalid section index!");
  485. LocalSymbolData.push_back(MSD);
  486. }
  487. }
  488. // External and undefined symbols are required to be in lexicographic order.
  489. std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
  490. std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
  491. // Set the symbol indices.
  492. Index = 0;
  493. for (auto *SymbolData :
  494. {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
  495. for (MachSymbolData &Entry : *SymbolData)
  496. Entry.Symbol->setIndex(Index++);
  497. for (const MCSection &Section : Asm) {
  498. for (RelAndSymbol &Rel : Relocations[&Section]) {
  499. if (!Rel.Sym)
  500. continue;
  501. // Set the Index and the IsExtern bit.
  502. unsigned Index = Rel.Sym->getIndex();
  503. assert(isInt<24>(Index));
  504. if (IsLittleEndian)
  505. Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
  506. else
  507. Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
  508. }
  509. }
  510. }
  511. void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
  512. const MCAsmLayout &Layout) {
  513. uint64_t StartAddress = 0;
  514. for (const MCSection *Sec : Layout.getSectionOrder()) {
  515. StartAddress = RoundUpToAlignment(StartAddress, Sec->getAlignment());
  516. SectionAddress[Sec] = StartAddress;
  517. StartAddress += Layout.getSectionAddressSize(Sec);
  518. // Explicitly pad the section to match the alignment requirements of the
  519. // following one. This is for 'gas' compatibility, it shouldn't
  520. /// strictly be necessary.
  521. StartAddress += getPaddingSize(Sec, Layout);
  522. }
  523. }
  524. void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
  525. const MCAsmLayout &Layout) {
  526. computeSectionAddresses(Asm, Layout);
  527. // Create symbol data for any indirect symbols.
  528. bindIndirectSymbols(Asm);
  529. }
  530. bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
  531. const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
  532. bool InSet, bool IsPCRel) const {
  533. if (InSet)
  534. return true;
  535. // The effective address is
  536. // addr(atom(A)) + offset(A)
  537. // - addr(atom(B)) - offset(B)
  538. // and the offsets are not relocatable, so the fixup is fully resolved when
  539. // addr(atom(A)) - addr(atom(B)) == 0.
  540. const MCSymbol &SA = findAliasedSymbol(SymA);
  541. const MCSection &SecA = SA.getSection();
  542. const MCSection &SecB = *FB.getParent();
  543. if (IsPCRel) {
  544. // The simple (Darwin, except on x86_64) way of dealing with this was to
  545. // assume that any reference to a temporary symbol *must* be a temporary
  546. // symbol in the same atom, unless the sections differ. Therefore, any PCrel
  547. // relocation to a temporary symbol (in the same section) is fully
  548. // resolved. This also works in conjunction with absolutized .set, which
  549. // requires the compiler to use .set to absolutize the differences between
  550. // symbols which the compiler knows to be assembly time constants, so we
  551. // don't need to worry about considering symbol differences fully resolved.
  552. //
  553. // If the file isn't using sub-sections-via-symbols, we can make the
  554. // same assumptions about any symbol that we normally make about
  555. // assembler locals.
  556. bool hasReliableSymbolDifference = isX86_64();
  557. if (!hasReliableSymbolDifference) {
  558. if (!SA.isInSection() || &SecA != &SecB ||
  559. (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
  560. Asm.getSubsectionsViaSymbols()))
  561. return false;
  562. return true;
  563. }
  564. // For Darwin x86_64, there is one special case when the reference IsPCRel.
  565. // If the fragment with the reference does not have a base symbol but meets
  566. // the simple way of dealing with this, in that it is a temporary symbol in
  567. // the same atom then it is assumed to be fully resolved. This is needed so
  568. // a relocation entry is not created and so the static linker does not
  569. // mess up the reference later.
  570. else if(!FB.getAtom() &&
  571. SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
  572. return true;
  573. }
  574. }
  575. // If they are not in the same section, we can't compute the diff.
  576. if (&SecA != &SecB)
  577. return false;
  578. const MCFragment *FA = SA.getFragment();
  579. // Bail if the symbol has no fragment.
  580. if (!FA)
  581. return false;
  582. // If the atoms are the same, they are guaranteed to have the same address.
  583. if (FA->getAtom() == FB.getAtom())
  584. return true;
  585. // Otherwise, we can't prove this is fully resolved.
  586. return false;
  587. }
  588. void MachObjectWriter::writeObject(MCAssembler &Asm,
  589. const MCAsmLayout &Layout) {
  590. // Compute symbol table information and bind symbol indices.
  591. computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
  592. UndefinedSymbolData);
  593. unsigned NumSections = Asm.size();
  594. const MCAssembler::VersionMinInfoType &VersionInfo =
  595. Layout.getAssembler().getVersionMinInfo();
  596. // The section data starts after the header, the segment load command (and
  597. // section headers) and the symbol table.
  598. unsigned NumLoadCommands = 1;
  599. uint64_t LoadCommandsSize = is64Bit() ?
  600. sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
  601. sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
  602. // Add the deployment target version info load command size, if used.
  603. if (VersionInfo.Major != 0) {
  604. ++NumLoadCommands;
  605. LoadCommandsSize += sizeof(MachO::version_min_command);
  606. }
  607. // Add the data-in-code load command size, if used.
  608. unsigned NumDataRegions = Asm.getDataRegions().size();
  609. if (NumDataRegions) {
  610. ++NumLoadCommands;
  611. LoadCommandsSize += sizeof(MachO::linkedit_data_command);
  612. }
  613. // Add the loh load command size, if used.
  614. uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
  615. uint64_t LOHSize = RoundUpToAlignment(LOHRawSize, is64Bit() ? 8 : 4);
  616. if (LOHSize) {
  617. ++NumLoadCommands;
  618. LoadCommandsSize += sizeof(MachO::linkedit_data_command);
  619. }
  620. // Add the symbol table load command sizes, if used.
  621. unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
  622. UndefinedSymbolData.size();
  623. if (NumSymbols) {
  624. NumLoadCommands += 2;
  625. LoadCommandsSize += (sizeof(MachO::symtab_command) +
  626. sizeof(MachO::dysymtab_command));
  627. }
  628. // Add the linker option load commands sizes.
  629. for (const auto &Option : Asm.getLinkerOptions()) {
  630. ++NumLoadCommands;
  631. LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
  632. }
  633. // Compute the total size of the section data, as well as its file size and vm
  634. // size.
  635. uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
  636. sizeof(MachO::mach_header)) + LoadCommandsSize;
  637. uint64_t SectionDataSize = 0;
  638. uint64_t SectionDataFileSize = 0;
  639. uint64_t VMSize = 0;
  640. for (const MCSection &Sec : Asm) {
  641. uint64_t Address = getSectionAddress(&Sec);
  642. uint64_t Size = Layout.getSectionAddressSize(&Sec);
  643. uint64_t FileSize = Layout.getSectionFileSize(&Sec);
  644. FileSize += getPaddingSize(&Sec, Layout);
  645. VMSize = std::max(VMSize, Address + Size);
  646. if (Sec.isVirtualSection())
  647. continue;
  648. SectionDataSize = std::max(SectionDataSize, Address + Size);
  649. SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
  650. }
  651. // The section data is padded to 4 bytes.
  652. //
  653. // FIXME: Is this machine dependent?
  654. unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
  655. SectionDataFileSize += SectionDataPadding;
  656. // Write the prolog, starting with the header and load command...
  657. writeHeader(NumLoadCommands, LoadCommandsSize,
  658. Asm.getSubsectionsViaSymbols());
  659. writeSegmentLoadCommand(NumSections, VMSize,
  660. SectionDataStart, SectionDataSize);
  661. // ... and then the section headers.
  662. uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
  663. for (const MCSection &Sec : Asm) {
  664. std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
  665. unsigned NumRelocs = Relocs.size();
  666. uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
  667. writeSection(Asm, Layout, Sec, SectionStart, RelocTableEnd, NumRelocs);
  668. RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
  669. }
  670. // Write out the deployment target information, if it's available.
  671. if (VersionInfo.Major != 0) {
  672. assert(VersionInfo.Update < 256 && "unencodable update target version");
  673. assert(VersionInfo.Minor < 256 && "unencodable minor target version");
  674. assert(VersionInfo.Major < 65536 && "unencodable major target version");
  675. uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) |
  676. (VersionInfo.Major << 16);
  677. write32(VersionInfo.Kind == MCVM_OSXVersionMin ? MachO::LC_VERSION_MIN_MACOSX :
  678. MachO::LC_VERSION_MIN_IPHONEOS);
  679. write32(sizeof(MachO::version_min_command));
  680. write32(EncodedVersion);
  681. write32(0); // reserved.
  682. }
  683. // Write the data-in-code load command, if used.
  684. uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
  685. if (NumDataRegions) {
  686. uint64_t DataRegionsOffset = RelocTableEnd;
  687. uint64_t DataRegionsSize = NumDataRegions * 8;
  688. writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
  689. DataRegionsSize);
  690. }
  691. // Write the loh load command, if used.
  692. uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
  693. if (LOHSize)
  694. writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
  695. DataInCodeTableEnd, LOHSize);
  696. // Write the symbol table load command, if used.
  697. if (NumSymbols) {
  698. unsigned FirstLocalSymbol = 0;
  699. unsigned NumLocalSymbols = LocalSymbolData.size();
  700. unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
  701. unsigned NumExternalSymbols = ExternalSymbolData.size();
  702. unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
  703. unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
  704. unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
  705. unsigned NumSymTabSymbols =
  706. NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
  707. uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
  708. uint64_t IndirectSymbolOffset = 0;
  709. // If used, the indirect symbols are written after the section data.
  710. if (NumIndirectSymbols)
  711. IndirectSymbolOffset = LOHTableEnd;
  712. // The symbol table is written after the indirect symbol data.
  713. uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
  714. // The string table is written after symbol table.
  715. uint64_t StringTableOffset =
  716. SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
  717. sizeof(MachO::nlist_64) :
  718. sizeof(MachO::nlist));
  719. writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
  720. StringTableOffset, StringTable.data().size());
  721. writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
  722. FirstExternalSymbol, NumExternalSymbols,
  723. FirstUndefinedSymbol, NumUndefinedSymbols,
  724. IndirectSymbolOffset, NumIndirectSymbols);
  725. }
  726. // Write the linker options load commands.
  727. for (const auto &Option : Asm.getLinkerOptions())
  728. writeLinkerOptionsLoadCommand(Option);
  729. // Write the actual section data.
  730. for (const MCSection &Sec : Asm) {
  731. Asm.writeSectionData(&Sec, Layout);
  732. uint64_t Pad = getPaddingSize(&Sec, Layout);
  733. WriteZeros(Pad);
  734. }
  735. // Write the extra padding.
  736. WriteZeros(SectionDataPadding);
  737. // Write the relocation entries.
  738. for (const MCSection &Sec : Asm) {
  739. // Write the section relocation entries, in reverse order to match 'as'
  740. // (approximately, the exact algorithm is more complicated than this).
  741. std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
  742. for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
  743. write32(Rel.MRE.r_word0);
  744. write32(Rel.MRE.r_word1);
  745. }
  746. }
  747. // Write out the data-in-code region payload, if there is one.
  748. for (MCAssembler::const_data_region_iterator
  749. it = Asm.data_region_begin(), ie = Asm.data_region_end();
  750. it != ie; ++it) {
  751. const DataRegionData *Data = &(*it);
  752. uint64_t Start = getSymbolAddress(*Data->Start, Layout);
  753. uint64_t End = getSymbolAddress(*Data->End, Layout);
  754. DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
  755. << " start: " << Start << "(" << Data->Start->getName() << ")"
  756. << " end: " << End << "(" << Data->End->getName() << ")"
  757. << " size: " << End - Start
  758. << "\n");
  759. write32(Start);
  760. write16(End - Start);
  761. write16(Data->Kind);
  762. }
  763. // Write out the loh commands, if there is one.
  764. if (LOHSize) {
  765. #ifndef NDEBUG
  766. unsigned Start = OS.tell();
  767. #endif
  768. Asm.getLOHContainer().emit(*this, Layout);
  769. // Pad to a multiple of the pointer size.
  770. writeBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4));
  771. assert(OS.tell() - Start == LOHSize);
  772. }
  773. // Write the symbol table data, if used.
  774. if (NumSymbols) {
  775. // Write the indirect symbol entries.
  776. for (MCAssembler::const_indirect_symbol_iterator
  777. it = Asm.indirect_symbol_begin(),
  778. ie = Asm.indirect_symbol_end(); it != ie; ++it) {
  779. // Indirect symbols in the non-lazy symbol pointer section have some
  780. // special handling.
  781. const MCSectionMachO &Section =
  782. static_cast<const MCSectionMachO &>(*it->Section);
  783. if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
  784. // If this symbol is defined and internal, mark it as such.
  785. if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
  786. uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
  787. if (it->Symbol->isAbsolute())
  788. Flags |= MachO::INDIRECT_SYMBOL_ABS;
  789. write32(Flags);
  790. continue;
  791. }
  792. }
  793. write32(it->Symbol->getIndex());
  794. }
  795. // FIXME: Check that offsets match computed ones.
  796. // Write the symbol table entries.
  797. for (auto *SymbolData :
  798. {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
  799. for (MachSymbolData &Entry : *SymbolData)
  800. writeNlist(Entry, Layout);
  801. // Write the string table.
  802. OS << StringTable.data();
  803. }
  804. }
  805. MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
  806. raw_pwrite_stream &OS,
  807. bool IsLittleEndian) {
  808. return new MachObjectWriter(MOTW, OS, IsLittleEndian);
  809. }