peReader.ml 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. (*
  2. * This file is part of ilLib
  3. * Copyright (c)2004-2013 Haxe Foundation
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  18. *)
  19. open PeData;;
  20. open IO;;
  21. open ExtString;;
  22. open ExtList;;
  (** Raised (through [error]) when the file being read is not a PE image this
      reader can handle; the payload is a human-readable description. *)
  exception Error_message of string
  (** Low-level reading state for one PE file. *)
  type reader_ctx = {
    (* underlying channel; used by [seek] and [pos] *)
    ch : Stdlib.in_channel;
    (* [IO] input built over [ch] by [create_r]; all field reads go through it *)
    i : IO.input;
    (* when true, [info] prints its messages *)
    verbose : bool;
  }
  (** A parsed PE file, as produced by [read]. *)
  type ctx = {
    (* reader the header was parsed from; reused for later seeks and reads *)
    r : reader_ctx;
    (* decoded COFF + optional header, data directories and section table *)
    pe_header : pe_header;
    (* reads one pointer-sized word: 64 bits for [P64] images, otherwise a
       zero-extended 32-bit value *)
    read_word : IO.input -> pointer;
  }
  (** [error msg] never returns: it raises [Error_message msg]. *)
  let error msg = Error_message msg |> raise
  (** [seek r pos] moves the read position of [r]'s channel to the absolute
      file offset [pos]. *)
  let seek { ch; _ } pos = seek_in ch pos
  (** [pos r] is the current absolute file offset of [r]'s channel. *)
  let pos { ch; _ } = Stdlib.pos_in ch
  (** [info r msg] prints [msg ()] followed by a newline when [r.verbose] is
      set. [msg] is a thunk, so the message is only built when it is printed. *)
  let info r msg =
    match r.verbose with
    | true -> print_endline (msg ())
    | false -> ()
  (** [machine_type_of_int i] decodes the 16-bit COFF [Machine] field.

      @raise Error_message if [i] is not a known machine identifier. The value
      comes straight from the file being parsed, so an unknown id is reported
      like every other malformed-input condition in this module rather than
      tripping an assertion. *)
  let machine_type_of_int i = match i with
    | 0x0 -> TUnknown (* unmanaged PE files only *)
    | 0x014c -> Ti386 (* i386 *)
    | 0x0162 -> TR3000 (* R3000 MIPS Little Endian *)
    | 0x0166 -> TR4000 (* R4000 MIPS Little Endian *)
    | 0x0168 -> TR10000 (* R10000 MIPS Little Endian *)
    | 0x0169 -> TWCeMipsV2 (* MIPS Little Endian running MS Windows CE 2 *)
    | 0x0184 -> TAlpha (* Alpha AXP *)
    | 0x01a2 -> TSh3 (* SH3 Little Endian *)
    | 0x01a3 -> TSh3Dsp (* SH3DSP Little Endian *)
    | 0x01a4 -> TSh3e (* SH3E Little Endian *)
    | 0x01a6 -> TSh4 (* SH4 Little Endian *)
    | 0x01a8 -> TSh5 (* SH5 *)
    | 0x01c0 -> TArm (* ARM Little Endian *)
    | 0x01c2 -> TThumb (* ARM processor with Thumb decompressor *)
    | 0x01c4 -> TArmN (* ARM Thumb-2 Little Endian (ARMNT) *)
    | 0xaa64 -> TArm64 (* ARM64 Little Endian *)
    | 0xebc -> TEbc (* EFI byte code *)
    | 0x01d3 -> TAm33 (* AM33 processor *)
    | 0x01f0 -> TPowerPC (* IBM PowerPC Little Endian *)
    | 0x01f1 -> TPowerPCFP (* IBM PowerPC with FPU *)
    | 0x0200 -> TItanium64 (* Intel IA64 (Itanium) *)
    | 0x0266 -> TMips16 (* MIPS16 *)
    | 0x0284 -> TAlpha64 (* Alpha AXP64 *)
    | 0x0366 -> TMipsFpu (* MIPS with FPU *)
    | 0x0466 -> TMipsFpu16 (* MIPS16 with FPU *)
    | 0x0520 -> TTriCore (* Infineon *)
    | 0x8664 -> TAmd64 (* AMD x64 and Intel E64T *)
    | 0x9041 -> TM32R (* M32R *)
    | 0xC020 -> TOSXAmd64 (* OSX AMD x64 *)
    | 0xFD1D -> TLinuxAmd64 (* Linux AMD x64 *)
    | _ -> error (Printf.sprintf "Unknown machine type: 0x%04x" i)
  (** [coff_props_of_int iprops] decodes the COFF [Characteristics] bit field
      into the list of properties whose bit is set in [iprops]. Bits are tested
      in increasing order and consed, so the highest set bit comes first in the
      result. Bits without an entry in the table (e.g. 0x40) are ignored. *)
  let coff_props_of_int iprops =
    let known_bits = [
      0x1, RelocsStripped;
      0x2, ExecutableImage;
      0x4, LineNumsStripped;
      0x8, LocalSymsStripped;
      0x10, AgressiveWsTrim;
      0x20, LargeAddressAware;
      0x80, BytesReversedLO;
      0x100, Machine32Bit;
      0x200, DebugStripped;
      0x400, RemovableRunFromSwap;
      0x800, NetRunFromSwap;
      0x1000, FileSystem;
      0x2000, FileDll;
      0x4000, UpSystemOnly;
      0x8000, BytesReversedHI;
    ] in
    List.fold_left
      (fun acc (bit, prop) ->
        if iprops land bit = bit then prop :: acc else acc)
      [] known_bits
  (** [section_props_of_int32 props] decodes a section header's 32-bit
      [Characteristics] field into a list of section properties.

      Most entries are independent bit flags. The alignment entries
      (0x00100000 .. 0x00E00000) are different: they are the values of a single
      4-bit field (mask 0x00F00000), so they are decoded by masking the field
      and comparing for equality. Testing them as bit flags would report
      several alignments at once (e.g. 0x00300000, 4-byte alignment, also has
      the bits of the 1-byte and 2-byte values set). At most one alignment
      property is therefore returned.

      The result keeps the ordering used for plain flags: entries with a higher
      numeric value come first. *)
  let section_props_of_int32 props =
    (* conses onto [acc] every property of [flags] whose bit is set in [props] *)
    let collect flags acc =
      List.fold_left
        (fun acc (bit, prop) ->
          if Int32.logand props bit = bit then prop :: acc else acc)
        acc flags
    in
    (* decodes the enumerated alignment field *)
    let alignment acc =
      match Int32.logand props 0x00F00000l with
      | 0x100000l -> SAlign1Bytes :: acc
      | 0x200000l -> SAlign2Bytes :: acc
      | 0x300000l -> SAlign4Bytes :: acc
      | 0x400000l -> SAlign8Bytes :: acc
      | 0x500000l -> SAlign16Bytes :: acc
      | 0x600000l -> SAlign32Bytes :: acc
      | 0x700000l -> SAlign64Bytes :: acc
      | 0x800000l -> SAlign128Bytes :: acc
      | 0x900000l -> SAlign256Bytes :: acc
      | 0xA00000l -> SAlign512Bytes :: acc
      | 0xB00000l -> SAlign1024Bytes :: acc
      | 0xC00000l -> SAlign2048Bytes :: acc
      | 0xD00000l -> SAlign4096Bytes :: acc
      | 0xE00000l -> SAlign8192Bytes :: acc
      | _ -> acc (* 0: no alignment given; 0xF00000: no defined meaning *)
    in
    []
    |> collect [
      0x8l, SNoPad;
      0x20l, SHasCode;
      0x40l, SHasIData;
      0x80l, SHasData;
      0x200l, SHasLinkInfo;
      0x1000l, SLinkRemove;
      0x8000l, SGlobalRel;
      0x20000l, SHas16BitMem;
    ]
    |> alignment
    |> collect [
      0x1000000l, SHasExtRelocs;
      0x02000000l, SCanDiscard;
      0x04000000l, SNotCached;
      0x08000000l, SNotPaged;
      0x10000000l, SShared;
      0x20000000l, SExec;
      0x40000000l, SRead;
      0x80000000l, SWrite;
    ]
  (** [subsystem_of_int i] decodes the optional header's [Subsystem] field.
      @raise Error_message for any value not listed below. *)
  let subsystem_of_int = function
    | 0 -> SUnknown
    | 1 -> SNative
    | 2 -> SWGui
    | 3 -> SWCui
    | 7 -> SPCui
    | 9 -> SWCeGui
    | 10 -> SEfi
    | 11 -> SEfiBoot
    | 12 -> SEfiRuntime
    | 13 -> SEfiRom
    | 14 -> SXbox
    | other -> error ("Unknown subsystem " ^ string_of_int other)
  (** [dll_props_of_int iprops] decodes the optional header's
      [DllCharacteristics] bit field into the list of properties whose bit is
      set. Bits are tested in increasing order and consed, so the highest set
      bit comes first. Bits without an entry in the table are ignored. *)
  let dll_props_of_int iprops =
    let known_bits = [
      0x0040, DDynamicBase;
      0x0080, DForceIntegrity;
      0x0100, DNxCompat;
      0x0200, DNoIsolation;
      0x0400, DNoSeh;
      0x0800, DNoBind;
      0x2000, DWdmDriver;
      0x8000, DTerminalServer;
    ] in
    List.fold_left
      (fun acc (bit, prop) ->
        if iprops land bit = bit then prop :: acc else acc)
      [] known_bits
  (** [pe_magic_of_int i] decodes the optional header magic number.
      @raise Error_message if [i] is none of 0x10b, 0x107 or 0x20b (the value
      is reported in decimal). *)
  let pe_magic_of_int = function
    | 0x10b -> P32
    | 0x107 -> PRom
    | 0x20b -> P64
    | other -> error ("Unknown PE magic number: " ^ string_of_int other)
  (** [clr_flags_of_int iprops] decodes the CLR header [Flags] bit field into
      the list of flags whose bit is set. Bits are tested in increasing order
      and consed, so the highest set bit comes first. Unlisted bits are
      ignored. *)
  let clr_flags_of_int iprops =
    let known_bits = [
      0x1, FIlOnly;
      0x2, F32BitRequired;
      0x4, FIlLibrary;
      0x8, FSigned;
      0x10, FNativeEntry;
      0x10000, FTrackDebug;
    ] in
    List.fold_left
      (fun acc (bit, flag) ->
        if iprops land bit = bit then flag :: acc else acc)
      [] known_bits
  (** [get_dir dir ctx] returns the data-directory entry for [dir], as stored
      in [ctx.pe_header.pe_data_dirs] at the index given by
      [directory_type_info].
      @raise Error_message if the file has no entry at that index. *)
  let get_dir dir ctx =
    let idx, name = directory_type_info dir in
    match ctx.pe_header.pe_data_dirs.(idx) with
    | entry -> entry
    | exception Invalid_argument _ ->
      (* out-of-bounds index: the file declares fewer directories *)
      error (Printf.sprintf "The directory '%s' of index '%i' is required but is missing on this file" name idx)
  (** [read_rva i] reads an RVA from [i]; it is exactly [read_real_i32 i]. *)
  let read_rva i = read_real_i32 i
  (** [read_word is64 i] reads one pointer-sized word from [i] as an [int64].
      With [is64] it reads a full 64-bit value; otherwise it reads 32 bits and
      masks the result so the value is zero-extended rather than
      sign-extended. *)
  let read_word is64 i =
    match is64 with
    | true -> read_i64 i
    | false ->
      let low = Int64.of_int32 (read_real_i32 i) in
      Int64.logand low 0xFFFFFFFFL
  (** [read_coff_header i] reads the COFF file header from [i], which must be
      positioned right after the PE signature. Fields are read in file order;
      each binding below performs exactly one read.
      Errors raised by [machine_type_of_int] propagate. *)
  let read_coff_header i =
    let coff_machine = machine_type_of_int (read_ui16 i) in
    let coff_nsections = read_ui16 i in
    let coff_timestamp = read_real_i32 i in
    let coff_symbol_table_pointer = read_rva i in
    let coff_nsymbols = read_i32 i in
    let coff_optheader_size = read_ui16 i in
    let coff_props = coff_props_of_int (read_ui16 i) in
    {
      coff_machine;
      coff_nsections;
      coff_timestamp;
      coff_symbol_table_pointer;
      coff_nsymbols;
      coff_optheader_size;
      coff_props;
    }
  (** [read_pe_header r header] reads the optional ("PE") header, the data
      directories and the section table. [r] must be positioned right after
      the COFF header, and [header] is that COFF header.

      Every binding below performs reads on [r.i], so their order is the file
      layout and must not be changed.

      @raise Error_message on an unknown magic number or subsystem. *)
  let read_pe_header r header =
    let i = r.i in
    (* the section table follows the optional header, whose size the COFF
       header gives; remember where that is before reading anything *)
    let sections_offset = pos r + header.coff_optheader_size in
    let pe_magic = pe_magic_of_int (read_ui16 i) in
    let pe_major = read_byte i in
    let pe_minor = read_byte i in
    let pe_code_size = read_i32 i in
    let pe_init_size = read_i32 i in
    let pe_uinit_size = read_i32 i in
    let pe_entry_addr = read_rva i in
    let pe_base_code = read_rva i in
    let is64 =
      match pe_magic with
      | P32 | PRom -> false
      | P64 -> true
    in
    (* the base-of-data field is only read for non-64-bit images *)
    let pe_base_data = if is64 then Int32.zero else read_rva i in
    let read_ptr = read_word is64 in
    (* COFF Windows extension *)
    let pe_image_base = read_ptr i in
    let pe_section_alignment = read_i32 i in
    let pe_file_alignment = read_i32 i in
    let pe_major_osver = read_ui16 i in
    let pe_minor_osver = read_ui16 i in
    let pe_major_imgver = read_ui16 i in
    let pe_minor_imgver = read_ui16 i in
    let pe_major_subsysver = read_ui16 i in
    let pe_minor_subsysver = read_ui16 i in
    ignore (read_i32 i); (* reserved *)
    let pe_image_size = read_i32 i in
    let pe_headers_size = read_i32 i in
    let pe_checksum = read_real_i32 i in
    let pe_subsystem = subsystem_of_int (read_ui16 i) in
    let pe_dll_props = dll_props_of_int (read_ui16 i) in
    let pe_stack_reserve = read_ptr i in
    let pe_stack_commit = read_ptr i in
    let pe_heap_reserve = read_ptr i in
    let pe_heap_commit = read_ptr i in
    ignore (read_i32 i); (* reserved *)
    let pe_ndata_dir = read_i32 i in
    (* each data directory is an (address, size) pair *)
    let read_data_dir _ =
      let addr = read_rva i in
      let size = read_rva i in
      addr, size
    in
    let pe_data_dirs = Array.init pe_ndata_dir read_data_dir in
    (* sections *)
    seek r sections_offset;
    let read_section _ =
      let raw_name = nread_string i 8 in
      (* names shorter than 8 bytes are NUL-padded: cut at the first NUL *)
      let s_name =
        match String.index raw_name '\x00' with
        | nul -> String.sub raw_name 0 nul
        | exception Not_found -> raw_name
      in
      (*TODO check for slash names *)
      let s_vsize = read_rva i in
      let s_vaddr = read_rva i in
      let s_raw_size = read_rva i in
      let s_raw_pointer = read_i32 i in
      let s_reloc_pointer = read_i32 i in
      let s_line_num_pointer = read_i32 i in
      let s_nrelocs = read_ui16 i in
      let s_nline_nums = read_ui16 i in
      let s_props = section_props_of_int32 (read_rva i) in
      {
        s_name;
        s_vsize;
        s_vaddr;
        s_raw_size;
        s_raw_pointer;
        s_reloc_pointer;
        s_line_num_pointer;
        s_nrelocs;
        s_nline_nums;
        s_props;
      }
    in
    let pe_sections = Array.init header.coff_nsections read_section in
    {
      pe_coff_header = header;
      pe_magic;
      pe_major;
      pe_minor;
      pe_code_size;
      pe_init_size;
      pe_uinit_size;
      pe_entry_addr;
      pe_base_code;
      pe_base_data;
      pe_image_base;
      pe_section_alignment;
      pe_file_alignment;
      pe_major_osver;
      pe_minor_osver;
      pe_major_imgver;
      pe_minor_imgver;
      pe_major_subsysver;
      pe_minor_subsysver;
      pe_image_size;
      pe_headers_size;
      pe_checksum;
      pe_subsystem;
      pe_dll_props;
      pe_stack_reserve;
      pe_stack_commit;
      pe_heap_reserve;
      pe_heap_commit;
      pe_ndata_dir;
      pe_data_dirs;
      pe_sections;
    }
  (** [create_r ch props] builds a reader over the input channel [ch].
      Verbose output is enabled when [props] has a binding for the key
      ["IL_VERBOSE"] (only the presence of the key is checked). *)
  let create_r ch props =
    let verbose = PMap.mem "IL_VERBOSE" props in
    let i = IO.input_channel ch in
    { ch; i; verbose }
  (** [convert_rva ctx rva] converts the relative virtual address [rva] into a
      file offset. It picks the first section, in table order, whose range
      [s_vaddr, s_vaddr + s_raw_size) contains [rva], and returns that
      section's [s_raw_pointer] plus the distance of [rva] from [s_vaddr].
      @raise Error_message if no section contains [rva]. *)
  let convert_rva ctx rva =
    let sections = ctx.pe_header.pe_sections in
    let count = Array.length sections in
    let contains sec =
      rva >= sec.s_vaddr && rva < Int32.add sec.s_vaddr sec.s_raw_size
    in
    (* linear search. TODO maybe binary search for many sections? *)
    let rec find n =
      if n >= count then
        error (Printf.sprintf "The RVA %lx is outside sections bounds!" rva)
      else
        let candidate = sections.(n) in
        if contains candidate then candidate else find (n + 1)
    in
    let sec = find 0 in
    sec.s_raw_pointer + Int32.to_int (Int32.sub rva sec.s_vaddr)
  (** [seek_rva ctx rva] moves [ctx]'s reader to the file offset that
      [convert_rva] computes for [rva]; errors from [convert_rva] propagate. *)
  let seek_rva ctx rva =
    let offset = convert_rva ctx rva in
    seek ctx.r offset
  (** [read_cstring i] reads characters from [i] up to and including the next
      NUL and returns them without the NUL. Whatever [read] raises when the
      input ends before a NUL is found propagates unchanged. *)
  let read_cstring i =
    let buf = Buffer.create 8 in
    let rec fill () =
      match read i with
      | '\x00' -> ()
      | c ->
        Buffer.add_char buf c;
        fill ()
    in
    fill ();
    Buffer.contents buf
  (** [read_idata ctx] reads the import data: one record per imported module,
      in the order of the import directory table, each with its imports in
      lookup-table order. Returns [[]] when the [ImportTable] directory has a
      zero address or a zero size.

      Errors from [get_dir] and [seek_rva] (missing directory, RVA outside
      every section) propagate as [Error_message]. *)
  let read_idata ctx =
    match get_dir ImportTable ctx with
    | 0l, _ | _, 0l -> []
    | rva, _ ->
      seek_rva ctx rva;
      let i = ctx.r.i in
      let is_64 = ctx.pe_header.pe_magic = P64 in
      (* Import directory table: 20-byte entries, read until one whose first
         field (the lookup-table RVA) is zero. Entries are accumulated in
         reverse order. *)
      let rec read_descriptors acc =
        let lookup_table = read_rva i in
        if lookup_table = Int32.zero then acc
        else begin
          let _timestamp = read_real_i32 i in
          let _fchain = read_real_i32 i in
          let name_rva = read_rva i in
          let _addr_table = read_rva i in
          read_descriptors ((lookup_table, name_rva) :: acc)
        end
      in
      (* 32-bit lookup table: the top bit marks an import by ordinal (low 16
         bits); otherwise the low 31 bits are the RVA of a hint/name entry.
         Stops at the first zero entry; result is in reverse order. *)
      let rec read_entries32 acc =
        match read_real_i32 i with
        | 0l -> acc
        | entry ->
          let is_ordinal = Int32.logand entry 0x80000000l = 0x80000000l in
          let mask = if is_ordinal then 0xFFFFl else 0x7FFFFFFFl in
          read_entries32 ((is_ordinal, Int32.logand entry mask) :: acc)
      in
      (* 64-bit lookup table: same encoding with the marker in bit 63 *)
      let rec read_entries64 acc =
        match read_i64 i with
        | 0L -> acc
        | entry ->
          let is_ordinal =
            Int64.logand entry 0x8000000000000000L = 0x8000000000000000L
          in
          let mask = if is_ordinal then 0xFFFFL else 0x7FFFFFFFL in
          let value = Int64.to_int32 (Int64.logand entry mask) in
          read_entries64 ((is_ordinal, value) :: acc)
      in
      (* turns a decoded lookup entry into an import symbol; for named
         imports this seeks to the hint/name entry and reads it *)
      let resolve = function
        | true, ord -> SOrdinal (Int32.to_int ord)
        | false, hint_rva ->
          seek_rva ctx hint_rva;
          let hint = read_ui16 i in
          SName (hint, read_cstring i)
      in
      let read_module (lookup_table, name_rva) =
        seek_rva ctx lookup_table;
        let entries = if is_64 then read_entries64 [] else read_entries32 [] in
        (* [entries] is reversed; [rev_map] restores table order *)
        let imp_imports = List.rev_map resolve entries in
        seek_rva ctx name_rva;
        let imp_name = read_cstring i in
        { imp_name; imp_imports }
      in
      let descriptors = read_descriptors [] in
      List.rev_map read_module descriptors
  (** [has_clr_header ctx] is [true] when the [ClrRuntimeHeader] data directory
      has both a non-zero address and a non-zero size.
      @raise Error_message (from [get_dir]) if the file has no such directory
      entry at all. *)
  let has_clr_header ctx =
    let rva, size = get_dir ClrRuntimeHeader ctx in
    rva <> 0l && size <> 0l
  (** [read_clr_header ctx] seeks to the [ClrRuntimeHeader] data directory and
      reads the CLR header found there. Fields are read in file order; each
      binding below performs reads on the input.
      @raise Error_message if the directory has a zero address or size (the
      file has no managed content), or if its RVA cannot be mapped. *)
  let read_clr_header ctx =
    match get_dir ClrRuntimeHeader ctx with
    | 0l, _ | _, 0l ->
      error "This PE file does not have managed content"
    | rva, _ ->
      seek_rva ctx rva;
      let i = ctx.r.i in
      (* an (rva, size) pair, read in that order *)
      let read_tbl () =
        let tbl_rva = read_rva i in
        let tbl_size = read_real_i32 i in
        tbl_rva, tbl_size
      in
      let clr_cb = read_i32 i in
      let clr_major = read_ui16 i in
      let clr_minor = read_ui16 i in
      let clr_meta = read_tbl () in
      let clr_flags = clr_flags_of_int (read_i32 i) in
      let clr_entry_point = read_rva i in
      let clr_res = read_tbl () in
      let clr_sig = read_tbl () in
      let clr_codeman = read_tbl () in
      let clr_vtable_fix = read_tbl () in
      let clr_export_address = read_tbl () in
      {
        clr_cb;
        clr_major;
        clr_minor;
        clr_meta;
        clr_flags;
        clr_entry_point;
        clr_res;
        clr_sig;
        clr_codeman;
        clr_vtable_fix;
        clr_export_address;
      }
  (** [read r] parses the PE file behind [r], which is expected to be at the
      start of the file. It checks the "MZ" magic, follows the offset stored
      at 0x3c to the "PE\000\000" signature, then reads the COFF and PE
      headers.
      @raise Error_message if either signature is missing, or if a header
      field cannot be decoded. *)
  let read r =
    let i = r.i in
    (* [read] here is still the IO byte reader, since this binding is not
       recursive; the second byte is only consumed when the first is 'M' *)
    let has_mz = read i = 'M' && read i = 'Z' in
    if not has_mz then
      error "MZ magic header not found: Is the target file really a PE?";
    seek r 0x3c;
    let pe_sig_offset = read_i32 i in
    seek r pe_sig_offset;
    let signature = really_nread_string i 4 in
    if signature <> "PE\x00\x00" then
      error "Invalid PE header signature: PE expected";
    let header = read_coff_header i in
    let pe_header = read_pe_header r header in
    let is64 = pe_header.pe_magic = P64 in
    { r; pe_header; read_word = read_word is64 }