(* peReader.ml *)
  1. (*
  2. * This file is part of ilLib
  3. * Copyright (c)2004-2013 Haxe Foundation
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  18. *)
  19. open PeData;;
  20. open IO;;
  21. open ExtString;;
  22. open ExtList;;
  23. exception Error_message of string
  24. type reader_ctx = {
  25. ch : Pervasives.in_channel;
  26. i : IO.input;
  27. verbose : bool;
  28. }
  29. type ctx = {
  30. r : reader_ctx;
  31. pe_header : pe_header;
  32. read_word : IO.input -> pointer;
  33. }
  34. let error msg = raise (Error_message msg)
  35. let seek r pos =
  36. seek_in r.ch pos
  37. let pos r =
  38. Pervasives.pos_in r.ch
  39. let info r msg =
  40. if r.verbose then
  41. print_endline (msg())
  42. let machine_type_of_int i = match i with
  43. | 0x0 -> TUnknown (* 0 - unmanaged PE files only *)
  44. | 0x014c -> Ti386 (* 0x014c - i386 *)
  45. | 0x0162 -> TR3000 (* 0x0162 - R3000 MIPS Little Endian *)
  46. | 0x0166 -> TR4000 (* 0x0166 - R4000 MIPS Little Endian *)
  47. | 0x0168 -> TR10000 (* 0x0168 - R10000 MIPS Little Endian *)
  48. | 0x0169 -> TWCeMipsV2 (* 0x0169 - MIPS Litlte Endian running MS Windows CE 2 *)
  49. | 0x0184 -> TAlpha (* 0x0184 - Alpha AXP *)
  50. | 0x01a2 -> TSh3 (* 0x01a2 - SH3 Little Endian *)
  51. | 0x01a3 -> TSh3Dsp (* 0x01a3 SH3DSP Little Endian *)
  52. | 0x01a4 -> TSh3e (* 0x01a4 SH3E Little Endian *)
  53. | 0x01a6 -> TSh4 (* 0x01a6 SH4 Little Endian *)
  54. | 0x01a8 -> TSh5
  55. | 0x01c0 -> TArm (* 0x1c0 ARM Little Endian *)
  56. | 0x01c2 -> TThumb (* 0x1c2 ARM processor with Thumb decompressor *)
  57. | 0x01c4 -> TArmN (* 0x1c0 ARM Little Endian *)
  58. | 0xaa64 -> TArm64
  59. | 0xebc -> TEbc
  60. | 0x01d3 -> TAm33 (* 0x1d3 AM33 processor *)
  61. | 0x01f0 -> TPowerPC (* 0x01f0 IBM PowerPC Little Endian *)
  62. | 0x01f1 -> TPowerPCFP (* 0x01f1 IBM PowerPC with FPU *)
  63. | 0x0200 -> TItanium64 (* 0x0200 Intel IA64 (Itanium( *)
  64. | 0x0266 -> TMips16 (* 0x0266 MIPS *)
  65. | 0x0284 -> TAlpha64 (* 0x0284 Alpha AXP64 *)
  66. | 0x0366 -> TMipsFpu (* 0x0366 MIPS with FPU *)
  67. | 0x0466 -> TMipsFpu16 (* 0x0466 MIPS16 with FPU *)
  68. | 0x0520 -> TTriCore (* 0x0520 Infineon *)
  69. | 0x8664 -> TAmd64 (* 0x8664 AMD x64 and Intel E64T *)
  70. | 0x9041 -> TM32R (* 0x9041 M32R *)
  71. | _ -> assert false
  72. let coff_props_of_int iprops = List.fold_left (fun acc i ->
  73. if (iprops land i) = i then (match i with
  74. | 0x1 -> RelocsStripped (* 0x1 *)
  75. | 0x2 -> ExecutableImage (* 0x2 *)
  76. | 0x4 -> LineNumsStripped (* 0x4 *)
  77. | 0x8 -> LocalSymsStripped (* 0x8 *)
  78. | 0x10 -> AgressiveWsTrim (* 0x10 *)
  79. | 0x20 -> LargeAddressAware (* 0x20 *)
  80. | 0x80 -> BytesReversedLO (* 0x80 *)
  81. | 0x100 -> Machine32Bit (* 0x100 *)
  82. | 0x200 -> DebugStripped (* 0x200 *)
  83. | 0x400 -> RemovableRunFromSwap (* 0x400 *)
  84. | 0x800 -> NetRunFromSwap (* 0x800 *)
  85. | 0x1000 -> FileSystem (* 0x1000 *)
  86. | 0x2000 -> FileDll (* 0x2000 *)
  87. | 0x4000 -> UpSystemOnly (* 0x4000 *)
  88. | 0x8000 -> BytesReversedHI (* 0x8000 *)
  89. | _ -> assert false) :: acc
  90. else
  91. acc) [] [0x1;0x2;0x4;0x8;0x10;0x20;0x80;0x100;0x200;0x400;0x800;0x1000;0x2000;0x4000;0x8000]
  92. let section_props_of_int32 props = List.fold_left (fun acc i ->
  93. if (Int32.logand props i) = i then (match i with
  94. | 0x8l -> SNoPad
  95. | 0x20l -> SHasCode
  96. | 0x40l -> SHasIData
  97. | 0x80l -> SHasData
  98. | 0x200l -> SHasLinkInfo
  99. | 0x1000l -> SLinkRemove
  100. | 0x8000l -> SGlobalRel
  101. | 0x20000l -> SHas16BitMem
  102. | 0x100000l -> SAlign1Bytes
  103. | 0x200000l -> SAlign2Bytes
  104. | 0x300000l -> SAlign4Bytes
  105. | 0x400000l -> SAlign8Bytes
  106. | 0x500000l -> SAlign16Bytes
  107. | 0x600000l -> SAlign32Bytes
  108. | 0x700000l -> SAlign64Bytes
  109. | 0x800000l -> SAlign128Bytes
  110. | 0x900000l -> SAlign256Bytes
  111. | 0xA00000l -> SAlign512Bytes
  112. | 0xB00000l -> SAlign1024Bytes
  113. | 0xC00000l -> SAlign2048Bytes
  114. | 0xD00000l -> SAlign4096Bytes
  115. | 0xE00000l -> SAlign8192Bytes
  116. | 0x1000000l -> SHasExtRelocs
  117. | 0x02000000l -> SCanDiscard
  118. | 0x04000000l -> SNotCached
  119. | 0x08000000l -> SNotPaged
  120. | 0x10000000l -> SShared
  121. | 0x20000000l -> SExec
  122. | 0x40000000l -> SRead
  123. | 0x80000000l -> SWrite
  124. | _ -> assert false) :: acc
  125. else
  126. acc) [] [ 0x8l; 0x20l; 0x40l; 0x80l; 0x200l; 0x1000l; 0x8000l; 0x20000l; 0x100000l; 0x200000l; 0x300000l; 0x400000l; 0x500000l; 0x600000l; 0x700000l; 0x800000l; 0x900000l; 0xA00000l; 0xB00000l; 0xC00000l; 0xD00000l; 0xE00000l; 0x1000000l; 0x02000000l; 0x04000000l; 0x08000000l; 0x10000000l; 0x20000000l; 0x40000000l; 0x80000000l; ]
  127. let subsystem_of_int i = match i with
  128. | 0 -> SUnknown (* 0 *)
  129. | 1 -> SNative (* 1 *)
  130. | 2 -> SWGui (* 2 *)
  131. | 3 -> SWCui (* 3 *)
  132. | 7 -> SPCui (* 7 *)
  133. | 9 -> SWCeGui (* 9 *)
  134. | 10 -> SEfi (* 10 *)
  135. | 11 -> SEfiBoot (* 11 *)
  136. | 12 -> SEfiRuntime (* 12 *)
  137. | 13 -> SEfiRom (* 13 *)
  138. | 14 -> SXbox (* 14 *)
  139. | _ -> error ("Unknown subsystem " ^ string_of_int i)
  140. let dll_props_of_int iprops = List.fold_left (fun acc i ->
  141. if (iprops land i) = i then (match i with
  142. | 0x0040 -> DDynamicBase (* 0x0040 *)
  143. | 0x0080 -> DForceIntegrity (* 0x0080 *)
  144. | 0x0100 -> DNxCompat (* 0x0100 *)
  145. | 0x0200 -> DNoIsolation (* 0x0200 *)
  146. | 0x0400 -> DNoSeh (* 0x0400 *)
  147. | 0x0800 -> DNoBind (* 0x0800 *)
  148. | 0x2000 -> DWdmDriver (* 0x2000 *)
  149. | 0x8000 -> DTerminalServer (* 0x8000 *)
  150. | _ -> assert false) :: acc
  151. else
  152. acc) [] [0x40;0x80;0x100;0x200;0x400;0x800;0x2000;0x8000]
  153. let pe_magic_of_int i = match i with
  154. | 0x10b -> P32
  155. | 0x107 -> PRom
  156. | 0x20b -> P64
  157. | _ -> error ("Unknown PE magic number: " ^ string_of_int i)
  158. let clr_flags_of_int iprops = List.fold_left (fun acc i ->
  159. if (iprops land i) = i then (match i with
  160. | 0x1 -> FIlOnly (* 0x1 *)
  161. | 0x2 -> F32BitRequired (* 0x2 *)
  162. | 0x4 -> FIlLibrary (* 0x4 *)
  163. | 0x8 -> FSigned (* 0x8 *)
  164. | 0x10 -> FNativeEntry (* 0x10 *)
  165. | 0x10000 -> FTrackDebug (* 0x10000 *)
  166. | _ -> assert false) :: acc
  167. else
  168. acc) [] [0x1;0x2;0x4;0x8;0x10;0x10000]
  169. let get_dir dir ctx =
  170. let idx,name = directory_type_info dir in
  171. try
  172. ctx.pe_header.pe_data_dirs.(idx)
  173. with
  174. | Invalid_argument _ ->
  175. error (Printf.sprintf "The directory '%s' of index '%i' is required but is missing on this file" name idx)
  176. let read_rva = read_real_i32
  177. let read_word is64 i =
  178. if is64 then read_i64 i else Int64.logand (Int64.of_int32 (read_real_i32 i)) 0xFFFFFFFFL
  179. let read_coff_header i =
  180. let machine = machine_type_of_int (read_ui16 i) in
  181. let nsections = read_ui16 i in
  182. let stamp = read_real_i32 i in
  183. let symbol_table_pointer = read_rva i in
  184. let nsymbols = read_i32 i in
  185. let optheader_size = read_ui16 i in
  186. let props = read_ui16 i in
  187. let props = coff_props_of_int (props) in
  188. {
  189. coff_machine = machine;
  190. coff_nsections = nsections;
  191. coff_timestamp = stamp;
  192. coff_symbol_table_pointer = symbol_table_pointer;
  193. coff_nsymbols = nsymbols;
  194. coff_optheader_size = optheader_size;
  195. coff_props = props;
  196. }
  197. let read_pe_header r header =
  198. let i = r.i in
  199. let sections_offset = (pos r) + header.coff_optheader_size in
  200. let magic = pe_magic_of_int (read_ui16 i) in
  201. let major = read_byte i in
  202. let minor = read_byte i in
  203. let code_size = read_i32 i in
  204. let init_size = read_i32 i in
  205. let uinit_size = read_i32 i in
  206. let entry_addr = read_rva i in
  207. let base_code = read_rva i in
  208. let base_data, read_word = match magic with
  209. | P32 | PRom ->
  210. read_rva i, read_word false
  211. | P64 ->
  212. Int32.zero, read_word true
  213. in
  214. (* COFF Windows extension *)
  215. let image_base = read_word i in
  216. let section_alignment = read_i32 i in
  217. let file_alignment = read_i32 i in
  218. let major_osver = read_ui16 i in
  219. let minor_osver = read_ui16 i in
  220. let major_imgver = read_ui16 i in
  221. let minor_imgver = read_ui16 i in
  222. let major_subsysver = read_ui16 i in
  223. let minor_subsysver = read_ui16 i in
  224. ignore (read_i32 i); (* reserved *)
  225. let image_size = read_i32 i in
  226. let headers_size = read_i32 i in
  227. let checksum = read_real_i32 i in
  228. let subsystem = subsystem_of_int (read_ui16 i) in
  229. let dll_props = dll_props_of_int (read_ui16 i) in
  230. let stack_reserve = read_word i in
  231. let stack_commit = read_word i in
  232. let heap_reserve = read_word i in
  233. let heap_commit = read_word i in
  234. ignore (read_i32 i); (* reserved *)
  235. let ndata_dir = read_i32 i in
  236. let data_dirs = Array.init ndata_dir (fun n ->
  237. let addr = read_rva i in
  238. let size = read_rva i in
  239. addr,size)
  240. in
  241. (* sections *)
  242. let nsections = header.coff_nsections in
  243. seek r sections_offset;
  244. let sections = Array.init nsections (fun n ->
  245. let name = nread_string i 8 in
  246. let name = try
  247. let index = String.index name '\x00' in
  248. String.sub name 0 index
  249. with | Not_found ->
  250. name
  251. in
  252. (*TODO check for slash names *)
  253. let vsize = read_rva i in
  254. let vaddr = read_rva i in
  255. let raw_size = read_rva i in
  256. let raw_pointer = read_i32 i in
  257. let reloc_pointer = read_i32 i in
  258. let line_num_pointer = read_i32 i in
  259. let nrelocs = read_ui16 i in
  260. let nline_nums = read_ui16 i in
  261. let props = section_props_of_int32 (read_rva i) in
  262. {
  263. s_name = name;
  264. s_vsize =vsize;
  265. s_vaddr =vaddr;
  266. s_raw_size =raw_size;
  267. s_raw_pointer =raw_pointer;
  268. s_reloc_pointer =reloc_pointer;
  269. s_line_num_pointer =line_num_pointer;
  270. s_nrelocs =nrelocs;
  271. s_nline_nums =nline_nums;
  272. s_props =props;
  273. }
  274. ) in
  275. {
  276. pe_coff_header = header;
  277. pe_magic = magic;
  278. pe_major = major;
  279. pe_minor = minor;
  280. pe_code_size = code_size;
  281. pe_init_size = init_size;
  282. pe_uinit_size = uinit_size;
  283. pe_entry_addr = entry_addr;
  284. pe_base_code = base_code;
  285. pe_base_data = base_data;
  286. pe_image_base = image_base;
  287. pe_section_alignment = section_alignment;
  288. pe_file_alignment = file_alignment;
  289. pe_major_osver = major_osver;
  290. pe_minor_osver = minor_osver;
  291. pe_major_imgver = major_imgver;
  292. pe_minor_imgver = minor_imgver;
  293. pe_major_subsysver = major_subsysver;
  294. pe_minor_subsysver = minor_subsysver;
  295. pe_image_size = image_size;
  296. pe_headers_size = headers_size;
  297. pe_checksum = checksum;
  298. pe_subsystem = subsystem;
  299. pe_dll_props = dll_props;
  300. pe_stack_reserve = stack_reserve;
  301. pe_stack_commit = stack_commit;
  302. pe_heap_reserve = heap_reserve;
  303. pe_heap_commit = heap_commit;
  304. pe_ndata_dir = ndata_dir;
  305. pe_data_dirs = data_dirs;
  306. pe_sections = sections;
  307. }
  308. let create_r ch props =
  309. let verbose = PMap.mem "IL_VERBOSE" props in
  310. let i = IO.input_channel ch in
  311. {
  312. ch = ch;
  313. i = i;
  314. verbose = verbose;
  315. }
  316. (* converts an RVA into a file offset. *)
  317. let convert_rva ctx rva =
  318. let sections = ctx.pe_header.pe_sections in
  319. let nsections = Array.length sections in
  320. let sec =
  321. (* linear search. TODO maybe binary search for many sections? *)
  322. let rec loop n =
  323. if n >= nsections then error (Printf.sprintf "The RVA %lx is outside sections bounds!" rva);
  324. let sec = sections.(n) in
  325. if rva >= sec.s_vaddr && (rva < (Int32.add sec.s_vaddr sec.s_raw_size)) then
  326. sec
  327. else
  328. loop (n+1)
  329. in
  330. loop 0
  331. in
  332. let diff = Int32.to_int (Int32.sub rva sec.s_vaddr) in
  333. sec.s_raw_pointer + diff
  334. let seek_rva ctx rva = seek ctx.r (convert_rva ctx rva)
  335. let read_cstring i =
  336. let ret = Buffer.create 8 in
  337. let rec loop () =
  338. let chr = read i in
  339. if chr = '\x00' then
  340. Buffer.contents ret
  341. else begin
  342. Buffer.add_char ret chr;
  343. loop()
  344. end
  345. in
  346. loop()
  347. (* reads import data *)
  348. let read_idata ctx = match get_dir ImportTable ctx with
  349. | 0l,_ | _,0l ->
  350. []
  351. | rva,size ->
  352. seek_rva ctx rva;
  353. let i = ctx.r.i in
  354. let rec loop acc =
  355. let lookup_table = read_rva i in
  356. if lookup_table = Int32.zero then
  357. acc
  358. else begin
  359. let timestamp = read_real_i32 i in
  360. let fchain = read_real_i32 i in
  361. let name_rva = read_rva i in
  362. let addr_table = read_rva i in
  363. ignore addr_table; ignore fchain; ignore timestamp;
  364. loop ((lookup_table,name_rva) :: acc)
  365. end
  366. in
  367. let tables = loop [] in
  368. List.rev_map (function (lookup_table,name_rva) ->
  369. seek_rva ctx lookup_table;
  370. let is_64 = ctx.pe_header.pe_magic = P64 in
  371. let imports_data = if not is_64 then
  372. let rec loop acc =
  373. let flags = read_real_i32 i in
  374. if flags = Int32.zero then
  375. acc
  376. else begin
  377. let is_ordinal = Int32.logand flags 0x80000000l = 0x80000000l in
  378. loop ( (is_ordinal, if is_ordinal then Int32.logand flags 0xFFFFl else Int32.logand flags 0x7FFFFFFFl) :: acc )
  379. end
  380. in
  381. loop []
  382. else
  383. let rec loop acc =
  384. let flags = read_i64 i in
  385. if flags = Int64.zero then
  386. acc
  387. else begin
  388. let is_ordinal = Int64.logand flags 0x8000000000000000L = 0x8000000000000000L in
  389. loop ( (is_ordinal, Int64.to_int32 (if is_ordinal then Int64.logand flags 0xFFFFL else Int64.logand flags 0x7FFFFFFFL)) :: acc )
  390. end
  391. in
  392. loop []
  393. in
  394. let imports = List.rev_map (function
  395. | true, ord ->
  396. SOrdinal (Int32.to_int ord)
  397. | false, rva ->
  398. seek_rva ctx rva;
  399. let hint = read_ui16 i in
  400. SName (hint, read_cstring i)
  401. ) imports_data in
  402. seek_rva ctx name_rva;
  403. let name = read_cstring i in
  404. {
  405. imp_name = name;
  406. imp_imports = imports;
  407. }
  408. ) tables
  409. let has_clr_header ctx = match get_dir ClrRuntimeHeader ctx with
  410. | 0l,_ | _,0l ->
  411. false
  412. | _ ->
  413. true
  414. let read_clr_header ctx = match get_dir ClrRuntimeHeader ctx with
  415. | 0l,_ | _,0l ->
  416. error "This PE file does not have managed content"
  417. | rva,size ->
  418. seek_rva ctx rva;
  419. let i = ctx.r.i in
  420. let cb = read_i32 i in
  421. let major = read_ui16 i in
  422. let minor = read_ui16 i in
  423. let read_tbl i =
  424. let rva = read_rva i in
  425. let size = read_real_i32 i in
  426. rva,size
  427. in
  428. let meta = read_tbl i in
  429. let corflags = clr_flags_of_int (read_i32 i) in
  430. let entry_point = read_rva i in
  431. let res = read_tbl i in
  432. let clrsig = read_tbl i in
  433. let codeman = read_tbl i in
  434. let vtable_fix = read_tbl i in
  435. let export_addr = read_tbl i in
  436. {
  437. clr_cb = cb;
  438. clr_major = major;
  439. clr_minor = minor;
  440. clr_meta = meta;
  441. clr_flags = corflags;
  442. clr_entry_point = entry_point;
  443. clr_res = res;
  444. clr_sig = clrsig;
  445. clr_codeman = codeman;
  446. clr_vtable_fix = vtable_fix;
  447. clr_export_address = export_addr;
  448. }
  449. let read r =
  450. let i = r.i in
  451. if read i <> 'M' || read i <> 'Z' then
  452. error "MZ magic header not found: Is the target file really a PE?";
  453. seek r 0x3c;
  454. let pe_sig_offset = read_i32 i in
  455. seek r pe_sig_offset;
  456. if really_nread_string i 4 <> "PE\x00\x00" then
  457. error "Invalid PE header signature: PE expected";
  458. let header = read_coff_header i in
  459. let pe_header = read_pe_header r header in
  460. {
  461. r = r;
  462. pe_header = pe_header;
  463. read_word = read_word (pe_header.pe_magic = P64);
  464. }