i386.inc 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. intel i386+
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$if not(defined(VER3_0)) and defined(linux)}
  13. {$define FPC_SYSTEM_STACKALIGNMENT16}
  14. {$endif not(defined(VER3_0)) and defined(linux)}
  15. {****************************************************************************
  16. Primitives
  17. ****************************************************************************}
  18. var
  19. os_supports_sse : boolean;
  20. { this variable is set to true while an SSE check is being executed, so that no SIGILL should be generated }
  21. sse_check : boolean;
  22. {$asmmode ATT}
  function cpuid_support : boolean;assembler;nostackframe;
  {
    Reports whether the CPUID instruction is available by trying to flip the
    ID flag (mask 0x200000, i.e. bit 21) in EFLAGS: if the bit can be changed,
    CpuID is supported.
    Tested under go32v1 and Linux on c6x86 with CpuID enabled and disabled (PFV)
  }
  asm
      pushfl                          // keep the original EFLAGS on the stack
      movl    (%esp),%eax
      xorl    $0x200000,%eax          // EAX = EFLAGS with the ID bit inverted
      pushl   %eax
      popfl                           // try to load the modified flags
      pushfl
      popl    %eax                    // EAX = EFLAGS as they were actually stored
      xorl    (%esp),%eax             // bits that differ from the saved original
      popfl                           // put the original EFLAGS back
      shrl    $21,%eax                // bring the ID-bit difference down to bit 0
      andb    $1,%al                  // AL = 1 when the ID bit could be toggled
  end;
  41. {$ifndef FPC_PIC}
  42. {$ifndef FPC_SYSTEM_HAS_MOVE}
  43. {$ifndef OLD_ASSEMBLER}
  44. {$define USE_FASTMOVE}
  45. {$i fastmove.inc}
  46. {$endif not OLD_ASSEMBLER}
  47. {$endif FPC_SYSTEM_HAS_MOVE}
  48. {$endif FPC_PIC}
  procedure fpc_cpuinit;
  { Deliberately empty on i386.
    Because SSE detection on x86 is awkward, the checks below are postponed
    to fpc_cpucodeinit, which must be implemented per operating system (FK):

      has_sse_support:=sse_support;
      has_mmx_support:=mmx_support;
      setup_fastmove;
  }
  begin
  end;
  58. {$ifndef darwin}
  // Thunk that copies the dword at the top of the stack into EBX. The routine
  // is declared nostackframe, so (%esp) is the return address pushed by the
  // caller's call instruction. The value is delivered in EBX; EAX is not set.
  59. function fpc_geteipasebx : pointer; [public, alias: 'fpc_geteipasebx'];assembler; nostackframe;
  60. asm
  61. movl (%esp),%ebx // EBX = return address of this call
  62. end;
  // Thunk that copies the dword at the top of the stack into ECX. The routine
  // is declared nostackframe, so (%esp) is the return address pushed by the
  // caller's call instruction. The value is delivered in ECX; EAX is not set.
  63. function fpc_geteipasecx : pointer; [public, alias: 'fpc_geteipasecx'];assembler; nostackframe;
  64. asm
  65. movl (%esp),%ecx // ECX = return address of this call
  66. end;
  67. {$endif}
  68. {$ifndef FPC_SYSTEM_HAS_MOVE}
  69. {$define FPC_SYSTEM_HAS_MOVE}
  // Copies count bytes from source to dest and copes with overlapping ranges:
  //   * nothing is done when count <= 0 or when source and dest are the same address;
  //   * a forward copy is used when dest is below source, or at least count bytes above it;
  //   * a backward copy (direction flag set with std, cleared again with cld) is used
  //     when dest lies inside the source range.
  // The code reads source from EAX, dest from EDX and count from ECX.
  // ESI and EDI are saved in locals on entry and restored before returning.
  70. procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
  71. var
  72. saveesi,saveedi : longint;
  73. asm
  74. movl %edi,saveedi
  75. movl %esi,saveesi
  76. movl %eax,%esi // ESI = source address
  77. movl %edx,%edi // EDI = destination address
  78. movl %ecx,%edx // EDX = number of bytes still to copy
  79. movl %edi,%eax // EAX = dest, turned into (dest - source) below
  80. { check for zero or negative count }
  81. cmpl $0,%edx
  82. jle .LMoveEnd
  83. { Check for back or forward }
  84. sub %esi,%eax
  85. jz .LMoveEnd { Do nothing when source=dest }
  86. jc .LFMove { Do forward, dest<source }
  87. cmp %edx,%eax
  88. jb .LBMove { Dest is in range of move, do backward }
  89. { Forward Copy }
  90. .LFMove:
  91. {$ifdef FPC_ENABLED_CLD}
  92. cld
  93. {$endif FPC_ENABLED_CLD}
  // Fewer than 15 bytes: skip the alignment work and copy bytewise.
  94. cmpl $15,%edx
  95. jl .LFMove1
  96. movl %edi,%ecx { Align on 32bits }
  97. negl %ecx
  98. andl $3,%ecx // ECX = (-dest) and 3 = bytes up to the next 4-byte boundary of EDI
  99. subl %ecx,%edx
  100. rep
  101. movsb
  102. movl %edx,%ecx
  103. andl $3,%edx // EDX = bytes left over after the dword copy
  104. shrl $2,%ecx // ECX = number of whole dwords
  105. rep
  106. movsl
  107. .LFMove1:
  108. movl %edx,%ecx
  109. rep
  110. movsb
  111. jmp .LMoveEnd
  112. { Backward Copy }
  113. .LBMove:
  114. std
  115. addl %edx,%esi
  116. addl %edx,%edi
  117. movl %edi,%ecx // ECX = dest + count (one past the last destination byte)
  118. decl %esi // ESI/EDI now address the last byte of each range
  119. decl %edi
  120. cmpl $15,%edx
  121. jl .LBMove1
  // NOTE(review): this computes (-(dest+count)) and 3, the same formula as the
  // forward path. For a descending copy, (dest+count) and 3 looks like the count
  // that would actually align EDI - confirm. Only the split between byte and
  // dword copies is affected; every byte is still copied exactly once.
  122. negl %ecx { Align on 32bits }
  123. andl $3,%ecx
  124. subl %ecx,%edx
  125. rep
  126. movsb
  127. movl %edx,%ecx
  128. andl $3,%edx
  129. shrl $2,%ecx
  // Step back 3 bytes so ESI/EDI address the start of the last dword to copy.
  130. subl $3,%esi
  131. subl $3,%edi
  132. rep
  133. movsl
  // Undo the adjustment so ESI/EDI address the last remaining byte again.
  134. addl $3,%esi
  135. addl $3,%edi
  136. .LBMove1:
  137. movl %edx,%ecx
  138. rep
  139. movsb
  140. cld // leave the direction flag cleared again
  141. .LMoveEnd:
  142. movl saveedi,%edi
  143. movl saveesi,%esi
  144. end;
  145. {$endif FPC_SYSTEM_HAS_MOVE}
  146. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  147. {$define FPC_SYSTEM_HAS_FILLCHAR}
  Procedure FillChar(var x;count:SizeInt;value:byte);assembler; nostackframe;
  {
    Writes `value` into `count` consecutive bytes starting at `x`.
    The code reads the address of x from EAX, count from EDX and value from CL.
    Nothing is written when count <= 0.
  }
  asm
      cmpl    $22,%edx                { empirically determined value on a Core 2 Duo Conroe }
      jg      .LFillBulk
      testl   %edx,%edx
      jle     .LFillDone
      { short fill: one byte per iteration }
  .LFillBytewise:
      movb    %cl,(%eax)
      incl    %eax
      decl    %edx
      jnz     .LFillBytewise
  .LFillDone:
      ret
      { long fill: whole dwords first, then the 0..3 remaining bytes }
  .LFillBulk:
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      push    %edi
      movl    %eax,%edi
      movzbl  %cl,%eax                { read the value before ECX is reused as counter }
      movl    %edx,%ecx
      shrl    $2,%ecx                 { ECX = number of dwords }
      andl    $3,%edx                 { EDX = trailing bytes }
      imul    $0x01010101,%eax        { replicate the byte into all four bytes of EAX }
      rep
      stosl
      movl    %edx,%ecx
      rep
      stosb
      pop     %edi
  end;
  181. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  182. {$ifndef FPC_SYSTEM_HAS_FILLWORD}
  183. {$define FPC_SYSTEM_HAS_FILLWORD}
  procedure fillword(var x;count : SizeInt;value : word);assembler;
  {
    Stores `value` into `count` consecutive 16-bit words starting at `x`.
    The code reads the address of x from EAX, count from EDX and value from CX.
    Nothing is written when count <= 0. EDI is saved and restored.
  }
  var
    saveedi : longint;
  asm
      movl    %edi,saveedi
      movl    %eax,%edi
      movzwl  %cx,%eax                { EAX = value, zero-extended }
      movl    %edx,%ecx
      { nothing to do for a zero or negative count }
      testl   %ecx,%ecx
      jle     .LFillWordDone
      imul    $0x00010001,%eax        { same word in both halves of EAX }
      movl    %ecx,%edx               { remember the full word count }
      shrl    $1,%ecx                 { pairs of words are written as dwords }
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      rep
      stosl
      movl    %edx,%ecx
      andl    $1,%ecx                 { one word left when count is odd }
      rep
      stosw
  .LFillWordDone:
      movl    saveedi,%edi
  end;
  212. {$endif FPC_SYSTEM_HAS_FILLWORD}
  213. {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
  214. {$define FPC_SYSTEM_HAS_FILLDWORD}
  procedure filldword(var x;count : SizeInt;value : dword);assembler;
  {
    Stores `value` into `count` consecutive 32-bit dwords starting at `x`.
    The code reads the address of x from EAX, count from EDX and value from ECX.
    Nothing is written when count <= 0. EDI is saved and restored.
  }
  var
    saveedi : longint;
  asm
      movl    %edi,saveedi
      movl    %eax,%edi
      movl    %ecx,%eax               { EAX = fill pattern }
      movl    %edx,%ecx               { ECX = repeat count }
      { nothing to do for a zero or negative count }
      testl   %ecx,%ecx
      jle     .LFillDWordDone
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      rep
      stosl
  .LFillDWordDone:
      movl    saveedi,%edi
  end;
  234. {$endif FPC_SYSTEM_HAS_FILLDWORD}
  235. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  236. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  // Returns the zero-based offset of the first byte equal to b within the len
  // bytes starting at buf, or -1 when no such byte is found.
  // The code reads buf from EAX, len from EDX and b from CL.
  // len is tested with unsigned conditions (jb/jae), so a negative len behaves
  // like a very large count.
  // Strategy: bytewise until EAX is 4-byte aligned, then 16 bytes per iteration
  // using a "does this dword contain a zero byte" bit trick on (dword xor pattern),
  // then dword-wise, then the last 0..3 bytes one at a time.
  // ESI and EDI are pushed/popped; the initial buf is kept on the stack so the
  // result can be formed as (match address - buf).
  237. function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  238. asm
  239. push %esi
  240. push %edi
  241. push %eax { save initial value of 'buf' }
  242. cmp $4,%edx { less than 4 bytes, just test byte by byte. }
  243. jb .Ltail
  // Build ECX = b replicated into all four bytes.
  244. mov %cl,%ch { prepare pattern }
  245. movzwl %cx,%esi
  246. shl $16,%ecx
  247. or %esi,%ecx
  248. .Lalignloop:
  249. test $3,%al { align to 4 bytes if necessary }
  250. je .Laligned
  251. cmp %cl,(%eax)
  252. je .Lexit
  253. inc %eax
  254. dec %edx
  255. jmp .Lalignloop
  256. .balign 16 { Main loop, unrolled 4 times for speed }
  257. .Lloop:
  258. mov (%eax),%esi { load dword }
  259. xor %ecx,%esi { XOR with pattern, bytes equal to target are now 0 }
  260. lea -0x01010101(%esi),%edi
  261. xor %esi,%edi { (x-0x01010101) xor x }
  262. not %esi
  263. and $0x80808080,%esi
  264. and %edi,%esi { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
  265. jnz .Lfound { one of the bytes matches }
  // Same test for the dword at offset 4.
  266. mov 4(%eax),%esi
  267. xor %ecx,%esi
  268. lea -0x01010101(%esi),%edi
  269. xor %esi,%edi
  270. not %esi
  271. and $0x80808080,%esi
  272. and %edi,%esi
  273. jnz .Lfound4
  // Same test for the dword at offset 8.
  274. mov 8(%eax),%esi
  275. xor %ecx,%esi
  276. lea -0x01010101(%esi),%edi
  277. xor %esi,%edi
  278. not %esi
  279. and $0x80808080,%esi
  280. and %edi,%esi
  281. jnz .Lfound8
  // Same test for the dword at offset 12.
  282. mov 12(%eax),%esi
  283. xor %ecx,%esi
  284. lea -0x01010101(%esi),%edi
  285. xor %esi,%edi
  286. not %esi
  287. and $0x80808080,%esi
  288. and %edi,%esi
  289. jnz .Lfound12
  290. add $16,%eax
  291. .Laligned:
  // Only enter the unrolled loop while at least 16 bytes remain.
  292. sub $16,%edx
  293. jae .Lloop { Still more than 16 bytes remaining }
  294. { Process remaining bytes (<16 left at this point) }
  295. { length is offset by -16 at this point }
  296. .Lloop2:
  297. cmp $4-16,%edx { < 4 bytes left? }
  298. jb .Ltail
  299. mov (%eax),%esi
  300. xor %ecx,%esi
  301. lea -0x01010101(%esi),%edi
  302. xor %esi,%edi
  303. not %esi
  304. and $0x80808080,%esi
  305. and %edi,%esi
  306. jne .Lfound
  307. add $4,%eax
  308. sub $4,%edx
  309. jmp .Lloop2
  310. .Ltail: { Less than 4 bytes remaining, check one by one }
  // The low two bits give the remaining count whether or not EDX still
  // carries the -16 offset.
  311. and $3, %edx
  312. jz .Lnotfound
  313. .Lloop3:
  314. cmp %cl,(%eax)
  315. je .Lexit
  316. inc %eax
  317. dec %edx
  318. jnz .Lloop3
  319. .Lnotfound:
  320. or $-1,%eax
  321. jmp .Lexit1
  322. { add missing source pointer increments }
  323. .Lfound12:
  324. add $4,%eax
  325. .Lfound8:
  326. add $4,%eax
  327. .Lfound4:
  328. add $4,%eax
  329. .Lfound:
  // ESI holds the match mask; test its bytes from lowest to highest to find
  // the first matching byte and advance EAX to it.
  330. test $0xff,%esi
  331. jnz .Lexit
  332. inc %eax
  333. test $0xff00,%esi
  334. jnz .Lexit
  335. inc %eax
  336. test $0xff0000,%esi
  337. jnz .Lexit
  338. inc %eax
  339. .Lexit:
  // Turn the match address into an offset relative to the saved buf.
  340. sub (%esp),%eax
  341. .Lexit1:
  342. pop %ecx { removes initial 'buf' value }
  343. pop %edi
  344. pop %esi
  345. end;
  346. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  347. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  348. {$define FPC_SYSTEM_HAS_INDEXWORD}
  function Indexword(Const buf;len:SizeInt;b:word):SizeInt; assembler;
  {
    Scans the first `len` 16-bit words at `buf` for the value `b` using
    repne scasw and returns the zero-based index of the first match,
    or -1 when len is 0 or no word matches.
    EDI and EBX are saved in locals and restored on exit.
  }
  var
    saveedi,saveebx : longint;
  asm
      movl    %edi,saveedi
      movl    %ebx,saveebx
      movl    Buf,%edi                // scan pointer
      movw    b,%bx                   // keep the search value while ECX is reloaded
      movl    Len,%ecx                // number of words to scan
      xorl    %eax,%eax
      testl   %ecx,%ecx
      jz      .LWordMissing
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    %ecx,%edx               // EDX = original length
      movw    %bx,%ax
      repne
      scasw
      jne     .LWordMissing
      subl    %ecx,%edx               // index = len - remaining - 1
      leal    -1(%edx),%eax
      jmp     .LWordDone
  .LWordMissing:
      orl     $-1,%eax
  .LWordDone:
      movl    saveedi,%edi
      movl    saveebx,%ebx
  end;
  379. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  380. {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
  381. {$define FPC_SYSTEM_HAS_INDEXDWORD}
  function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt; assembler;
  {
    Scans the first `len` 32-bit dwords at `buf` for the value `b` using
    repne scasl and returns the zero-based index of the first match,
    or -1 when len is 0 or no dword matches.
    The code reads buf from EAX, len from EDX and b from ECX.
    EDI and EBX are saved in locals and restored on exit.
  }
  var
    saveedi,saveebx : longint;
  asm
      movl    %edi,saveedi
      movl    %ebx,saveebx
      movl    %eax,%edi               // scan pointer
      movl    %ecx,%ebx               // keep the search value while ECX is reloaded
      movl    %edx,%ecx               // number of dwords to scan
      xorl    %eax,%eax
      testl   %ecx,%ecx
      jz      .LDWordMissing
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    %ecx,%edx               // EDX = original length
      movl    %ebx,%eax
      repne
      scasl
      jne     .LDWordMissing
      subl    %ecx,%edx               // index = len - remaining - 1
      leal    -1(%edx),%eax
      jmp     .LDWordDone
  .LDWordMissing:
      orl     $-1,%eax
  .LDWordDone:
      movl    saveedi,%edi
      movl    saveebx,%ebx
  end;
  412. {$endif FPC_SYSTEM_HAS_INDEXDWORD}
  413. {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  414. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  {
    Compares `len` bytes of buf1 and buf2. Returns 0 when len is 0 or all bytes
    are equal; otherwise returns (byte of buf1) - (byte of buf2) at the first
    position where they differ, both taken as unsigned bytes.
    The code reads buf1 from EAX, buf2 from EDX and len from ECX.
    Up to 57 bytes are compared in a simple loop; longer inputs use
    repe cmpsb / cmpsl with buf1 aligned to 4 bytes.
  }
  asm
      cmpl    $57,%ecx                { empirically determined value on a Core 2 Duo Conroe }
      jg      .LCmpLong
      testl   %ecx,%ecx
      je      .LCmpEmpty
      pushl   %ebx
  .LCmpNextByte:
      movb    (%eax),%bl
      cmpb    (%edx),%bl
      leal    1(%eax),%eax            { lea advances the pointers without touching the flags }
      leal    1(%edx),%edx
      jne     .LCmpShortResult
      decl    %ecx
      jne     .LCmpNextByte
  .LCmpShortResult:
      movzbl  -1(%edx),%ecx           { last byte read from buf2 }
      movzbl  %bl,%eax                { last byte read from buf1 }
      subl    %ecx,%eax
      popl    %ebx
      ret
  .LCmpEmpty:
      xorl    %eax,%eax
      ret
  .LCmpLong:
      pushl   %esi
      pushl   %edi
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    %eax,%edi
      movl    %edx,%esi
      movl    %ecx,%eax               { EAX = bytes still to compare }
      movl    %edi,%ecx
      negl    %ecx
      andl    $3,%ecx                 { ECX = (-EDI) and 3 = bytes needed to align EDI }
      subl    %ecx,%eax
      testl   %ecx,%ecx               { ZF=1 when there is nothing to align, so jne falls through }
      repe
      cmpsb
      jne     .LCmpLongResult
      movl    %eax,%ecx
      andl    $3,%eax                 { EAX = remainder after the dword compare }
      shrl    $2,%ecx                 { ECX = number of dwords }
      testl   %ecx,%ecx               { ZF=1 when no dword is compared }
      repe
      cmpsl
      je      .LCmpTail               { all dwords equal: go on with the remainder }
      movl    $4,%eax                 { mismatch: step back one dword and rescan it bytewise }
      subl    %eax,%esi
      subl    %eax,%edi
  .LCmpTail:
      movl    %eax,%ecx
      testl   %eax,%eax               { keeps ZF defined when EAX=0 }
      repe
      cmpsb
  .LCmpLongResult:
      movzbl  -1(%esi),%ecx
      movzbl  -1(%edi),%eax           { failing (or last equal) position }
      subl    %ecx,%eax
      popl    %edi
      popl    %esi
  end;
  479. {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  480. {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
  481. {$define FPC_SYSTEM_HAS_COMPAREWORD}
  // Compares len 16-bit words of buf1 and buf2. Returns 0 when len is 0 or all
  // words are equal; otherwise returns (word of buf1) - (word of buf2) at the
  // first position where they differ, both zero-extended.
  // The code reads buf1 from EAX, buf2 from EDX and len from ECX.
  // Up to 32 words are compared in a simple loop; longer inputs are compared
  // dword-wise with repe cmpsl after skipping up to 3 alignment bytes, and the
  // failing dword (or the remainder) is rescanned word-wise with repe cmpsw.
  482. function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  483. asm
  484. cmpl $32,%ecx { empirical average value, on a Athlon XP the
  485. break even is at 14, on a Core 2 Duo > 100 }
  486. jg .LCmpWordFull
  487. testl %ecx,%ecx
  488. je .LCmpWordZero
  489. pushl %ebx
  490. .LCmpWordLoop:
  491. movw (%eax),%bx
  492. cmpw (%edx),%bx
  // lea advances both pointers without changing the flags set by cmpw.
  493. leal 2(%eax),%eax
  494. leal 2(%edx),%edx
  495. jne .LCmpWordExitFast
  496. decl %ecx
  497. jne .LCmpWordLoop
  498. .LCmpWordExitFast:
  499. movzwl -2(%edx),%ecx { Compare last position }
  500. movzwl %bx,%eax
  501. subl %ecx,%eax
  502. popl %ebx
  503. ret
  504. .LCmpWordZero:
  505. movl $0,%eax
  506. ret
  507. .LCmpWordFull:
  508. pushl %esi
  509. pushl %edi
  510. pushl %ebx
  511. {$ifdef FPC_ENABLED_CLD}
  512. cld
  513. {$endif FPC_ENABLED_CLD}
  // EDI = buf1, ESI = buf2, EAX = number of words still to compare.
  514. movl %eax,%edi
  515. movl %edx,%esi
  516. movl %ecx,%eax
  517. movl (%edi),%ebx // Compare alignment bytes.
  518. cmpl (%esi),%ebx
  519. jne .LCmpword2 // Aligning will go wrong already. Max 2 words will be scanned Branch NOW
  520. shll $1,%eax {Convert word count to bytes}
  521. movl %edi,%edx { Align comparing is already done, so simply add}
  522. negl %edx { calc bytes to align -%edi and 3}
  523. andl $3,%edx
  524. addl %edx,%esi { Skip max 3 bytes alignment}
  525. addl %edx,%edi
  526. subl %edx,%eax { Subtract from number of bytes to go}
  527. movl %eax,%ecx { Make copy of bytes to go}
  528. andl $3,%eax { Calc remainder (mod 4) }
  529. andl $1,%edx { %edx is 1 if array not 2-aligned, 0 otherwise}
  530. shrl $2,%ecx { divide bytes to go by 4, DWords to go}
  531. orl %ecx,%ecx { Sets zero flag if ecx=0 -> no cmp}
  532. repe { Compare entire DWords}
  533. cmpsl
  534. je .LCmpword2a { All equal? then to the left over bytes}
  535. movl $4,%eax { Not equal. Rescan the last 4 bytes bytewise}
  536. subl %eax,%esi { Go back one DWord}
  537. subl %eax,%edi
  538. incl %eax {if not odd then this does nothing, else it makes
  539. sure that adding %edx increases from 2 to 3 words}
  540. .LCmpword2a:
  // Step back by EDX (0 or 1) so the pointers are on a word boundary relative
  // to the buffer start again, then convert the byte count in EAX to words.
  541. subl %edx,%esi { Subtract alignment}
  542. subl %edx,%edi
  543. addl %edx,%eax
  544. shrl $1,%eax
  545. .LCmpword2:
  546. movl %eax,%ecx {words still to (re)scan}
  547. orl %eax,%eax {prevent disaster in case %eax=0}
  548. repe
  549. cmpsw
  550. .LCmpword3:
  551. movzwl -2(%esi),%ecx
  552. movzwl -2(%edi),%eax // Compare failing (or equal) position
  553. subl %ecx,%eax // calculate end result.
  554. .LCmpwordExit:
  555. popl %ebx
  556. popl %edi
  557. popl %esi
  558. end;
  559. {$endif FPC_SYSTEM_HAS_COMPAREWORD}
  560. {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
  561. {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  {
    Compares `len` 32-bit dwords of buf1 and buf2 as unsigned values.
    Returns 0 when len is 0 or all dwords are equal, 1 when the first differing
    dword of buf1 is above the one of buf2, and -1 when it is below.
    The code reads buf1 from EAX, buf2 from EDX and len from ECX.
  }
  asm
      cmpl    $32,%ecx                { empirical average value, on a Athlon XP the
                                        break even is at 12, on a Core 2 Duo > 100 }
      jg      .LCmpDLong
      testl   %ecx,%ecx
      je      .LCmpDEmpty
      pushl   %ebx
  .LCmpDNext:
      movl    (%eax),%ebx
      cmpl    (%edx),%ebx
      leal    4(%eax),%eax            { lea advances the pointers without touching the flags }
      leal    4(%edx),%edx
      jne     .LCmpDShortResult
      decl    %ecx
      jne     .LCmpDNext
  .LCmpDShortResult:
      cmpl    -4(%edx),%ebx           // compare failing (or equal) position again
      seta    %al                     // 1 when buf1 > buf2
      setb    %cl                     // 1 when buf1 < buf2
      subb    %cl,%al                 // AL = 1, 0 or -1
      movsbl  %al,%eax
      popl    %ebx
      ret
  .LCmpDEmpty:
      xorl    %eax,%eax
      ret
  .LCmpDLong:
      pushl   %esi
      pushl   %edi
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    %eax,%edi
      movl    %edx,%esi
      repe                            { Compare entire DWords }
      cmpsl
      movl    -4(%edi),%eax           // failing (or last equal) dword of buf1
      cmpl    -4(%esi),%eax           // against the matching dword of buf2
      seta    %al
      setb    %cl
      subb    %cl,%al
      movsbl  %al,%eax
      popl    %edi
      popl    %esi
  end;
  614. {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
  615. {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
  616. {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt; assembler;
  {
    Returns the zero-based index of the first occurrence of `b` in `buf`,
    or -1 when it is not found. The scan ends after `len` bytes or after the
    first #0 byte, whichever comes first. Each byte is compared with `b`
    before it is tested for #0, so searching for #0 itself yields the
    position of the terminator.

    The code reads buf from EAX, len from EDX and b from CL.
    ESI and EBX are saved in locals and restored on exit.

    Fix: for len = 0 the routine used to jump straight to .LFound before ECX
    had been zeroed, returning whatever the caller left in ECX (the `b`
    argument plus undefined upper bits). It now returns -1 in that case.
  }
  var
    saveesi,saveebx : longint;
  asm
      movl    %esi,saveesi
      movl    %ebx,saveebx
      // Can't use scasb, or will have to do it twice, think this
      // is faster for small "len"
      movl    %eax,%esi               // Load address
      movzbl  %cl,%ebx                // Load searchpattern
      testl   %edx,%edx
      je      .LNotFound              // empty range: nothing can match
      xorl    %ecx,%ecx               // zero index in Buf
      xorl    %eax,%eax               // To make DWord compares possible
      .balign 4
  .LLoop:
      movb    (%esi),%al              // Load byte
      cmpb    %al,%bl
      je      .LFound                 // byte the same?
      incl    %ecx
      incl    %esi
      cmpl    %edx,%ecx               // Maximal distance reached?
      je      .LNotFound
      testl   %eax,%eax               // Nullchar = end of search?
      jne     .LLoop
  .LNotFound:
      movl    $-1,%ecx                // Not found return -1
  .LFound:
      movl    %ecx,%eax
      movl    saveesi,%esi
      movl    saveebx,%ebx
  end;
  649. {$endif FPC_SYSTEM_HAS_INDEXCHAR0}
  650. {****************************************************************************
  651. String
  652. ****************************************************************************}
  653. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  654. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  procedure fpc_shortstr_to_shortstr(out res:shortstring; const sstr: shortstring);assembler;[public,alias:'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  {
    Copies the short string sstr into res.
    The length written to res is the smaller of sstr's length byte and the
    value found in EDX on entry (presumably the hidden high(res) argument -
    TODO confirm). Strings of 7 or more characters are copied dword-wise
    after aligning the destination; shorter ones bytewise.
    ESI and EDI are saved in locals and restored on exit.
  }
  var
    saveesi,saveedi : longint;
  asm
  {$ifdef FPC_PROFILE}
      push    %eax
      push    %edx
      push    %ecx
      call    mcount
      pop     %ecx
      pop     %edx
      pop     %eax
  {$endif FPC_PROFILE}
      movl    %edi,saveedi
      movl    %esi,saveesi
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    res,%edi
      movl    sstr,%esi
      movl    %edx,%ecx               { ECX = upper bound for the length }
      xorl    %eax,%eax
      lodsb                           { EAX = length byte of the source }
      cmpl    %ecx,%eax
      jbe     .LLengthOk
      movl    %ecx,%eax               { clamp to the upper bound }
  .LLengthOk:
      stosb                           { write the length byte of the result }
      cmpl    $7,%eax
      jl      .LCopyRest
      movl    %edi,%ecx               { Align on 32bits }
      negl    %ecx
      andl    $3,%ecx
      subl    %ecx,%eax
      rep
      movsb
      movl    %eax,%ecx
      andl    $3,%eax                 { EAX = bytes left after the dword copy }
      shrl    $2,%ecx                 { ECX = number of dwords }
      rep
      movsl
  .LCopyRest:
      movl    %eax,%ecx
      rep
      movsb
      movl    saveedi,%edi
      movl    saveesi,%esi
  end;
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  {
    Copies the short string at sstr to dstr. The length written to dstr is
    the smaller of the source's length byte and len. Strings of 7 or more
    characters are copied dword-wise after aligning the destination; shorter
    ones bytewise. EAX and ECX are preserved by push/pop; ESI and EDI are
    declared as modified by the asm block.
  }
  begin
    asm
  {$ifdef FPC_PROFILE}
      push    %eax
      push    %edx
      push    %ecx
      call    mcount
      pop     %ecx
      pop     %edx
      pop     %eax
  {$endif FPC_PROFILE}
      pushl   %eax
      pushl   %ecx
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    dstr,%edi
      movl    sstr,%esi
      xorl    %eax,%eax
      movl    len,%ecx                { ECX = upper bound for the length }
      lodsb                           { EAX = length byte of the source }
      cmpl    %ecx,%eax
      jbe     .LLengthOk
      movl    %ecx,%eax               { clamp to the upper bound }
  .LLengthOk:
      stosb                           { write the length byte of the result }
      cmpl    $7,%eax
      jl      .LCopyRest
      movl    %edi,%ecx               { Align on 32bits }
      negl    %ecx
      andl    $3,%ecx
      subl    %ecx,%eax
      rep
      movsb
      movl    %eax,%ecx
      andl    $3,%eax                 { EAX = bytes left after the dword copy }
      shrl    $2,%ecx                 { ECX = number of dwords }
      rep
      movsl
  .LCopyRest:
      movl    %eax,%ecx
      rep
      movsb
      popl    %ecx
      popl    %eax
    end ['ESI','EDI'];
  end;
  751. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  752. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  753. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  function fpc_shortstr_compare(const left,right:shortstring): longint;assembler; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  {
    Compares two short strings. The first min(length(left),length(right))
    characters are compared; at the first difference the result is
    (char of left) - (char of right) as unsigned bytes. When that common
    part is equal the result is length(left) - length(right).
    ESI, EDI and EBX are saved in locals and restored on exit.
  }
  var
    saveesi,saveedi,saveebx : longint;
  asm
  {$ifdef FPC_PROFILE}
      push    %eax
      push    %edx
      push    %ecx
      call    mcount
      pop     %ecx
      pop     %edx
      pop     %eax
  {$endif FPC_PROFILE}
      movl    %edi,saveedi
      movl    %esi,saveesi
      movl    %ebx,saveebx
  {$ifdef FPC_ENABLED_CLD}
      cld
  {$endif FPC_ENABLED_CLD}
      movl    right,%esi
      movl    left,%edi
      movzbl  (%esi),%eax             // length of right
      movzbl  (%edi),%ebx             // length of left
      movl    %eax,%edx               // EDX keeps length of right for the final result
      incl    %esi                    // step over both length bytes
      incl    %edi
      cmpl    %ebx,%eax
      jbe     .LHaveMin
      movl    %ebx,%eax               // EAX = smaller of the two lengths
  .LHaveMin:
      cmpl    $7,%eax
      jl      .LCmpTail
      movl    %edi,%ecx               { Align on 32bits }
      negl    %ecx
      andl    $3,%ecx
      subl    %ecx,%eax
      testl   %ecx,%ecx               // ZF=1 when there is nothing to align
      repe
      cmpsb
      jne     .LCharsDiffer
      movl    %eax,%ecx
      andl    $3,%eax                 // remainder after the dword compare
      shrl    $2,%ecx                 // number of dwords
      testl   %ecx,%ecx               // ZF=1 when no dword is compared
      repe
      cmpsl
      je      .LCmpTail
      movl    $4,%eax                 // mismatch: rescan the last dword bytewise
      subl    %eax,%esi
      subl    %eax,%edi
  .LCmpTail:
      movl    %eax,%ecx
      testl   %eax,%eax               // ZF=1 when nothing is left to compare
      repe
      cmpsb
      je      .LResult                // common part equal: lengths decide
  .LCharsDiffer:
      movzbl  -1(%esi),%edx           // Compare failing (or equal) position
      movzbl  -1(%edi),%ebx
  .LResult:
      movl    %ebx,%eax               // Compare length or position
      subl    %edx,%eax
      movl    saveedi,%edi
      movl    saveesi,%esi
      movl    saveebx,%ebx
  end;
  820. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  821. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  822. {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  // Converts the null-terminated string p into the short string res.
  // The code reads res from EAX and p from ECX. A nil p yields an empty string.
  // ECX is then reused as a counter that starts at 1, i.e. it holds
  // (characters copied + 1); the final "addb $255,%cl" subtracts that 1 again
  // before the value is stored as the length byte.
  // The source is read bytewise until it is 4-byte aligned, then dword-wise
  // with a zero-byte bit test, and finally bytewise up to the 255-character limit.
  // NOTE(review): EDX is overwritten without being read, so any capacity passed
  // for res is not consulted here; the code assumes room for 255 characters
  // (see the comment at .LStrPasEndFound) - confirm against the callers.
  // EBX, ESI and EDI are saved in locals and restored on exit.
  823. procedure fpc_pchar_to_shortstr(out res : shortstring;p:pchar);assembler;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  824. var
  825. saveres,saveebx,saveesi,saveedi : longint;
  826. asm
  827. {$ifdef FPC_PROFILE}
  828. push %eax
  829. push %edx
  830. push %ecx
  831. call mcount
  832. pop %ecx
  833. pop %edx
  834. pop %eax
  835. {$endif FPC_PROFILE}
  836. movl %ebx,saveebx
  837. movl %esi,saveesi
  838. movl %edi,saveedi
  839. movl %ecx,%esi
  840. movl %eax,%edi
  841. movl %edi,saveres
  842. movl $1,%ecx
  // The flags tested by jz below come from testl; the movl in between leaves them unchanged.
  843. testl %esi,%esi
  844. movl %esi,%eax
  845. jz .LStrPasDone
  // EDX = p rounded up to a multiple of 4; after the subl below, the number of
  // bytes (0..3) to copy before ESI is aligned.
  846. leal 3(%esi),%edx
  847. andl $-4,%edx
  848. // skip length byte
  849. incl %edi
  850. subl %esi,%edx
  851. jz .LStrPasAligned
  852. // align source to multiple of 4 (not dest, because we can't read past
  853. // the end of the source, since that may be past the end of the heap
  854. // -> sigsegv!!)
  855. .LStrPasAlignLoop:
  856. movb (%esi),%al
  857. incl %esi
  858. testb %al,%al
  859. jz .LStrPasDone
  860. incl %edi
  861. incb %cl
  862. decb %dl
  // movb does not change the flags, so jne still tests the result of decb.
  863. movb %al,-1(%edi)
  864. jne .LStrPasAlignLoop
  865. .balign 16
  866. .LStrPasAligned:
  // Copy one dword and test it for a zero byte:
  // EAX = (x + 0xfefefeff) and (not x) and 0x80808080 is non-zero when x contains a zero byte.
  867. movl (%esi),%ebx
  868. addl $4,%edi
  869. leal 0x0fefefeff(%ebx),%eax
  870. movl %ebx,%edx
  871. addl $4,%esi
  872. notl %edx
  873. andl %edx,%eax
  874. addl $4,%ecx
  875. andl $0x080808080,%eax
  // The dword is stored before the zero-byte test is evaluated; movl keeps the flags of andl.
  876. movl %ebx,-4(%edi)
  877. jnz .LStrPasEndFound
  // Stop the dword loop once the counter exceeds 252 and finish bytewise.
  878. cmpl $252,%ecx
  879. ja .LStrPasPreEndLoop
  880. jmp .LStrPasAligned
  881. .LStrPasEndFound:
  // Undo the +4 for the last dword, then add 1 for every character that
  // precedes the first zero byte (each shrl moves one marker bit into CF).
  882. subl $4,%ecx
  883. // this won't overwrite data since the result = 255 char string
  884. // and we never process more than the first 255 chars of p
  885. shrl $8,%eax
  886. jc .LStrPasDone
  887. incl %ecx
  888. shrl $8,%eax
  889. jc .LStrPasDone
  890. incl %ecx
  891. shrl $8,%eax
  892. jc .LStrPasDone
  893. incl %ecx
  894. jmp .LStrPasDone
  895. .LStrPasPreEndLoop:
  // CL = 0 here means the counter has reached 256, i.e. 255 characters were copied.
  896. testb %cl,%cl
  897. jz .LStrPasDone
  898. movl (%esi),%eax
  899. .LStrPasEndLoop:
  // Copy the bytes of the last dword one at a time until a zero byte is seen
  // or CL wraps around to 0.
  900. testb %al,%al
  901. jz .LStrPasDone
  902. movb %al,(%edi)
  903. shrl $8,%eax
  904. incl %edi
  905. incb %cl
  906. jnz .LStrPasEndLoop
  907. .LStrPasDone:
  908. movl saveres,%edi
  909. addb $255,%cl // CL = CL - 1: turn (length + 1) into the length
  910. movb %cl,(%edi) // store the length byte of res
  911. movl saveesi,%esi
  912. movl saveedi,%edi
  913. movl saveebx,%ebx
  914. end;
  915. {$endif FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  916. {$IFNDEF INTERNAL_BACKTRACE}
  917. {$define FPC_SYSTEM_HAS_GET_FRAME}
  function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Returns the current value of EBP. No stack frame is set up by this
    routine, so EBP is still the caller's value. }
  asm
      movl    %ebp,%eax
  end;
  922. {$ENDIF not INTERNAL_BACKTRACE}
  923. {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  Function Get_pc_addr : Pointer;assembler;nostackframe;
  { Returns the dword at the top of the stack. Because no stack frame is set
    up, that is the return address of this call. }
  asm
      movl    (%esp),%eax
  end;
  928. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;
  { Returns the pointer stored at framebp+4, or nil when framebp is nil.
    On win32 the frame pointer must additionally lie between Sptr and
    StackTop, otherwise nil is returned. The addr parameter is not used. }
  {$if defined(win32)}
  { Windows has StackTop always properly set }
  begin
    Result:=nil;
    if (framebp<>nil) and (framebp<=StackTop) and (framebp>=Sptr) then
      Result:=PPointer(framebp+4)^;
  end;
  {$else defined(win32)}
  nostackframe;assembler;
  asm
      testl   %eax,%eax               // nil frame pointer: return nil
      jz      .LNoFrame
      movl    4(%eax),%eax            // dword stored at framebp+4
  .LNoFrame:
  end;
  {$endif defined(win32)}
  947. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;
  { Returns the pointer stored at framebp, or nil when framebp is nil.
    On win32 the frame pointer must additionally lie between Sptr and
    StackTop, otherwise nil is returned. The addr parameter is not used. }
  {$if defined(win32)}
  { Windows has StackTop always properly set }
  begin
    Result:=nil;
    if (framebp<>nil) and (framebp<=StackTop) and (framebp>=Sptr) then
      Result:=PPointer(framebp)^;
  end;
  {$else defined(win32)}
  nostackframe;assembler;
  asm
      testl   %eax,%eax               // nil frame pointer: return nil
      jz      .LNoFrame
      movl    (%eax),%eax             // dword stored at framebp
  .LNoFrame:
  end;
  {$endif defined(win32)}
  966. {$define FPC_SYSTEM_HAS_SPTR}
  967. Function Sptr : Pointer;assembler;nostackframe;
  968. asm
  969. movl %esp,%eax
  970. end;
  971. {****************************************************************************
  972. Str()
  973. ****************************************************************************}
  974. {$if defined(disabled) and defined(regcall) }
  975. {$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
  976. {$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
{ Entry point shared by both int_str overloads: the longword version jumps
  here, into the body of the longint version, once it has set up the same
  register state. }
label str_int_shortcut;

{ Unsigned overload. Saves the same three registers as the signed version,
  copies edx to edi, clears edx (the "sign" value) and continues at
  str_int_shortcut.
  NOTE(review): presumably eax = l, edx = address of s and ecx = high(s)
  under the register calling convention - confirm against the caller. }
procedure int_str(l:longword;out s:string);assembler;nostackframe;
asm
        pushl %esi
        pushl %edi
        pushl %ebx
        mov %edx,%edi
        xor %edx,%edx
        jmp str_int_shortcut
end;

{ Signed overload; also hosts the conversion code shared with the unsigned
  overload. The digits table holds 0 followed by the powers of ten
  10^1..10^9 and is used to correct the digit-count estimate. }
procedure int_str(l:longint;out s:string);assembler;nostackframe;
{Optimized for speed, but balanced with size.}
const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
                                      100000,1000000,10000000,
                                      100000000,1000000000);
asm
{$ifdef FPC_PROFILE}
        { eax, edx and ecx are preserved around the profiling hook }
        push %eax
        push %edx
        push %ecx
        call mcount
        pop %ecx
        pop %edx
        pop %eax
{$endif FPC_PROFILE}
        push %esi
        push %edi
        push %ebx
        movl %edx,%edi
        { Calculate absolute value and put sign in edx}
        cltd
        xorl %edx,%eax
        subl %edx,%eax
        { cltd left -1 or 0 in edx; after negl it is 1 for a negative input, else 0 }
        negl %edx
str_int_shortcut:
        movl %ecx,%esi
        {Calculate amount of digits in ecx.}
        xorl %ecx,%ecx
        bsrl %eax,%ecx
        incl %ecx
        { 1233/4096 is roughly log10(2): turns the bit length into a digit-count estimate }
        imul $1233,%ecx
        shr $12,%ecx
{$ifdef FPC_PIC}
        call fpc_geteipasebx
{$ifdef darwin}
        movl digits-.Lpic(%ebx),%ebx
{$else}
        addl $_GLOBAL_OFFSET_TABLE_,%ebx
        movl digits@GOT(%ebx),%ebx
{$endif}
        cmpl (%ebx,%ecx,4),%eax
{$else}
        cmpl digits(,%ecx,4),%eax
{$endif}
        { cmc inverts the borrow, so adcl adds 1 exactly when eax >= digits[ecx] }
        cmc
        adcl $0,%ecx {Nr. digits ready in ecx.}
        {Write length & sign.}
        { bl = sign + digit count, bh = '-'; one word store writes both bytes at (edi) }
        lea (%edx,%ecx),%ebx
        movb $45,%bh {movb $'-,%bh Not supported by our ATT reader.}
        movw %bx,(%edi)
        addl %edx,%edi
        subl %edx,%esi
        {Skip digits beyond string length.}
        movl %eax,%edx
        subl %ecx,%esi
        jae .Lloop_write
        .balign 4
.Lloop_skip:
        { each pass divides edx by 10, i.e. drops the lowest remaining digit }
        movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
        mull %edx
        shrl $3,%edx
        decl %ecx
        jz .Ldone {If (l<0) and (high(s)=1) this jump is taken.}
        incl %esi
        jnz .Lloop_skip
        {Write out digits.}
        .balign 4
.Lloop_write:
        { digits are stored from the last position backwards, at (edi,ecx) }
        movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
        {Pre-add '0'}
        leal 48(%edx),%ebx {leal $'0(,%edx),%ebx Not supported by our ATT reader.}
        mull %edx
        shrl $3,%edx
        leal (%edx,%edx,8),%eax {x mod 10 = x-10*(x div 10)}
        subl %edx,%ebx
        subl %eax,%ebx
        movb %bl,(%edi,%ecx)
        decl %ecx
        jnz .Lloop_write
.Ldone:
        { restore the registers pushed on entry, in reverse order }
        popl %ebx
        popl %edi
        popl %esi
end;
  1071. {$endif}
  1072. {****************************************************************************
  1073. Bounds Check
  1074. ****************************************************************************}
  1075. { do a thread-safe inc/dec }
  1076. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  1077. function cpudeclocked(var l : longint) : boolean;assembler;nostackframe;
  1078. asm
  1079. { this check should be done because a lock takes a lot }
  1080. { of time! }
  1081. lock
  1082. decl (%eax)
  1083. setzb %al
  1084. end;
  1085. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  1086. procedure cpuinclocked(var l : longint);assembler;nostackframe;
  1087. asm
  1088. lock
  1089. incl (%eax)
  1090. end;
  1091. // inline SMP check and normal lock.
  1092. // the locked one is so slow, inlining doesn't matter.
  1093. function declocked(var l : longint) : boolean; inline;
  1094. begin
  1095. if not ismultithread then
  1096. begin
  1097. dec(l);
  1098. declocked:=l=0;
  1099. end
  1100. else
  1101. declocked:=cpudeclocked(l);
  1102. end;
  1103. procedure inclocked(var l : longint); inline;
  1104. begin
  1105. if not ismultithread then
  1106. inc(l)
  1107. else
  1108. cpuinclocked(l);
  1109. end;
  1110. function InterLockedDecrement (var Target: longint) : longint; assembler;
  1111. asm
  1112. movl $-1,%edx
  1113. xchgl %edx,%eax
  1114. lock
  1115. xaddl %eax, (%edx)
  1116. decl %eax
  1117. end;
  1118. function InterLockedIncrement (var Target: longint) : longint; assembler;
  1119. asm
  1120. movl $1,%edx
  1121. xchgl %edx,%eax
  1122. lock
  1123. xaddl %eax, (%edx)
  1124. incl %eax
  1125. end;
  1126. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  1127. asm
  1128. xchgl (%eax),%edx
  1129. movl %edx,%eax
  1130. end;
  1131. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  1132. asm
  1133. xchgl %eax,%edx
  1134. lock
  1135. xaddl %eax, (%edx)
  1136. end;
  1137. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler;
  1138. asm
  1139. xchgl %eax,%ecx
  1140. lock
  1141. cmpxchgl %edx, (%ecx)
  1142. end;
  1143. function InterlockedCompareExchange64(var Target: int64; NewValue: int64; Comperand: int64): int64; assembler;
  1144. asm
  1145. pushl %ebx
  1146. pushl %edi
  1147. movl %eax,%edi
  1148. movl Comperand+4,%edx
  1149. movl Comperand+0,%eax
  1150. movl NewValue+4,%ecx
  1151. movl NewValue+0,%ebx
  1152. lock cmpxchg8b (%edi)
  1153. pop %edi
  1154. pop %ebx
  1155. end;
  1156. {****************************************************************************
  1157. FPU
  1158. ****************************************************************************}
const
  { Internal constants for use in system unit }

  { FPU_* : single-bit flags; FPU_ExceptionMask covers the low eight bits. }
  FPU_Invalid = 1;
  FPU_Denormal = 2;
  FPU_DivisionByZero = 4;
  FPU_Overflow = 8;
  FPU_Underflow = $10;
  FPU_StackUnderflow = $20;
  FPU_StackOverflow = $40;
  FPU_ExceptionMask = $ff;

  { MM_* : single-bit flags; MM_ExceptionMask is the OR of the six flags below.
    "MM_Precicion" is misspelt, but it is an identifier and is left as is. }
  MM_Invalid = 1;
  MM_Denormal = 2;
  MM_DivisionByZero = 4;
  MM_Overflow = 8;
  MM_Underflow = $10;
  MM_Precicion = $20;
  MM_ExceptionMask = $3f;

  { MM_Mask* : one bit each, occupying bits 7..12.
    NOTE(review): presumably the exception-mask bits of MXCSR - confirm. }
  MM_MaskInvalidOp = %0000000010000000;
  MM_MaskDenorm = %0000000100000000;
  MM_MaskDivZero = %0000001000000000;
  MM_MaskOverflow = %0000010000000000;
  MM_MaskUnderflow = %0000100000000000;
  MM_MaskPrecision = %0001000000000000;
  1182. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  1183. Procedure SysInitFPU;
  1184. begin
  1185. end;
  1186. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  1187. Procedure SysResetFPU;
  1188. var
  1189. { these locals are so we don't have to hack pic code in the assembler }
  1190. localmxcsr: dword;
  1191. localfpucw: word;
  1192. begin
  1193. localfpucw:=Default8087CW;
  1194. asm
  1195. fninit
  1196. fwait
  1197. fldcw localfpucw
  1198. end;
  1199. if has_sse_support then
  1200. begin
  1201. localmxcsr:=DefaultMXCSR;
  1202. asm
  1203. { setup sse exceptions }
  1204. {$ifndef OLD_ASSEMBLER}
  1205. ldmxcsr localmxcsr
  1206. {$else OLD_ASSEMBLER}
  1207. mov localmxcsr,%eax
  1208. subl $4,%esp
  1209. mov %eax,(%esp)
  1210. //ldmxcsr (%esp)
  1211. .byte 0x0f,0xae,0x14,0x24
  1212. addl $4,%esp
  1213. {$endif OLD_ASSEMBLER}
  1214. end;
  1215. end;
  1216. end;
{ because of the brain-dead SSE detection on x86, this test is postponed }
  1218. procedure fpc_cpucodeinit;
  1219. var
  1220. _ecx,_edx : longint;
  1221. begin
  1222. if cpuid_support then
  1223. begin
  1224. asm
  1225. movl $1,%eax
  1226. cpuid
  1227. movl %edx,_edx
  1228. movl %ecx,_ecx
  1229. end ['ebx'];
  1230. has_mmx_support:=(_edx and $800000)<>0;
  1231. if ((_edx and $2000000)<>0) then
  1232. begin
  1233. os_supports_sse:=true;
  1234. sse_check:=true;
  1235. asm
  1236. { force an sse exception if no sse is supported, the exception handler sets
  1237. os_supports_sse to false then }
  1238. { don't change this instruction, the code above depends on its size }
  1239. {$ifdef OLD_ASSEMBLER}
  1240. .byte 0x0f,0x28,0xf7
  1241. {$else}
  1242. movaps %xmm7, %xmm6
  1243. {$endif not EMX}
  1244. end;
  1245. sse_check:=false;
  1246. has_sse_support:=os_supports_sse;
  1247. end;
  1248. if has_sse_support then
  1249. begin
  1250. has_sse2_support:=((_edx and $4000000)<>0);
  1251. has_sse3_support:=((_ecx and $200)<>0);
  1252. end;
  1253. end;
  1254. { don't let libraries influence the FPU cw set by the host program }
  1255. if IsLibrary then
  1256. begin
  1257. Default8087CW:=Get8087CW;
  1258. if has_sse_support then
  1259. DefaultMXCSR:=GetMXCSR;
  1260. end;
  1261. SysResetFPU;
  1262. {$ifdef USE_FASTMOVE}
  1263. setup_fastmove;
  1264. {$endif}
  1265. end;
  1266. {$if not defined(darwin) and defined(regcall) }
  1267. { darwin requires that the stack is aligned to 16 bytes when calling another function }
  1268. {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
  1269. {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
  1270. Procedure fpc_AnsiStr_Decr_Ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF']; compilerproc; nostackframe; assembler;
  1271. asm
  1272. cmpl $0,(%eax)
  1273. je .Lquit
  1274. pushl %esi
  1275. movl (%eax),%esi
  1276. subl $12,%esi // points to start of allocation
  1277. movl $0,(%eax) // s:=nil
  1278. cmpl $0,4(%esi) // exit if refcount<0
  1279. jl .Lj3596
  1280. {$ifdef FPC_PIC}
  1281. pushl %ebx
  1282. call fpc_geteipasebx
  1283. addl $_GLOBAL_OFFSET_TABLE_,%ebx
  1284. movl ismultithread@GOT(%ebx),%ebx
  1285. movl (%ebx),%ebx
  1286. cmp $0, %ebx
  1287. popl %ebx
  1288. {$else FPC_PIC}
  1289. cmpl $0,ismultithread
  1290. {$endif FPC_PIC}
  1291. jne .Lj3610
  1292. decl 4(%esi)
  1293. je .Lj3620
  1294. jmp .Lj3596
  1295. .Lj3610:
  1296. leal 4(%esi),%eax
  1297. call cpudeclocked
  1298. testb %al,%al
  1299. je .Lj3596
  1300. .Lj3620:
  1301. movl %esi,%eax
  1302. { freemem is not an assembler leaf function like fpc_geteipasebx and cpudeclocked, so it
  1303. needs to be called with proper stack alignment }
  1304. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1305. leal -8(%esp),%esp
  1306. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1307. call FPC_FREEMEM
  1308. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1309. leal 8(%esp),%esp
  1310. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1311. .Lj3596:
  1312. popl %esi
  1313. .Lquit:
  1314. end;
  1315. function fpc_truely_ansistr_unique(Var S : Pointer): Pointer; forward;
  1316. {$define FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
  1317. Function fpc_ansistr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_ANSISTR_UNIQUE']; compilerproc; nostackframe;assembler;
  1318. asm
  1319. // Var S located in register
  1320. // Var $result located in register
  1321. movl %eax,%edx
  1322. // [437] pointer(result) := pointer(s);
  1323. movl (%eax),%eax
  1324. // [438] If Pointer(S)=Nil then
  1325. testl %eax,%eax
  1326. je .Lj4031
  1327. .Lj4036:
  1328. // [440] if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
  1329. movl -8(%eax),%ecx
  1330. cmpl $1,%ecx
  1331. je .Lj4038
  1332. // [441] result:=fpc_truely_ansistr_unique(s);
  1333. movl %edx,%eax
  1334. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1335. leal -12(%esp),%esp
  1336. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1337. call fpc_truely_ansistr_unique
  1338. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1339. leal 12(%esp),%esp
  1340. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1341. .Lj4038:
  1342. .Lj4031:
  1343. // [442] end;
  1344. end;
  1345. {$endif FPC_HAS_FEATURE_ANSISTRINGS}
  1346. {$endif ndef darwin and defined(regcall) }
  1347. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  1348. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  1349. procedure ReadBarrier;assembler;nostackframe;
  1350. asm
  1351. lock
  1352. addl $0,0(%esp)
  1353. { alternative: lfence on SSE capable CPUs }
  1354. end;
  1355. procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1356. begin
  1357. { reads imply barrier on earlier reads depended on }
  1358. end;
  1359. procedure ReadWriteBarrier;assembler;nostackframe;
  1360. asm
  1361. lock
  1362. addl $0,0(%esp)
  1363. { alternative: mfence on SSE capable CPUs }
  1364. end;
  1365. procedure WriteBarrier;assembler;nostackframe;
  1366. asm
  1367. { no write reordering on intel CPUs (yet) }
  1368. end;
  1369. {$endif}
  1370. {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
  1371. {$define FPC_SYSTEM_HAS_BSF_QWORD}
  1372. function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1373. asm
  1374. bsfl 4(%esp),%eax
  1375. jnz .L2
  1376. .L1:
  1377. bsfl 8(%esp),%eax
  1378. jnz .L3
  1379. movl $223,%eax
  1380. .L3:
  1381. addl $32,%eax
  1382. .L2:
  1383. end;
  1384. {$endif FPC_SYSTEM_HAS_BSF_QWORD}
  1385. {$ifndef FPC_SYSTEM_HAS_BSR_QWORD}
  1386. {$define FPC_SYSTEM_HAS_BSR_QWORD}
  1387. function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1388. asm
  1389. bsrl 8(%esp),%eax
  1390. jz .L1
  1391. add $32,%eax
  1392. jmp .L2
  1393. .L1:
  1394. bsrl 4(%esp),%eax
  1395. jnz .L2
  1396. movl $255,%eax
  1397. .L2:
  1398. end;
  1399. {$endif FPC_SYSTEM_HAS_BSR_QWORD}
  1400. {$ifndef FPC_SYSTEM_HAS_SAR_QWORD}
  1401. {$define FPC_SYSTEM_HAS_SAR_QWORD}
  1402. function fpc_SarInt64(Const AValue : Int64;const Shift : Byte): Int64; [Public,Alias:'FPC_SARINT64']; compilerproc; assembler; nostackframe;
  1403. asm
  1404. movb %al,%cl
  1405. movl 8(%esp),%edx
  1406. movl 4(%esp),%eax
  1407. andb $63,%cl
  1408. cmpb $32,%cl
  1409. jnb .L1
  1410. shrdl %cl,%edx,%eax
  1411. sarl %cl,%edx
  1412. jmp .Lexit
  1413. .L1:
  1414. movl %edx,%eax
  1415. sarl $31,%edx
  1416. andb $31,%cl
  1417. sarl %cl,%eax
  1418. .Lexit:
  1419. end;
  1420. {$endif FPC_SYSTEM_HAS_SAR_QWORD}