{
    This file is part of the Free Pascal run time library.
    Copyright (c) 1999-2000 by the Free Pascal development team.

    Processor dependent implementation for the system unit for
    intel i386+

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

{****************************************************************************
                               Primitives
****************************************************************************}
  15. var
  16. os_supports_sse : boolean;
  17. { this variable is set to true, if currently an sse check is executed and no sig ill should be generated }
  18. sse_check : boolean;
  19. {$asmmode intel}
  20. function cpuid_support : boolean;assembler;
  21. {
  22. Check if the ID-flag can be changed, if changed then CpuID is supported.
  23. Tested under go32v1 and Linux on c6x86 with CpuID enabled and disabled (PFV)
  24. }
  25. asm
  26. push ebx
  27. pushfd
  28. pushfd
  29. pop eax
  30. mov ebx,eax
  31. xor eax,200000h
  32. push eax
  33. popfd
  34. pushfd
  35. pop eax
  36. popfd
  37. and eax,200000h
  38. and ebx,200000h
  39. cmp eax,ebx
  40. setnz al
  41. pop ebx
  42. end;
  43. {$asmmode ATT}
  44. procedure check_sse_support;
  45. var
  46. _ecx,_edx : longint;
  47. begin
  48. if cpuid_support then
  49. begin
  50. asm
  51. pushl %ebx
  52. movl $1,%eax
  53. cpuid
  54. movl %edx,_edx
  55. movl %ecx,_ecx
  56. popl %ebx
  57. end;
  58. has_sse_support:=((_edx and $2000000)<>0) and os_supports_sse;
  59. has_sse2_support:=((_edx and $4000000)<>0) and os_supports_sse;
  60. has_sse3_support:=((_ecx and $200)<>0) and os_supports_sse;
  61. end
  62. else
  63. begin
  64. { a cpu with without cpuid instruction supports never sse }
  65. has_sse_support:=false;
  66. has_sse2_support:=false;
  67. has_sse3_support:=false;
  68. end;
  69. end;
  70. { returns true, if the processor supports the mmx instructions }
  71. function mmx_support : boolean;
  72. var
  73. _edx : longint;
  74. begin
  75. if cpuid_support then
  76. begin
  77. asm
  78. pushl %ebx
  79. movl $1,%eax
  80. cpuid
  81. movl %edx,_edx
  82. popl %ebx
  83. end;
  84. mmx_support:=(_edx and $800000)<>0;
  85. end
  86. else
  87. { a cpu with without cpuid instruction supports never mmx }
  88. mmx_support:=false;
  89. end;
  90. {$ifndef FPC_PIC}
  91. {$ifndef FPC_SYSTEM_HAS_MOVE}
  92. {$define USE_FASTMOVE}
  93. {$i fastmove.inc}
  94. {$endif FPC_SYSTEM_HAS_MOVE}
  95. {$endif FPC_PIC}
  96. procedure fpc_cpuinit;
  97. begin
  98. { because of the brain dead sse detection on x86, this test is post poned to fpc_cpucodeinit which
  99. must be implemented OS dependend (FK)
  100. has_sse_support:=sse_support;
  101. has_mmx_support:=mmx_support;
  102. setup_fastmove;
  103. }
  104. os_supports_sse:=false;
  105. { don't let libraries influence the FPU cw set by the host program }
  106. if IsLibrary then
  107. Default8087CW:=Get8087CW;
  108. end;
  109. {$ifndef darwin}
  110. function fpc_geteipasebx : pointer; [public, alias: 'fpc_geteipasebx'];assembler; nostackframe;
  111. asm
  112. movl (%esp),%ebx
  113. end;
  114. function fpc_geteipasecx : pointer; [public, alias: 'fpc_geteipasecx'];assembler; nostackframe;
  115. asm
  116. movl (%esp),%ecx
  117. end;
  118. {$endif}
  119. {$ifndef FPC_SYSTEM_HAS_MOVE}
  120. {$define FPC_SYSTEM_HAS_MOVE}
  121. procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
  122. var
  123. saveesi,saveedi : longint;
  124. asm
  125. movl %edi,saveedi
  126. movl %esi,saveesi
  127. movl %eax,%esi
  128. movl %edx,%edi
  129. movl %ecx,%edx
  130. movl %edi,%eax
  131. { check for zero or negative count }
  132. cmpl $0,%edx
  133. jle .LMoveEnd
  134. { Check for back or forward }
  135. sub %esi,%eax
  136. jz .LMoveEnd { Do nothing when source=dest }
  137. jc .LFMove { Do forward, dest<source }
  138. cmp %edx,%eax
  139. jb .LBMove { Dest is in range of move, do backward }
  140. { Forward Copy }
  141. .LFMove:
  142. {$ifdef FPC_ENABLED_CLD}
  143. cld
  144. {$endif FPC_ENABLED_CLD}
  145. cmpl $15,%edx
  146. jl .LFMove1
  147. movl %edi,%ecx { Align on 32bits }
  148. negl %ecx
  149. andl $3,%ecx
  150. subl %ecx,%edx
  151. rep
  152. movsb
  153. movl %edx,%ecx
  154. andl $3,%edx
  155. shrl $2,%ecx
  156. rep
  157. movsl
  158. .LFMove1:
  159. movl %edx,%ecx
  160. rep
  161. movsb
  162. jmp .LMoveEnd
  163. { Backward Copy }
  164. .LBMove:
  165. std
  166. addl %edx,%esi
  167. addl %edx,%edi
  168. movl %edi,%ecx
  169. decl %esi
  170. decl %edi
  171. cmpl $15,%edx
  172. jl .LBMove1
  173. negl %ecx { Align on 32bits }
  174. andl $3,%ecx
  175. subl %ecx,%edx
  176. rep
  177. movsb
  178. movl %edx,%ecx
  179. andl $3,%edx
  180. shrl $2,%ecx
  181. subl $3,%esi
  182. subl $3,%edi
  183. rep
  184. movsl
  185. addl $3,%esi
  186. addl $3,%edi
  187. .LBMove1:
  188. movl %edx,%ecx
  189. rep
  190. movsb
  191. cld
  192. .LMoveEnd:
  193. movl saveedi,%edi
  194. movl saveesi,%esi
  195. end;
  196. {$endif FPC_SYSTEM_HAS_MOVE}
  197. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  198. {$define FPC_SYSTEM_HAS_FILLCHAR}
  199. Procedure FillChar(var x;count:SizeInt;value:byte);assembler; nostackframe;
  200. asm
  201. cmpl $22,%edx { empirically determined value on a Core 2 Duo Conroe }
  202. jg .LFillFull
  203. orl %edx,%edx
  204. jle .LFillZero
  205. .LFillLoop:
  206. movb %cl,(%eax)
  207. incl %eax
  208. decl %edx
  209. jne .LFillLoop
  210. .LFillZero:
  211. ret
  212. .LFillFull:
  213. {$ifdef FPC_ENABLED_CLD}
  214. cld
  215. {$endif FPC_ENABLED_CLD}
  216. push %edi
  217. movl %eax,%edi
  218. movzbl %cl,%eax
  219. movl %edx,%ecx
  220. imul $0x01010101,%eax { Expand al into a 4 subbytes of eax}
  221. shrl $2,%ecx
  222. andl $3,%edx
  223. rep
  224. stosl
  225. movl %edx,%ecx
  226. .LFill1:
  227. rep
  228. stosb
  229. .LFillEnd:
  230. pop %edi
  231. end;
  232. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  233. {$ifndef FPC_SYSTEM_HAS_FILLWORD}
  234. {$define FPC_SYSTEM_HAS_FILLWORD}
  235. procedure fillword(var x;count : SizeInt;value : word);assembler;
  236. var
  237. saveedi : longint;
  238. asm
  239. movl %edi,saveedi
  240. movl %eax,%edi
  241. movzwl %cx,%eax
  242. movl %edx,%ecx
  243. { check for zero or negative count }
  244. cmpl $0,%ecx
  245. jle .LFillWordEnd
  246. movl %eax,%edx
  247. shll $16,%eax
  248. orl %edx,%eax
  249. movl %ecx,%edx
  250. shrl $1,%ecx
  251. {$ifdef FPC_ENABLED_CLD}
  252. cld
  253. {$endif FPC_ENABLED_CLD}
  254. rep
  255. stosl
  256. movl %edx,%ecx
  257. andl $1,%ecx
  258. rep
  259. stosw
  260. .LFillWordEnd:
  261. movl saveedi,%edi
  262. end;
  263. {$endif FPC_SYSTEM_HAS_FILLWORD}
  264. {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
  265. {$define FPC_SYSTEM_HAS_FILLDWORD}
  266. procedure filldword(var x;count : SizeInt;value : dword);assembler;
  267. var
  268. saveedi : longint;
  269. asm
  270. movl %edi,saveedi
  271. movl %eax,%edi
  272. movl %ecx,%eax
  273. movl %edx,%ecx
  274. { check for zero or negative count }
  275. cmpl $0,%ecx
  276. jle .LFillDWordEnd
  277. {$ifdef FPC_ENABLED_CLD}
  278. cld
  279. {$endif FPC_ENABLED_CLD}
  280. rep
  281. stosl
  282. .LFillDWordEnd:
  283. movl saveedi,%edi
  284. end;
  285. {$endif FPC_SYSTEM_HAS_FILLDWORD}
  286. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  287. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  288. function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  289. asm
  290. push %esi
  291. push %edi
  292. push %eax { save initial value of 'buf' }
  293. cmp $4,%edx { less than 4 bytes, just test byte by byte. }
  294. jb .Ltail
  295. mov %cl,%ch { prepare pattern }
  296. movzwl %cx,%esi
  297. shl $16,%ecx
  298. or %esi,%ecx
  299. .Lalignloop:
  300. test $3,%al { align to 4 bytes if necessary }
  301. je .Laligned
  302. cmp %cl,(%eax)
  303. je .Lexit
  304. inc %eax
  305. dec %edx
  306. jmp .Lalignloop
  307. .balign 16 { Main loop, unrolled 4 times for speed }
  308. .Lloop:
  309. mov (%eax),%esi { load dword }
  310. xor %ecx,%esi { XOR with pattern, bytes equal to target are now 0 }
  311. lea -0x01010101(%esi),%edi
  312. xor %esi,%edi { (x-0x01010101) xor x }
  313. not %esi
  314. and $0x80808080,%esi
  315. and %edi,%esi { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
  316. jnz .Lfound { one of the bytes matches }
  317. mov 4(%eax),%esi
  318. xor %ecx,%esi
  319. lea -0x01010101(%esi),%edi
  320. xor %esi,%edi
  321. not %esi
  322. and $0x80808080,%esi
  323. and %edi,%esi
  324. jnz .Lfound4
  325. mov 8(%eax),%esi
  326. xor %ecx,%esi
  327. lea -0x01010101(%esi),%edi
  328. xor %esi,%edi
  329. not %esi
  330. and $0x80808080,%esi
  331. and %edi,%esi
  332. jnz .Lfound8
  333. mov 12(%eax),%esi
  334. xor %ecx,%esi
  335. lea -0x01010101(%esi),%edi
  336. xor %esi,%edi
  337. not %esi
  338. and $0x80808080,%esi
  339. and %edi,%esi
  340. jnz .Lfound12
  341. add $16,%eax
  342. .Laligned:
  343. sub $16,%edx
  344. jae .Lloop { Still more than 16 bytes remaining }
  345. { Process remaining bytes (<16 left at this point) }
  346. { length is offset by -16 at this point }
  347. .Lloop2:
  348. cmp $4-16,%edx { < 4 bytes left? }
  349. jb .Ltail
  350. mov (%eax),%esi
  351. xor %ecx,%esi
  352. lea -0x01010101(%esi),%edi
  353. xor %esi,%edi
  354. not %esi
  355. and $0x80808080,%esi
  356. and %edi,%esi
  357. jne .Lfound
  358. add $4,%eax
  359. sub $4,%edx
  360. jmp .Lloop2
  361. .Ltail: { Less than 4 bytes remaining, check one by one }
  362. and $3, %edx
  363. jz .Lnotfound
  364. .Lloop3:
  365. cmp %cl,(%eax)
  366. je .Lexit
  367. inc %eax
  368. dec %edx
  369. jnz .Lloop3
  370. .Lnotfound:
  371. or $-1,%eax
  372. jmp .Lexit1
  373. { add missing source pointer increments }
  374. .Lfound12:
  375. add $4,%eax
  376. .Lfound8:
  377. add $4,%eax
  378. .Lfound4:
  379. add $4,%eax
  380. .Lfound:
  381. test $0xff,%esi
  382. jnz .Lexit
  383. inc %eax
  384. test $0xff00,%esi
  385. jnz .Lexit
  386. inc %eax
  387. test $0xff0000,%esi
  388. jnz .Lexit
  389. inc %eax
  390. .Lexit:
  391. sub (%esp),%eax
  392. .Lexit1:
  393. pop %ecx { removes initial 'buf' value }
  394. pop %edi
  395. pop %esi
  396. end;
  397. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  398. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  399. {$define FPC_SYSTEM_HAS_INDEXWORD}
  400. function Indexword(Const buf;len:SizeInt;b:word):SizeInt; assembler;
  401. var
  402. saveedi,saveebx : longint;
  403. asm
  404. movl %edi,saveedi
  405. movl %ebx,saveebx
  406. movl Buf,%edi // Load String
  407. movw b,%bx
  408. movl Len,%ecx // Load len
  409. xorl %eax,%eax
  410. testl %ecx,%ecx
  411. jz .Lcharposnotfound
  412. {$ifdef FPC_ENABLED_CLD}
  413. cld
  414. {$endif FPC_ENABLED_CLD}
  415. movl %ecx,%edx // Copy for easy manipulation
  416. movw %bx,%ax
  417. repne
  418. scasw
  419. jne .Lcharposnotfound
  420. incl %ecx
  421. subl %ecx,%edx
  422. movl %edx,%eax
  423. jmp .Lready
  424. .Lcharposnotfound:
  425. movl $-1,%eax
  426. .Lready:
  427. movl saveedi,%edi
  428. movl saveebx,%ebx
  429. end;
  430. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  431. {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
  432. {$define FPC_SYSTEM_HAS_INDEXDWORD}
  433. function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt; assembler;
  434. var
  435. saveedi,saveebx : longint;
  436. asm
  437. movl %edi,saveedi
  438. movl %ebx,saveebx
  439. movl %eax,%edi
  440. movl %ecx,%ebx
  441. movl %edx,%ecx
  442. xorl %eax,%eax
  443. testl %ecx,%ecx
  444. jz .Lcharposnotfound
  445. {$ifdef FPC_ENABLED_CLD}
  446. cld
  447. {$endif FPC_ENABLED_CLD}
  448. movl %ecx,%edx // Copy for easy manipulation
  449. movl %ebx,%eax
  450. repne
  451. scasl
  452. jne .Lcharposnotfound
  453. incl %ecx
  454. subl %ecx,%edx
  455. movl %edx,%eax
  456. jmp .Lready
  457. .Lcharposnotfound:
  458. movl $-1,%eax
  459. .Lready:
  460. movl saveedi,%edi
  461. movl saveebx,%ebx
  462. end;
  463. {$endif FPC_SYSTEM_HAS_INDEXDWORD}
  464. {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  465. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  466. function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  467. asm
  468. cmpl $57,%ecx { empirically determined value on a Core 2 Duo Conroe }
  469. jg .LCmpbyteFull
  470. testl %ecx,%ecx
  471. je .LCmpbyteZero
  472. pushl %ebx
  473. .LCmpbyteLoop:
  474. movb (%eax),%bl
  475. cmpb (%edx),%bl
  476. leal 1(%eax),%eax
  477. leal 1(%edx),%edx
  478. jne .LCmpbyteExitFast
  479. decl %ecx
  480. jne .LCmpbyteLoop
  481. .LCmpbyteExitFast:
  482. movzbl -1(%edx),%ecx { Compare last position }
  483. movzbl %bl,%eax
  484. subl %ecx,%eax
  485. popl %ebx
  486. ret
  487. .LCmpbyteZero:
  488. movl $0,%eax
  489. ret
  490. .LCmpbyteFull:
  491. pushl %esi
  492. pushl %edi
  493. {$ifdef FPC_ENABLED_CLD}
  494. cld
  495. {$endif FPC_ENABLED_CLD}
  496. movl %eax,%edi
  497. movl %edx,%esi
  498. movl %ecx,%eax
  499. movl %edi,%ecx { Align on 32bits }
  500. negl %ecx { calc bytes to align (%edi and 3) xor 3= -%edi and 3 }
  501. andl $3,%ecx
  502. subl %ecx,%eax { Subtract from number of bytes to go }
  503. orl %ecx,%ecx
  504. rep
  505. cmpsb { The actual 32-bit Aligning }
  506. jne .LCmpbyte3
  507. movl %eax,%ecx { bytes to do, divide by 4 }
  508. andl $3,%eax { remainder }
  509. shrl $2,%ecx { The actual division }
  510. orl %ecx,%ecx { Sets zero flag if ecx=0 -> no cmp }
  511. rep
  512. cmpsl
  513. je .LCmpbyte2 { All equal? then to the left over bytes }
  514. movl $4,%eax { Not equal. Rescan the last 4 bytes bytewise }
  515. subl %eax,%esi
  516. subl %eax,%edi
  517. .LCmpbyte2:
  518. movl %eax,%ecx { bytes still to (re)scan }
  519. orl %eax,%eax { prevent disaster in case %eax=0 }
  520. rep
  521. cmpsb
  522. .LCmpbyte3:
  523. movzbl -1(%esi),%ecx
  524. movzbl -1(%edi),%eax { Compare failing (or equal) position }
  525. subl %ecx,%eax
  526. .LCmpbyteExit:
  527. popl %edi
  528. popl %esi
  529. end;
  530. {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  531. {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
  532. {$define FPC_SYSTEM_HAS_COMPAREWORD}
  533. function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  534. asm
  535. cmpl $32,%ecx { empirical average value, on a Athlon XP the
  536. break even is at 14, on a Core 2 Duo > 100 }
  537. jg .LCmpWordFull
  538. testl %ecx,%ecx
  539. je .LCmpWordZero
  540. pushl %ebx
  541. .LCmpWordLoop:
  542. movw (%eax),%bx
  543. cmpw (%edx),%bx
  544. leal 2(%eax),%eax
  545. leal 2(%edx),%edx
  546. jne .LCmpWordExitFast
  547. decl %ecx
  548. jne .LCmpWordLoop
  549. .LCmpWordExitFast:
  550. movzwl -2(%edx),%ecx { Compare last position }
  551. movzwl %bx,%eax
  552. subl %ecx,%eax
  553. popl %ebx
  554. ret
  555. .LCmpWordZero:
  556. movl $0,%eax
  557. ret
  558. .LCmpWordFull:
  559. pushl %esi
  560. pushl %edi
  561. pushl %ebx
  562. {$ifdef FPC_ENABLED_CLD}
  563. cld
  564. {$endif FPC_ENABLED_CLD}
  565. movl %eax,%edi
  566. movl %edx,%esi
  567. movl %ecx,%eax
  568. movl (%edi),%ebx // Compare alignment bytes.
  569. cmpl (%esi),%ebx
  570. jne .LCmpword2 // Aligning will go wrong already. Max 2 words will be scanned Branch NOW
  571. shll $1,%eax {Convert word count to bytes}
  572. movl %edi,%edx { Align comparing is already done, so simply add}
  573. negl %edx { calc bytes to align -%edi and 3}
  574. andl $3,%edx
  575. addl %edx,%esi { Skip max 3 bytes alignment}
  576. addl %edx,%edi
  577. subl %edx,%eax { Subtract from number of bytes to go}
  578. movl %eax,%ecx { Make copy of bytes to go}
  579. andl $3,%eax { Calc remainder (mod 4) }
  580. andl $1,%edx { %edx is 1 if array not 2-aligned, 0 otherwise}
  581. shrl $2,%ecx { divide bytes to go by 4, DWords to go}
  582. orl %ecx,%ecx { Sets zero flag if ecx=0 -> no cmp}
  583. rep { Compare entire DWords}
  584. cmpsl
  585. je .LCmpword2a { All equal? then to the left over bytes}
  586. movl $4,%eax { Not equal. Rescan the last 4 bytes bytewise}
  587. subl %eax,%esi { Go back one DWord}
  588. subl %eax,%edi
  589. incl %eax {if not odd then this does nothing, else it makes
  590. sure that adding %edx increases from 2 to 3 words}
  591. .LCmpword2a:
  592. subl %edx,%esi { Subtract alignment}
  593. subl %edx,%edi
  594. addl %edx,%eax
  595. shrl $1,%eax
  596. .LCmpword2:
  597. movl %eax,%ecx {words still to (re)scan}
  598. orl %eax,%eax {prevent disaster in case %eax=0}
  599. rep
  600. cmpsw
  601. .LCmpword3:
  602. movzwl -2(%esi),%ecx
  603. movzwl -2(%edi),%eax // Compare failing (or equal) position
  604. subl %ecx,%eax // calculate end result.
  605. .LCmpwordExit:
  606. popl %ebx
  607. popl %edi
  608. popl %esi
  609. end;
  610. {$endif FPC_SYSTEM_HAS_COMPAREWORD}
  611. {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
  612. {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  613. function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  614. asm
  615. cmpl $32,%ecx { empirical average value, on a Athlon XP the
  616. break even is at 12, on a Core 2 Duo > 100 }
  617. jg .LCmpDWordFull
  618. testl %ecx,%ecx
  619. je .LCmpDWordZero
  620. pushl %ebx
  621. .LCmpDWordLoop:
  622. movl (%eax),%ebx
  623. cmpl (%edx),%ebx
  624. leal 4(%eax),%eax
  625. leal 4(%edx),%edx
  626. jne .LCmpDWordExitFast
  627. decl %ecx
  628. jne .LCmpDWordLoop
  629. .LCmpDWordExitFast:
  630. xorl %eax,%eax
  631. movl -4(%edx),%edx // Compare failing (or equal) position
  632. subl %edx,%ebx // calculate end result.
  633. setb %dl
  634. seta %cl
  635. addb %cl,%al
  636. subb %dl,%al
  637. movsbl %al,%eax
  638. popl %ebx
  639. ret
  640. .LCmpDWordZero:
  641. movl $0,%eax
  642. ret
  643. .LCmpDWordFull:
  644. pushl %esi
  645. pushl %edi
  646. {$ifdef FPC_ENABLED_CLD}
  647. cld
  648. {$endif FPC_ENABLED_CLD}
  649. movl %eax,%edi
  650. movl %edx,%esi
  651. xorl %eax,%eax
  652. rep { Compare entire DWords}
  653. cmpsl
  654. movl -4(%edi),%edi // Compare failing (or equal) position
  655. subl -4(%esi),%edi // calculate end result.
  656. setb %dl
  657. seta %cl
  658. addb %cl,%al
  659. subb %dl,%al
  660. movsbl %al,%eax
  661. .LCmpDwordExit:
  662. popl %edi
  663. popl %esi
  664. end;
  665. {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
  666. {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
  667. {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  668. function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt; assembler;
  669. var
  670. saveesi,saveebx : longint;
  671. asm
  672. movl %esi,saveesi
  673. movl %ebx,saveebx
  674. // Can't use scasb, or will have to do it twice, think this
  675. // is faster for small "len"
  676. movl %eax,%esi // Load address
  677. movzbl %cl,%ebx // Load searchpattern
  678. testl %edx,%edx
  679. je .LFound
  680. xorl %ecx,%ecx // zero index in Buf
  681. xorl %eax,%eax // To make DWord compares possible
  682. .balign 4
  683. .LLoop:
  684. movb (%esi),%al // Load byte
  685. cmpb %al,%bl
  686. je .LFound // byte the same?
  687. incl %ecx
  688. incl %esi
  689. cmpl %edx,%ecx // Maximal distance reached?
  690. je .LNotFound
  691. testl %eax,%eax // Nullchar = end of search?
  692. jne .LLoop
  693. .LNotFound:
  694. movl $-1,%ecx // Not found return -1
  695. .LFound:
  696. movl %ecx,%eax
  697. movl saveesi,%esi
  698. movl saveebx,%ebx
  699. end;
  700. {$endif FPC_SYSTEM_HAS_INDEXCHAR0}
{****************************************************************************
                                 String
****************************************************************************}
  704. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  705. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  706. procedure fpc_shortstr_to_shortstr(out res:shortstring; const sstr: shortstring);assembler;[public,alias:'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  707. var
  708. saveesi,saveedi : longint;
  709. asm
  710. {$ifdef FPC_PROFILE}
  711. push %eax
  712. push %edx
  713. push %ecx
  714. call mcount
  715. pop %ecx
  716. pop %edx
  717. pop %eax
  718. {$endif FPC_PROFILE}
  719. movl %edi,saveedi
  720. movl %esi,saveesi
  721. {$ifdef FPC_ENABLED_CLD}
  722. cld
  723. {$endif FPC_ENABLED_CLD}
  724. movl res,%edi
  725. movl sstr,%esi
  726. movl %edx,%ecx
  727. xorl %eax,%eax
  728. lodsb
  729. cmpl %ecx,%eax
  730. jbe .LStrCopy1
  731. movl %ecx,%eax
  732. .LStrCopy1:
  733. stosb
  734. cmpl $7,%eax
  735. jl .LStrCopy2
  736. movl %edi,%ecx { Align on 32bits }
  737. negl %ecx
  738. andl $3,%ecx
  739. subl %ecx,%eax
  740. rep
  741. movsb
  742. movl %eax,%ecx
  743. andl $3,%eax
  744. shrl $2,%ecx
  745. rep
  746. movsl
  747. .LStrCopy2:
  748. movl %eax,%ecx
  749. rep
  750. movsb
  751. movl saveedi,%edi
  752. movl saveesi,%esi
  753. end;
  754. procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  755. begin
  756. asm
  757. {$ifdef FPC_PROFILE}
  758. push %eax
  759. push %edx
  760. push %ecx
  761. call mcount
  762. pop %ecx
  763. pop %edx
  764. pop %eax
  765. {$endif FPC_PROFILE}
  766. pushl %eax
  767. pushl %ecx
  768. {$ifdef FPC_ENABLED_CLD}
  769. cld
  770. {$endif FPC_ENABLED_CLD}
  771. movl dstr,%edi
  772. movl sstr,%esi
  773. xorl %eax,%eax
  774. movl len,%ecx
  775. lodsb
  776. cmpl %ecx,%eax
  777. jbe .LStrCopy1
  778. movl %ecx,%eax
  779. .LStrCopy1:
  780. stosb
  781. cmpl $7,%eax
  782. jl .LStrCopy2
  783. movl %edi,%ecx { Align on 32bits }
  784. negl %ecx
  785. andl $3,%ecx
  786. subl %ecx,%eax
  787. rep
  788. movsb
  789. movl %eax,%ecx
  790. andl $3,%eax
  791. shrl $2,%ecx
  792. rep
  793. movsl
  794. .LStrCopy2:
  795. movl %eax,%ecx
  796. rep
  797. movsb
  798. popl %ecx
  799. popl %eax
  800. end ['ESI','EDI'];
  801. end;
  802. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  803. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  804. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  805. function fpc_shortstr_compare(const left,right:shortstring): longint;assembler; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  806. var
  807. saveesi,saveedi,saveebx : longint;
  808. asm
  809. {$ifdef FPC_PROFILE}
  810. push %eax
  811. push %edx
  812. push %ecx
  813. call mcount
  814. pop %ecx
  815. pop %edx
  816. pop %eax
  817. {$endif FPC_PROFILE}
  818. movl %edi,saveedi
  819. movl %esi,saveesi
  820. movl %ebx,saveebx
  821. {$ifdef FPC_ENABLED_CLD}
  822. cld
  823. {$endif FPC_ENABLED_CLD}
  824. movl right,%esi
  825. movl left,%edi
  826. movzbl (%esi),%eax
  827. movzbl (%edi),%ebx
  828. movl %eax,%edx
  829. incl %esi
  830. incl %edi
  831. cmpl %ebx,%eax
  832. jbe .LStrCmp1
  833. movl %ebx,%eax
  834. .LStrCmp1:
  835. cmpl $7,%eax
  836. jl .LStrCmp2
  837. movl %edi,%ecx { Align on 32bits }
  838. negl %ecx
  839. andl $3,%ecx
  840. subl %ecx,%eax
  841. orl %ecx,%ecx
  842. rep
  843. cmpsb
  844. jne .LStrCmp3
  845. movl %eax,%ecx
  846. andl $3,%eax
  847. shrl $2,%ecx
  848. orl %ecx,%ecx
  849. rep
  850. cmpsl
  851. je .LStrCmp2
  852. movl $4,%eax
  853. subl %eax,%esi
  854. subl %eax,%edi
  855. .LStrCmp2:
  856. movl %eax,%ecx
  857. orl %eax,%eax
  858. rep
  859. cmpsb
  860. je .LStrCmp4
  861. .LStrCmp3:
  862. movzbl -1(%esi),%edx // Compare failing (or equal) position
  863. movzbl -1(%edi),%ebx
  864. .LStrCmp4:
  865. movl %ebx,%eax // Compare length or position
  866. subl %edx,%eax
  867. movl saveedi,%edi
  868. movl saveesi,%esi
  869. movl saveebx,%ebx
  870. end;
  871. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  872. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  873. {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  874. procedure fpc_pchar_to_shortstr(out res : shortstring;p:pchar);assembler;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  875. var
  876. saveres,saveebx,saveesi,saveedi : longint;
  877. asm
  878. {$ifdef FPC_PROFILE}
  879. push %eax
  880. push %edx
  881. push %ecx
  882. call mcount
  883. pop %ecx
  884. pop %edx
  885. pop %eax
  886. {$endif FPC_PROFILE}
  887. movl %ebx,saveebx
  888. movl %esi,saveesi
  889. movl %edi,saveedi
  890. movl %ecx,%esi
  891. movl %eax,%edi
  892. movl %edi,saveres
  893. movl $1,%ecx
  894. testl %esi,%esi
  895. movl %esi,%eax
  896. jz .LStrPasDone
  897. leal 3(%esi),%edx
  898. andl $-4,%edx
  899. // skip length byte
  900. incl %edi
  901. subl %esi,%edx
  902. jz .LStrPasAligned
  903. // align source to multiple of 4 (not dest, because we can't read past
  904. // the end of the source, since that may be past the end of the heap
  905. // -> sigsegv!!)
  906. .LStrPasAlignLoop:
  907. movb (%esi),%al
  908. incl %esi
  909. testb %al,%al
  910. jz .LStrPasDone
  911. incl %edi
  912. incb %cl
  913. decb %dl
  914. movb %al,-1(%edi)
  915. jne .LStrPasAlignLoop
  916. .balign 16
  917. .LStrPasAligned:
  918. movl (%esi),%ebx
  919. addl $4,%edi
  920. leal 0x0fefefeff(%ebx),%eax
  921. movl %ebx,%edx
  922. addl $4,%esi
  923. notl %edx
  924. andl %edx,%eax
  925. addl $4,%ecx
  926. andl $0x080808080,%eax
  927. movl %ebx,-4(%edi)
  928. jnz .LStrPasEndFound
  929. cmpl $252,%ecx
  930. ja .LStrPasPreEndLoop
  931. jmp .LStrPasAligned
  932. .LStrPasEndFound:
  933. subl $4,%ecx
  934. // this won't overwrite data since the result = 255 char string
  935. // and we never process more than the first 255 chars of p
  936. shrl $8,%eax
  937. jc .LStrPasDone
  938. incl %ecx
  939. shrl $8,%eax
  940. jc .LStrPasDone
  941. incl %ecx
  942. shrl $8,%eax
  943. jc .LStrPasDone
  944. incl %ecx
  945. jmp .LStrPasDone
  946. .LStrPasPreEndLoop:
  947. testb %cl,%cl
  948. jz .LStrPasDone
  949. movl (%esi),%eax
  950. .LStrPasEndLoop:
  951. testb %al,%al
  952. jz .LStrPasDone
  953. movb %al,(%edi)
  954. shrl $8,%eax
  955. incl %edi
  956. incb %cl
  957. jnz .LStrPasEndLoop
  958. .LStrPasDone:
  959. movl saveres,%edi
  960. addb $255,%cl
  961. movb %cl,(%edi)
  962. movl saveesi,%esi
  963. movl saveedi,%edi
  964. movl saveebx,%ebx
  965. end;
  966. {$endif FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  967. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  968. {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  // Returns the number of bytes that precede the first zero byte of p.
  // A nil p yields 0. The code takes p from %eax and leaves the result in %eax.
  969. function fpc_pchar_length(p:pchar):sizeint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; compilerproc;
  970. var
  971. saveedi : longint; // keeps the caller's %edi while %edi serves as the scan pointer
  972. asm
  973. {$ifdef FPC_PROFILE}
  // profiling build: eax/edx/ecx are saved and restored around the mcount call
  974. push %eax
  975. push %edx
  976. push %ecx
  977. call mcount
  978. pop %ecx
  979. pop %edx
  980. pop %eax
  981. {$endif FPC_PROFILE}
  982. test %eax,%eax // nil pointer: eax is already 0 and is returned as the length
  983. jz .LStrLenDone
  984. movl %edi,saveedi
  985. movl %eax,%edi // scasb compares al with the byte at (%edi)
  986. movl $0xffffffff,%ecx // largest possible count, so the scan ends on the terminator
  987. xorl %eax,%eax // al = 0 is the byte being searched for
  988. {$ifdef FPC_ENABLED_CLD}
  989. cld
  990. {$endif FPC_ENABLED_CLD}
  991. repne
  992. scasb
  // ecx dropped by one for every byte examined, terminator included:
  // ecx = $ffffffff - (len+1), hence len = $fffffffe - ecx
  993. movl $0xfffffffe,%eax
  994. subl %ecx,%eax
  995. movl saveedi,%edi
  996. .LStrLenDone:
  997. end;
  998. {$endif FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  999. {$IFNDEF INTERNAL_BACKTRACE}
  1000. {$define FPC_SYSTEM_HAS_GET_FRAME}
  // Returns the current contents of %ebp; no stack frame is set up (nostackframe).
  1001. function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  1002. asm
  1003. movl %ebp,%eax
  1004. end;
  1005. {$ENDIF not INTERNAL_BACKTRACE}
  1006. {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  // Returns the dword found at the top of the stack. The routine is frameless
  // (nostackframe), so on entry that slot holds the return address pushed by the call.
  1007. Function Get_pc_addr : Pointer;assembler;nostackframe;
  1008. asm
  1009. movl (%esp),%eax
  1010. end;
  1011. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  // Returns the pointer stored 4 bytes above framebp (presumably the return-address
  // slot of that frame - confirm against the frame layout), or nil when framebp is
  // rejected. The addr parameter is not used by either variant below.
  1012. function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;
  1013. {$if defined(win32)}
  1014. { Windows has StackTop always properly set }
  1015. begin
  // dereference framebp only when it is non-nil and lies within [Sptr, StackTop]
  1016. if assigned(framebp) and (framebp<=StackTop) and (framebp>=Sptr) then
  1017. Result:=PPointer(framebp+4)^
  1018. else
  1019. Result:=nil;
  1020. end;
  1021. {$else defined(win32)}
  // other targets: framebp is taken from %eax and only tested against nil
  1022. nostackframe;assembler;
  1023. asm
  1024. orl %eax,%eax // sets ZF when framebp is nil; eax keeps its value
  1025. jz .Lg_a_null // nil in, nil out
  1026. movl 4(%eax),%eax
  1027. .Lg_a_null:
  1028. end;
  1029. {$endif defined(win32)}
  1030. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  // Returns the pointer stored at framebp itself (presumably the saved frame pointer
  // of the caller - confirm against the frame layout), or nil when framebp is
  // rejected. The addr parameter is not used by either variant below.
  1031. function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;
  1032. {$if defined(win32)}
  1033. { Windows has StackTop always properly set }
  1034. begin
  // dereference framebp only when it is non-nil and lies within [Sptr, StackTop]
  1035. if assigned(framebp) and (framebp<=StackTop) and (framebp>=Sptr) then
  1036. Result:=PPointer(framebp)^
  1037. else
  1038. Result:=nil;
  1039. end;
  1040. {$else defined(win32)}
  // other targets: framebp is taken from %eax and only tested against nil
  1041. nostackframe;assembler;
  1042. asm
  1043. orl %eax,%eax // sets ZF when framebp is nil; eax keeps its value
  1044. jz .Lgnf_null // nil in, nil out
  1045. movl (%eax),%eax
  1046. .Lgnf_null:
  1047. end;
  1048. {$endif defined(win32)}
  1049. {$define FPC_SYSTEM_HAS_SPTR}
  // Returns the value of %esp as seen inside this frameless routine.
  1050. Function Sptr : Pointer;assembler;nostackframe;
  1051. asm
  1052. movl %esp,%eax
  1053. end;
  1054. {****************************************************************************
  1055. Str()
  1056. ****************************************************************************}
  1057. {$if defined(disabled) and defined(regcall) }
  // This region is compiled only when both 'disabled' and 'regcall' are defined.
  // Register use visible in the code: eax = l, edx = address of s, ecx = value copied
  // to esi as the space available in s (presumably high(s), see the note at the
  // jump to .Ldone - confirm).
  1058. {$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
  1059. {$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
  1060. label str_int_shortcut;
  // Unsigned variant: saves esi/edi/ebx, sets edi = address of s and edx = 0
  // (no sign character), then continues in the shared tail of the longint variant,
  // which also pops the three saved registers.
  1061. procedure int_str(l:longword;out s:string);assembler;nostackframe;
  1062. asm
  1063. pushl %esi
  1064. pushl %edi
  1065. pushl %ebx
  1066. mov %edx,%edi
  1067. xor %edx,%edx
  1068. jmp str_int_shortcut
  1069. end;
  // Signed variant: writes the decimal text of l into s, preceded by '-' when l < 0.
  1070. procedure int_str(l:longint;out s:string);assembler;nostackframe;
  1071. {Optimized for speed, but balanced with size.}
  // digits[n] is compared with the value to correct the estimated digit count
  1072. const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
  1073. 100000,1000000,10000000,
  1074. 100000000,1000000000);
  1075. asm
  1076. {$ifdef FPC_PROFILE}
  1077. push %eax
  1078. push %edx
  1079. push %ecx
  1080. call mcount
  1081. pop %ecx
  1082. pop %edx
  1083. pop %eax
  1084. {$endif FPC_PROFILE}
  1085. push %esi
  1086. push %edi
  1087. push %ebx
  1088. movl %edx,%edi
  1089. { Calculate absolute value and put sign in edx}
  1090. cltd // edx = sign extension of eax: -1 when negative, 0 otherwise
  1091. xorl %edx,%eax
  1092. subl %edx,%eax // eax = absolute value
  1093. negl %edx // edx = 1 when a sign character is needed, else 0
  1094. str_int_shortcut:
  1095. movl %ecx,%esi
  1096. {Calculate amount of digits in ecx.}
  1097. xorl %ecx,%ecx
  1098. bsrl %eax,%ecx // index of the highest set bit of the value
  1099. incl %ecx
  // (bit length * 1233) shr 12 estimates the decimal digit count (1233/4096 ~ log10(2))
  1100. imul $1233,%ecx
  1101. shr $12,%ecx
  1102. {$ifdef FPC_PIC}
  1103. call fpc_geteipasebx
  1104. {$ifdef darwin}
  1105. movl digits-.Lpic(%ebx),%ebx
  1106. {$else}
  1107. addl $_GLOBAL_OFFSET_TABLE_,%ebx
  1108. movl digits@GOT(%ebx),%ebx
  1109. {$endif}
  1110. cmpl (%ebx,%ecx,4),%eax
  1111. {$else}
  1112. cmpl digits(,%ecx,4),%eax
  1113. {$endif}
  // the compare sets carry when value < digits[ecx]; cmc inverts it so that
  // adc adds 1 exactly when value >= digits[ecx]
  1114. cmc
  1115. adcl $0,%ecx {Nr. digits ready in ecx.}
  1116. {Write length & sign.}
  1117. lea (%edx,%ecx),%ebx // bl = sign (0 or 1) + digit count
  1118. movb $45,%bh {movb $'-,%bh Not supported by our ATT reader.}
  1119. movw %bx,(%edi) // stores bl at (%edi) and '-' in the byte after it
  1120. addl %edx,%edi // digits are indexed from behind the sign, if there is one
  1121. subl %edx,%esi // the sign uses up one position of the available space
  1122. {Skip digits beyond string length.}
  1123. movl %eax,%edx
  1124. subl %ecx,%esi
  1125. jae .Lloop_write // no borrow: all digits fit
  1126. .balign 4
  // each pass divides the value by 10 (dropping its lowest digit) and lowers the
  // digit count; esi is negative here and counts up towards 0
  1127. .Lloop_skip:
  1128. movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
  1129. mull %edx
  1130. shrl $3,%edx
  1131. decl %ecx
  1132. jz .Ldone {If (l<0) and (high(s)=1) this jump is taken.}
  1133. incl %esi
  1134. jnz .Lloop_skip
  1135. {Write out digits.}
  1136. .balign 4
  // ecx counts down, so digits are stored from the last position to the first
  1137. .Lloop_write:
  1138. movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
  1139. {Pre-add '0'}
  1140. leal 48(%edx),%ebx {leal $'0(,%edx),%ebx Not supported by our ATT reader.}
  1141. mull %edx
  1142. shrl $3,%edx // edx = value div 10
  1143. leal (%edx,%edx,8),%eax {x mod 10 = x-10*(x div 10)}
  1144. subl %edx,%ebx
  1145. subl %eax,%ebx // ebx = value + 48 - 10*(value div 10), the digit character
  1146. movb %bl,(%edi,%ecx)
  1147. decl %ecx
  1148. jnz .Lloop_write
  1149. .Ldone:
  1150. popl %ebx
  1151. popl %edi
  1152. popl %esi
  1153. end;
  1154. {$endif}
  1155. {****************************************************************************
  1156. Bounds Check
  1157. ****************************************************************************}
  1158. { do a thread-safe inc/dec }
  1159. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  // Decrements the longint at the address in %eax with a lock prefix and returns
  // true (al = 1) when the result of the decrement is zero.
  1160. function cpudeclocked(var l : longint) : boolean;assembler;nostackframe;
  1161. asm
  1162. { this check should be done because a lock takes a lot }
  1163. { of time! }
  // NOTE: no check is performed in this routine itself; declocked tests
  // ismultithread before calling it.
  1164. lock
  1165. decl (%eax)
  1166. setzb %al
  1167. end;
  1168. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  // Increments the longint at the address in %eax with a lock prefix.
  1169. procedure cpuinclocked(var l : longint);assembler;nostackframe;
  1170. asm
  1171. lock
  1172. incl (%eax)
  1173. end;
  1174. // inline SMP check and normal lock.
  1175. // the locked one is so slow, inlining doesn't matter.
  function declocked(var l : longint) : boolean; inline;
  // Decrements l and reports whether it reached zero. When ismultithread is set
  // the work is delegated to cpudeclocked; otherwise a plain dec is used.
  begin
    if ismultithread then
      declocked:=cpudeclocked(l)
    else
      begin
        dec(l);
        declocked:=(l=0);
      end;
  end;
  procedure inclocked(var l : longint); inline;
  // Increments l. When ismultithread is set the work is delegated to
  // cpuinclocked; otherwise a plain inc is used.
  begin
    if ismultithread then
      cpuinclocked(l)
    else
      inc(l);
  end;
  // Atomically subtracts 1 from Target and returns the decremented value.
  1193. function InterLockedDecrement (var Target: longint) : longint; assembler;
  1194. asm
  1195. movl $-1,%edx
  1196. xchgl %edx,%eax // eax = -1 (addend), edx = address of Target
  1197. lock
  1198. xaddl %eax, (%edx) // Target += eax; eax receives the previous value of Target
  1199. decl %eax // previous value - 1 = value now stored
  1200. end;
  // Atomically adds 1 to Target and returns the incremented value.
  1201. function InterLockedIncrement (var Target: longint) : longint; assembler;
  1202. asm
  1203. movl $1,%edx
  1204. xchgl %edx,%eax // eax = 1 (addend), edx = address of Target
  1205. lock
  1206. xaddl %eax, (%edx) // Target += eax; eax receives the previous value of Target
  1207. incl %eax // previous value + 1 = value now stored
  1208. end;
  // Stores Source (in %edx) into Target (address in %eax) and returns the value
  // Target held before.
  1209. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  1210. asm
  // xchg with a memory operand is performed locked by the CPU, so no lock prefix is written
  1211. xchgl (%eax),%edx
  1212. movl %edx,%eax
  1213. end;
  // Atomically adds Source to Target and returns the value Target held before the add.
  1214. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  1215. asm
  1216. xchgl %eax,%edx // eax = Source, edx = address of Target
  1217. lock
  1218. xaddl %eax, (%edx) // Target += eax; eax receives the previous value of Target
  1219. end;
  // Atomically replaces Target with NewValue if Target equals Comperand.
  // Returns the value Target held before the operation.
  // Registers on entry as used below: eax = address of Target, edx = NewValue, ecx = Comperand.
  1220. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler;
  1221. asm
  1222. xchgl %eax,%ecx // eax = Comperand (cmpxchg compares against eax), ecx = address of Target
  1223. lock
  1224. cmpxchgl %edx, (%ecx) // equal: Target := edx; not equal: eax := Target
  1225. end;
  // 64-bit compare-and-exchange: if Target equals Comperand it is replaced by
  // NewValue. The previous contents of Target are returned in edx:eax.
  1226. function InterlockedCompareExchange64(var Target: int64; NewValue: int64; Comperand: int64): int64; assembler;
  1227. asm
  1228. pushl %ebx
  1229. pushl %edi
  1230. movl %eax,%edi // address of Target; eax is needed for the comparand below
  // cmpxchg8b compares edx:eax with the memory operand and stores ecx:ebx on a match
  1231. movl Comperand+4,%edx
  1232. movl Comperand+0,%eax
  1233. movl NewValue+4,%ecx
  1234. movl NewValue+0,%ebx
  1235. lock cmpxchg8b (%edi)
  1236. pop %edi
  1237. pop %ebx
  1238. end;
  1239. {****************************************************************************
  1240. FPU
  1241. ****************************************************************************}
  1242. const
  1243. { Internal constants for use in system unit }
  // FPU_* : single-bit flag values; FPU_ExceptionMask ($ff) covers all of them
  1244. FPU_Invalid = 1;
  1245. FPU_Denormal = 2;
  1246. FPU_DivisionByZero = 4;
  1247. FPU_Overflow = 8;
  1248. FPU_Underflow = $10;
  1249. FPU_StackUnderflow = $20;
  1250. FPU_StackOverflow = $40;
  1251. FPU_ExceptionMask = $ff;
  1252. { use Default8087CW instead
  1253. fpucw : word = $1300 or FPU_StackUnderflow or FPU_Underflow or FPU_Denormal;
  1254. }
  // MM_Mask* : one bit each, bits 7 through 12, combined below into the mxcsr value
  1255. MM_MaskInvalidOp = %0000000010000000;
  1256. MM_MaskDenorm = %0000000100000000;
  1257. MM_MaskDivZero = %0000001000000000;
  1258. MM_MaskOverflow = %0000010000000000;
  1259. MM_MaskUnderflow = %0000100000000000;
  1260. MM_MaskPrecision = %0001000000000000;
  // typed constant: loaded with ldmxcsr by SysInitFPU/SysResetFPU and reassigned
  // from GetSSECSR in fpc_cpucodeinit when running as a library
  1261. mxcsr : dword = MM_MaskUnderflow or MM_MaskPrecision or MM_MaskDenorm;
  1262. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  // Initialises the x87 FPU (fninit) and loads Default8087CW as its control word.
  // When has_sse_support is set it also loads mxcsr into the SSE control register.
  // Finally sets softfloat_exception_mask.
  1263. Procedure SysInitFPU;
  1264. var
  1265. { these locals are so we don't have to hack pic code in the assembler }
  1266. localmxcsr: dword;
  1267. localfpucw: word;
  1268. begin
  1269. localfpucw:=Default8087CW;
  1270. asm
  1271. fninit
  1272. fldcw localfpucw
  1273. fwait
  1274. end;
  1275. if has_sse_support then
  1276. begin
  1277. localmxcsr:=mxcsr;
  1278. asm
  1279. { setup sse exceptions }
  1280. ldmxcsr localmxcsr
  1281. end;
  1282. end;
  1283. softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
  1284. end;
  1285. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  // Re-initialises the x87 FPU (fninit) and reloads Default8087CW as its control word.
  // When has_sse_support is set it also reloads mxcsr into the SSE control register.
  // Finally clears softfloat_exception_flags.
  1286. Procedure SysResetFPU;
  1287. var
  1288. { these locals are so we don't have to hack pic code in the assembler }
  1289. localmxcsr: dword;
  1290. localfpucw: word;
  1291. begin
  1292. localfpucw:=Default8087CW;
  1293. asm
  1294. fninit
  1295. fwait
  1296. fldcw localfpucw
  1297. end;
  1298. if has_sse_support then
  1299. begin
  1300. localmxcsr:=mxcsr;
  1301. asm
  1302. { setup sse exceptions }
  1303. ldmxcsr localmxcsr
  1304. end;
  1305. end;
  1306. softfloat_exception_flags:=0;
  1307. end;
  1308. { because of the brain-dead SSE detection on x86, this test is postponed }
  // CPU-specific start-up: settles has_sse_support / os_supports_sse, records MMX
  // support, puts the FPU into its default state and, when USE_FASTMOVE is defined,
  // calls setup_fastmove.
  1309. procedure fpc_cpucodeinit;
  1310. begin
  // os_supports_sse is set to true before check_sse_support is called and then
  // replaced by the resulting has_sse_support (presumably check_sse_support reads it - confirm)
  1311. os_supports_sse:=true;
  1312. check_sse_support;
  1313. os_supports_sse:=has_sse_support;
  1314. if os_supports_sse then
  1315. begin
  // sse_check is true only while the probe instruction below executes
  1316. sse_check:=true;
  1317. asm
  1318. { force an sse exception if no sse is supported, the exception handler sets
  1319. os_supports_sse to false then }
  1320. { don't change this instruction, the code above depends on its size }
  1321. movaps %xmm7, %xmm6
  1322. end;
  1323. sse_check:=false;
  1324. end;
  // adopt the outcome of the probe; without SSE the SSE2/SSE3 flags are cleared too
  1325. has_sse_support:=os_supports_sse;
  1326. if not(has_sse_support) then
  1327. begin
  1328. has_sse2_support:=false;
  1329. has_sse3_support:=false;
  1330. end;
  1331. { don't let libraries influence the FPU cw set by the host program }
  1332. if has_sse_support and
  1333. IsLibrary then
  1334. mxcsr:=GetSSECSR;
  1335. has_mmx_support:=mmx_support;
  // SysResetFPU always runs; SysInitFPU only when this is not a library
  1336. SysResetFPU;
  1337. if not(IsLibrary) then
  1338. SysInitFPU;
  1339. {$ifdef USE_FASTMOVE}
  1340. setup_fastmove;
  1341. {$endif}
  1342. end;
  1343. {$if not defined(darwin) and defined(regcall) }
  1344. { darwin requires that the stack is aligned to 16 bytes when calling another function }
  1345. {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
  1346. {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
  1347. function fpc_freemem_x(p:pointer):ptrint; [external name 'FPC_FREEMEM_X'];
  // Drops one reference from the ansistring variable S (its address is in %eax):
  // S is set to nil, the reference count stored 8 bytes before the string data is
  // decremented unless it is negative, and the block starting 12 bytes before the
  // data is passed to FPC_FREEMEM_X when the count reaches zero.
  1348. Procedure fpc_AnsiStr_Decr_Ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF']; compilerproc; nostackframe; assembler;
  1349. asm
  1350. cmpl $0,(%eax) // S = nil: nothing to do
  1351. je .Lquit
  1352. pushl %esi
  1353. movl (%eax),%esi
  1354. subl $12,%esi // points to start of allocation
  1355. movl $0,(%eax) // s:=nil
  1356. cmpl $0,4(%esi) // exit if refcount<0
  1357. jl .Lj3596
  1358. {$ifdef FPC_PIC}
  // PIC build: fetch ismultithread through the GOT; ebx is restored before the branch
  // (popl leaves the flags set by cmp untouched)
  1359. pushl %ebx
  1360. call fpc_geteipasebx
  1361. addl $_GLOBAL_OFFSET_TABLE_,%ebx
  1362. movl ismultithread@GOT(%ebx),%ebx
  1363. movl (%ebx),%ebx
  1364. cmp $0, %ebx
  1365. popl %ebx
  1366. {$else FPC_PIC}
  1367. cmpl $0,ismultithread
  1368. {$endif FPC_PIC}
  1369. jne .Lj3610 // ismultithread set: use the locked decrement
  1370. decl 4(%esi) // plain decrement of the reference count
  1371. je .Lj3620 // count reached zero: release the block
  1372. jmp .Lj3596
  1373. .Lj3610:
  1374. leal 4(%esi),%eax // address of the reference count
  1375. call cpudeclocked
  1376. testb %al,%al // al = 0: count did not reach zero, keep the block
  1377. je .Lj3596
  1378. .Lj3620:
  1379. movl %esi,%eax
  1380. call FPC_FREEMEM_X
  1381. .Lj3596:
  1382. popl %esi
  1383. .Lquit:
  1384. end;
  1385. function fpc_truely_ansistr_unique(Var S : Pointer): Pointer; forward;
  1386. {$define FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
  // Fast path in front of fpc_truely_ansistr_unique: returns S unchanged when S is
  // nil or when the reference count stored 8 bytes before the string data equals 1;
  // otherwise returns whatever fpc_truely_ansistr_unique(S) returns.
  1387. Function fpc_ansistr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_ANSISTR_UNIQUE']; compilerproc; nostackframe;assembler;
  1388. asm
  1389. // Var S located in register
  1390. // Var $result located in register
  1391. movl %eax,%edx // keep the address of S for the slow path
  1392. // [437] pointer(result) := pointer(s);
  1393. movl (%eax),%eax
  1394. // [438] If Pointer(S)=Nil then
  1395. testl %eax,%eax
  1396. je .Lj4031
  1397. .Lj4036:
  1398. // [440] if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
  1399. movl -8(%eax),%ecx
  1400. cmpl $1,%ecx
  1401. je .Lj4038
  1402. // [441] result:=fpc_truely_ansistr_unique(s);
  1403. movl %edx,%eax
  1404. call fpc_truely_ansistr_unique
  1405. .Lj4038:
  1406. .Lj4031:
  1407. // [442] end;
  1408. end;
  1409. {$endif FPC_HAS_FEATURE_ANSISTRINGS}
  1410. {$endif ndef darwin and defined(regcall) }
  1411. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  1412. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  // Executes a locked add of 0 to the dword at the top of the stack. The stored
  // value does not change; the locked instruction is what serves as the barrier.
  1413. procedure ReadBarrier;assembler;nostackframe;
  1414. asm
  1415. lock
  1416. addl $0,0(%esp)
  1417. { alternative: lfence on SSE capable CPUs }
  1418. end;
  // Intentionally empty: no instruction is emitted for this barrier on this target.
  1419. procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1420. begin
  1421. { reads imply barrier on earlier reads depended on }
  1422. end;
  // Executes a locked add of 0 to the dword at the top of the stack. The stored
  // value does not change; the locked instruction is what serves as the barrier.
  1423. procedure ReadWriteBarrier;assembler;nostackframe;
  1424. asm
  1425. lock
  1426. addl $0,0(%esp)
  1427. { alternative: mfence on SSE capable CPUs }
  1428. end;
  // Intentionally empty: the body contains no instructions.
  1429. procedure WriteBarrier;assembler;nostackframe;
  1430. asm
  1431. { no write reordering on intel CPUs (yet) }
  1432. end;
  1433. {$endif}
  1434. {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
  1435. {$define FPC_SYSTEM_HAS_BSF_QWORD}
  // Returns the index (0..63) of the lowest set bit of AValue, or 255 when AValue is 0.
  // AValue is read from the stack: low dword at 4(%esp), high dword at 8(%esp).
  1436. function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1437. asm
  1438. bsfl 4(%esp),%eax // ZF is set when the low dword is zero
  1439. jnz .L2 // bit found in the low dword: eax already holds the result
  // nothing in this routine jumps to .L1
  1440. .L1:
  1441. bsfl 8(%esp),%eax
  1442. jnz .L3
  1443. movl $223,%eax // both halves zero: 223 + 32 = 255
  1444. .L3:
  1445. addl $32,%eax // index within the high dword is offset by 32
  1446. .L2:
  1447. end;
  1448. {$endif FPC_SYSTEM_HAS_BSF_QWORD}
  1449. {$ifndef FPC_SYSTEM_HAS_BSR_QWORD}
  1450. {$define FPC_SYSTEM_HAS_BSR_QWORD}
  // Returns the index (0..63) of the highest set bit of AValue, or 255 when AValue is 0.
  // AValue is read from the stack: low dword at 4(%esp), high dword at 8(%esp).
  1451. function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1452. asm
  1453. bsrl 8(%esp),%eax // ZF is set when the high dword is zero
  1454. jz .L1
  1455. add $32,%eax // index within the high dword is offset by 32
  1456. jmp .L2
  1457. .L1:
  1458. bsrl 4(%esp),%eax
  1459. jnz .L2
  1460. movl $255,%eax // both halves zero
  1461. .L2:
  1462. end;
  1463. {$endif FPC_SYSTEM_HAS_BSR_QWORD}
  1464. {$ifndef FPC_SYSTEM_HAS_SAR_QWORD}
  1465. {$define FPC_SYSTEM_HAS_SAR_QWORD}
  // Arithmetic (sign-propagating) right shift of a 64-bit value by Shift and 63.
  // Shift is taken from %al; AValue is read from the stack (low dword at 4(%esp),
  // high dword at 8(%esp)). The result is returned in edx:eax.
  1466. function fpc_SarInt64(Const AValue : Int64;const Shift : Byte): Int64; [Public,Alias:'FPC_SARINT64']; compilerproc; assembler; nostackframe;
  1467. asm
  1468. movb %al,%cl // shift count must be in cl; eax is about to be overwritten
  1469. movl 8(%esp),%edx
  1470. movl 4(%esp),%eax
  1471. andb $63,%cl
  1472. cmpb $32,%cl
  1473. jnb .L1
  // count below 32: shift bits from edx into eax, then shift edx arithmetically
  1474. shrdl %cl,%edx,%eax
  1475. sarl %cl,%edx
  1476. jmp .Lexit
  // count 32..63: the low result comes from the high dword, the high result is pure sign
  1477. .L1:
  1478. movl %edx,%eax
  1479. sarl $31,%edx
  1480. andb $31,%cl // remaining count = count - 32
  1481. sarl %cl,%eax
  1482. .Lexit:
  1483. end;
  1484. {$endif FPC_SYSTEM_HAS_SAR_QWORD}