i386.inc 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. intel i386+
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {****************************************************************************
  13. Primitives
  14. ****************************************************************************}
  var
    { Gate consulted by sse_support: SSE is only reported as usable while this
      flag is true. fpc_cpuinit resets it to false. }
    os_supports_sse : boolean;
    { set to true while an sse check is being executed, so that no SIGILL
      should be generated }
    sse_check : boolean;
  {$asmmode intel}
  { Check if the ID flag (EFLAGS mask 200000h) can be changed; if it can,
    CpuID is supported.
    Tested under go32v1 and Linux on c6x86 with CpuID enabled and disabled (PFV)

    Result: AL is non-zero when the flag toggled. EBX and the caller's EFLAGS
    are saved on entry and restored before returning. }
  function cpuid_support : boolean;assembler;
  asm
          push    ebx
          pushfd                          { copy of EFLAGS to restore at the end }
          pushfd
          pop     eax                     { eax = EFLAGS as they are now }
          mov     ebx,eax                 { ebx = reference copy }
          xor     eax,200000h             { flip the ID flag }
          push    eax
          popfd                           { try to write the modified flags }
          pushfd
          pop     eax                     { eax = EFLAGS read back }
          popfd                           { put the original EFLAGS back }
          xor     eax,ebx                 { eax = bits that differ from the reference }
          test    eax,200000h             { did the ID flag change? }
          setnz   al
          pop     ebx
  end;
  {$asmmode ATT}
  { returns true, if the processor reports SSE (bit $2000000 of EDX after
    cpuid with EAX=1) and os_supports_sse is set }
  function sse_support : boolean;
    var
      features : longint;
    begin
      { a cpu without the cpuid instruction never supports sse }
      sse_support:=false;
      if not cpuid_support then
        exit;
      asm
          pushl   %ebx                    { cpuid overwrites ebx }
          movl    $1,%eax
          cpuid
          movl    %edx,features
          popl    %ebx
      end;
      sse_support:=((features and $2000000)<>0) and os_supports_sse;
    end;
  { returns true, if the processor supports the mmx instructions
    (bit $800000 of EDX after cpuid with EAX=1) }
  function mmx_support : boolean;
    var
      features : longint;
    begin
      { a cpu without the cpuid instruction never supports mmx }
      mmx_support:=false;
      if not cpuid_support then
        exit;
      asm
          pushl   %ebx                    { cpuid overwrites ebx }
          movl    $1,%eax
          cpuid
          movl    %edx,features
          popl    %ebx
      end;
      mmx_support:=(features and $800000)<>0;
    end;
  83. {$ifndef FPC_PIC}
  84. {$ifndef FPC_SYSTEM_HAS_MOVE}
  85. {$define USE_FASTMOVE}
  86. {$i fastmove.inc}
  87. {$endif FPC_SYSTEM_HAS_MOVE}
  88. {$endif FPC_PIC}
  { CPU specific initialisation: clears os_supports_sse and, when running as a
    library, adopts the current FPU control word as Default8087CW. }
  procedure fpc_cpuinit;
    begin
      { SSE detection on x86 cannot be done reliably here, so this test is
        postponed to fpc_cpucodeinit, which must be implemented per OS (FK):
          has_sse_support:=sse_support;
          has_mmx_support:=mmx_support;
          setup_fastmove;
      }
      os_supports_sse:=false;
      { don't let libraries influence the FPU cw set by the host program }
      if IsLibrary then
        Default8087CW:=Get8087CW;
    end;
  {$ifndef darwin}
  { Loads the dword on top of the stack into EBX. The routine has no stack
    frame, so at this point that dword is the return address of the call. }
  function fpc_geteipasebx : pointer; [public, alias: 'fpc_geteipasebx'];assembler; nostackframe;
  asm
          movl    (%esp),%ebx
  end;
  { Loads the dword on top of the stack into ECX. The routine has no stack
    frame, so at this point that dword is the return address of the call. }
  function fpc_geteipasecx : pointer; [public, alias: 'fpc_geteipasecx'];assembler; nostackframe;
  asm
          movl    (%esp),%ecx
  end;
  {$endif}
  {$ifndef FPC_SYSTEM_HAS_MOVE}
  {$define FPC_SYSTEM_HAS_MOVE}
  { Copies count bytes from source to dest; the two areas may overlap.

    On entry: EAX = address of source, EDX = address of dest, ECX = count.
    Nothing is copied when count <= 0 or when source and dest are the same
    address. The copy runs backward (from the last byte down) only when dest
    lies inside [source, source+count); otherwise it runs forward.
    ESI and EDI are saved in locals and restored; the direction flag is left
    cleared on every path that changes it. }
  procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
  var
    saveesi,saveedi : longint;
  asm
          movl    %edi,saveedi
          movl    %esi,saveesi
          movl    %eax,%esi               { esi = source }
          movl    %edx,%edi               { edi = dest }
          movl    %ecx,%edx               { edx = count }
          movl    %edi,%eax
          { check for zero or negative count }
          cmpl    $0,%edx
          jle     .LMoveEnd
          { Check for back or forward }
          sub     %esi,%eax               { eax = dest - source }
          jz      .LMoveEnd               { Do nothing when source=dest }
          jc      .LFMove                 { Do forward, dest<source }
          cmp     %edx,%eax
          jb      .LBMove                 { Dest is in range of move, do backward }
          { Forward Copy }
  .LFMove:
          cld
          cmpl    $15,%edx
          jl      .LFMove1                { short copy: bytes only }
          movl    %edi,%ecx               { Align dest on 32bits: (-dest) and 3 bytes first }
          negl    %ecx
          andl    $3,%ecx
          subl    %ecx,%edx
          rep
          movsb
          movl    %edx,%ecx
          andl    $3,%edx                 { edx = bytes left after the dwords }
          shrl    $2,%ecx                 { ecx = number of dwords }
          rep
          movsl
  .LFMove1:
          movl    %edx,%ecx
          rep
          movsb
          jmp     .LMoveEnd
          { Backward Copy }
  .LBMove:
          std
          addl    %edx,%esi
          addl    %edx,%edi
          movl    %edi,%ecx               { ecx = dest+count (one past the last byte) }
          decl    %esi                    { esi/edi -> last byte of each area }
          decl    %edi
          cmpl    $15,%edx
          jl      .LBMove1                { short copy: bytes only }
          { Align on 32bits. Going downward, the dword stores are aligned once
            (dest+count) and 3 trailing bytes have been copied, so the end
            address must NOT be negated here (unlike the forward case). }
          andl    $3,%ecx
          subl    %ecx,%edx
          rep
          movsb
          movl    %edx,%ecx
          andl    $3,%edx                 { edx = bytes left after the dwords }
          shrl    $2,%ecx                 { ecx = number of dwords }
          subl    $3,%esi                 { point at the lowest byte of the next dword }
          subl    $3,%edi
          rep
          movsl
          addl    $3,%esi                 { back to byte addressing }
          addl    $3,%edi
  .LBMove1:
          movl    %edx,%ecx
          rep
          movsb
          cld
  .LMoveEnd:
          movl    saveedi,%edi
          movl    saveesi,%esi
  end;
  {$endif FPC_SYSTEM_HAS_MOVE}
  {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  {$define FPC_SYSTEM_HAS_FILLCHAR}
  { Stores the byte value into count consecutive bytes starting at x.

    On entry: EAX = address of x, EDX = count, CL = value.
    Nothing is written when count <= 0. Up to 22 bytes are written with a
    plain byte loop; longer runs use rep stosl/stosb with the value replicated
    into all four bytes of EAX. }
  Procedure FillChar(var x;count:SizeInt;value:byte);assembler; nostackframe;
  asm
          cmpl    $22,%edx                { empirically determined value on a Core 2 Duo Conroe }
          jg      .LFillBulk
          testl   %edx,%edx
          jle     .LFillDone              { zero or negative count }
  .LFillByte:
          movb    %cl,(%eax)
          incl    %eax
          decl    %edx
          jne     .LFillByte
  .LFillDone:
          ret
  .LFillBulk:
          cld
          push    %edi
          movl    %eax,%edi               { edi = destination }
          movzbl  %cl,%eax
          movl    %edx,%ecx
          imul    $0x01010101,%eax        { Expand al into the 4 bytes of eax }
          shrl    $2,%ecx                 { ecx = number of dwords }
          andl    $3,%edx                 { edx = trailing bytes }
          rep
          stosl
          movl    %edx,%ecx
          rep
          stosb
          pop     %edi
  end;
  {$endif FPC_SYSTEM_HAS_FILLCHAR}
  {$ifndef FPC_SYSTEM_HAS_FILLWORD}
  {$define FPC_SYSTEM_HAS_FILLWORD}
  { Stores the 16-bit value into count consecutive words starting at x.

    On entry: EAX = address of x, EDX = count, CX = value.
    Nothing is written when count <= 0. Words are written two at a time with
    rep stosl, followed by one stosw when count is odd. EDI is preserved. }
  procedure fillword(var x;count : SizeInt;value : word);assembler;
  var
    saveedi : longint;
  asm
          movl    %edi,saveedi
          movl    %eax,%edi               { edi = destination }
          movzwl  %cx,%eax                { eax = value, zero extended }
          movl    %edx,%ecx               { ecx = count }
          { check for zero or negative count }
          testl   %ecx,%ecx
          jle     .LFillWordDone
          movl    %eax,%edx
          shll    $16,%eax
          orl     %edx,%eax               { eax = value in both halves }
          movl    %ecx,%edx               { keep count for the odd-word test }
          shrl    $1,%ecx                 { ecx = number of word pairs }
          cld
          rep
          stosl
          movl    %edx,%ecx
          andl    $1,%ecx                 { one more word when count is odd }
          rep
          stosw
  .LFillWordDone:
          movl    saveedi,%edi
  end;
  {$endif FPC_SYSTEM_HAS_FILLWORD}
  {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
  {$define FPC_SYSTEM_HAS_FILLDWORD}
  { Stores the 32-bit value into count consecutive dwords starting at x.

    On entry: EAX = address of x, EDX = count, ECX = value.
    Nothing is written when count <= 0. EDI is preserved. }
  procedure filldword(var x;count : SizeInt;value : dword);assembler;
  var
    saveedi : longint;
  asm
          movl    %edi,saveedi
          movl    %eax,%edi               { edi = destination }
          movl    %ecx,%eax               { eax = value }
          movl    %edx,%ecx               { ecx = count }
          { check for zero or negative count }
          testl   %ecx,%ecx
          jle     .LFillDWordDone
          cld
          rep
          stosl
  .LFillDWordDone:
          movl    saveedi,%edi
  end;
  {$endif FPC_SYSTEM_HAS_FILLDWORD}
  {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  {$define FPC_SYSTEM_HAS_INDEXBYTE}
  { Returns the zero-based index of the first byte equal to b in buf, or -1
    when none of the len bytes matches.

    On entry: EAX = address of buf, EDX = len, CL = b.
    len is compared unsigned. After aligning the pointer to 4 bytes the buffer
    is scanned one dword at a time: each dword is XORed with b replicated into
    all four bytes, which turns matching bytes into zero, and a bit trick then
    flags the zero bytes. The initial buf address stays on the stack so the
    final pointer can be converted to an index. ESI and EDI are preserved. }
  function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  asm
          push    %esi
          push    %edi
          push    %eax                    { save initial value of 'buf' }
          cmp     $4,%edx                 { less than 4 bytes, just test byte by byte. }
          jb      .LTail
          mov     %cl,%ch                 { prepare pattern: b in all 4 bytes of ecx }
          movzwl  %cx,%esi
          shl     $16,%ecx
          or      %esi,%ecx
  .LAlignHead:
          test    $3,%al                  { align to 4 bytes if necessary }
          je      .LBlockCheck
          cmp     %cl,(%eax)
          je      .LToIndex
          inc     %eax
          dec     %edx
          jmp     .LAlignHead
          .balign 16                      { Main loop, unrolled 4 times for speed }
  .LBlock16:
          mov     (%eax),%esi             { load dword }
          xor     %ecx,%esi               { XOR with pattern, bytes equal to target are now 0 }
          lea     -0x01010101(%esi),%edi
          xor     %esi,%edi               { (x-0x01010101) xor x }
          not     %esi
          and     $0x80808080,%esi
          and     %edi,%esi               { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
          jnz     .LHit                   { one of the bytes matches }
          mov     4(%eax),%esi
          xor     %ecx,%esi
          lea     -0x01010101(%esi),%edi
          xor     %esi,%edi
          not     %esi
          and     $0x80808080,%esi
          and     %edi,%esi
          jnz     .LHit4
          mov     8(%eax),%esi
          xor     %ecx,%esi
          lea     -0x01010101(%esi),%edi
          xor     %esi,%edi
          not     %esi
          and     $0x80808080,%esi
          and     %edi,%esi
          jnz     .LHit8
          mov     12(%eax),%esi
          xor     %ecx,%esi
          lea     -0x01010101(%esi),%edi
          xor     %esi,%edi
          not     %esi
          and     $0x80808080,%esi
          and     %edi,%esi
          jnz     .LHit12
          add     $16,%eax
  .LBlockCheck:
          sub     $16,%edx
          jae     .LBlock16               { at least 16 bytes were still remaining }
          { Process remaining bytes (<16 left at this point) }
          { length is offset by -16 at this point }
  .LDwordLoop:
          cmp     $4-16,%edx              { < 4 bytes left? }
          jb      .LTail
          mov     (%eax),%esi
          xor     %ecx,%esi
          lea     -0x01010101(%esi),%edi
          xor     %esi,%edi
          not     %esi
          and     $0x80808080,%esi
          and     %edi,%esi
          jne     .LHit
          add     $4,%eax
          sub     $4,%edx
          jmp     .LDwordLoop
  .LTail:                                 { Less than 4 bytes remaining, check one by one }
          and     $3, %edx
          jz      .LMiss
  .LTailLoop:
          cmp     %cl,(%eax)
          je      .LToIndex
          inc     %eax
          dec     %edx
          jnz     .LTailLoop
  .LMiss:
          or      $-1,%eax                { result = -1 }
          jmp     .LLeave
          { add missing source pointer increments }
  .LHit12:
          add     $4,%eax
  .LHit8:
          add     $4,%eax
  .LHit4:
          add     $4,%eax
  .LHit:
          { esi has bit 7 set in the byte positions flagged as matching;
            advance eax to the lowest flagged byte }
          test    $0xff,%esi
          jnz     .LToIndex
          inc     %eax
          test    $0xff00,%esi
          jnz     .LToIndex
          inc     %eax
          test    $0xff0000,%esi
          jnz     .LToIndex
          inc     %eax
  .LToIndex:
          sub     (%esp),%eax             { pointer - initial buf = index }
  .LLeave:
          pop     %ecx                    { removes initial 'buf' value }
          pop     %edi
          pop     %esi
  end;
  {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  {$define FPC_SYSTEM_HAS_INDEXWORD}
  { Returns the zero-based index of the first 16-bit word equal to b in buf,
    or -1 when len is 0 or no word matches within len words.
    The scan is done with repne scasw; EDI and EBX are preserved. }
  function Indexword(Const buf;len:SizeInt;b:word):SizeInt; assembler;
  var
    saveedi,saveebx : longint;
  asm
          movl    %edi,saveedi
          movl    %ebx,saveebx
          movl    Buf,%edi                // edi = start of the buffer
          movw    b,%bx                   // bx = word to look for
          movl    Len,%ecx                // ecx = number of words
          xorl    %eax,%eax
          testl   %ecx,%ecx
          jz      .LWordMiss
          cld
          movl    %ecx,%edx               // keep len to compute the index afterwards
          movw    %bx,%ax
          repne
          scasw
          jne     .LWordMiss
          incl    %ecx                    // ecx = words not yet scanned + the matching one
          subl    %ecx,%edx               // index = len - ecx
          movl    %edx,%eax
          jmp     .LWordDone
  .LWordMiss:
          movl    $-1,%eax
  .LWordDone:
          movl    saveedi,%edi
          movl    saveebx,%ebx
  end;
  {$endif FPC_SYSTEM_HAS_INDEXWORD}
  {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
  {$define FPC_SYSTEM_HAS_INDEXDWORD}
  { Returns the zero-based index of the first 32-bit dword equal to b in buf,
    or -1 when len is 0 or no dword matches within len dwords.

    On entry: EAX = address of buf, EDX = len, ECX = b.
    The scan is done with repne scasl; EDI and EBX are preserved. }
  function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt; assembler;
  var
    saveedi,saveebx : longint;
  asm
          movl    %edi,saveedi
          movl    %ebx,saveebx
          movl    %eax,%edi               // edi = start of the buffer
          movl    %ecx,%ebx               // ebx = dword to look for
          movl    %edx,%ecx               // ecx = number of dwords
          xorl    %eax,%eax
          testl   %ecx,%ecx
          jz      .LDWordMiss
          cld
          movl    %ecx,%edx               // keep len to compute the index afterwards
          movl    %ebx,%eax
          repne
          scasl
          jne     .LDWordMiss
          incl    %ecx                    // ecx = dwords not yet scanned + the matching one
          subl    %ecx,%edx               // index = len - ecx
          movl    %edx,%eax
          jmp     .LDWordDone
  .LDWordMiss:
          movl    $-1,%eax
  .LDWordDone:
          movl    saveedi,%edi
          movl    saveebx,%ebx
  end;
  {$endif FPC_SYSTEM_HAS_INDEXDWORD}
  {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  { Compares len bytes of buf1 and buf2 and returns the difference
    (buf1 byte - buf2 byte, both zero extended) at the first position where
    they differ, or 0 when all compared bytes are equal or len is 0.

    On entry: EAX = address of buf1, EDX = address of buf2, ECX = len.
    Up to 57 bytes are compared in a simple byte loop; longer buffers use
    rep cmpsb / rep cmpsl after aligning on buf1. }
  function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  asm
          cmpl    $57,%ecx                { empirically determined value on a Core 2 Duo Conroe }
          jg      .LCmpByteBulk
          testl   %ecx,%ecx
          je      .LCmpByteNone
          pushl   %ebx
  .LCmpByteNext:
          movb    (%eax),%bl
          cmpb    (%edx),%bl
          leal    1(%eax),%eax            { lea keeps the flags of the cmpb intact }
          leal    1(%edx),%edx
          jne     .LCmpByteDiff
          decl    %ecx
          jne     .LCmpByteNext
  .LCmpByteDiff:
          movzbl  -1(%edx),%ecx           { Compare last position }
          movzbl  %bl,%eax
          subl    %ecx,%eax
          popl    %ebx
          ret
  .LCmpByteNone:
          xorl    %eax,%eax
          ret
  .LCmpByteBulk:
          pushl   %esi
          pushl   %edi
          cld
          movl    %eax,%edi               { edi = buf1 }
          movl    %edx,%esi               { esi = buf2 }
          movl    %ecx,%eax               { eax = bytes to go }
          movl    %edi,%ecx               { Align on 32bits }
          negl    %ecx                    { calc bytes to align (%edi and 3) xor 3= -%edi and 3 }
          andl    $3,%ecx
          subl    %ecx,%eax               { Subtract from number of bytes to go }
          orl     %ecx,%ecx               { ZF=1 when nothing to align, so the jne below falls through }
          rep
          cmpsb                           { The actual 32-bit Aligning }
          jne     .LCmpByteResult
          movl    %eax,%ecx               { bytes to do, divide by 4 }
          andl    $3,%eax                 { remainder }
          shrl    $2,%ecx                 { The actual division }
          orl     %ecx,%ecx               { Sets zero flag if ecx=0 -> no cmp }
          rep
          cmpsl
          je      .LCmpByteRest           { All equal? then to the left over bytes }
          movl    $4,%eax                 { Not equal. Rescan the last 4 bytes bytewise }
          subl    %eax,%esi
          subl    %eax,%edi
  .LCmpByteRest:
          movl    %eax,%ecx               { bytes still to (re)scan }
          orl     %eax,%eax               { prevent disaster in case %eax=0 }
          rep
          cmpsb
  .LCmpByteResult:
          movzbl  -1(%esi),%ecx
          movzbl  -1(%edi),%eax           { Compare failing (or equal) position }
          subl    %ecx,%eax
          popl    %edi
          popl    %esi
  end;
  {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
  {$define FPC_SYSTEM_HAS_COMPAREWORD}
  { Compares len 16-bit words of buf1 and buf2 and returns the difference
    (buf1 word - buf2 word, both zero extended) at the first position where
    they differ, or 0 when all compared words are equal or len is 0.

    On entry: EAX = address of buf1, EDX = address of buf2, ECX = len (words).
    Up to 32 words are compared in a simple loop; longer buffers are compared
    dword-wise with rep cmpsl and the failing area is rescanned word-wise. }
  function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  asm
          cmpl    $32,%ecx                { empirical average value, on a Athlon XP the
                                            break even is at 14, on a Core 2 Duo > 100 }
          jg      .LCmpWordBulk
          testl   %ecx,%ecx
          je      .LCmpWordNone
          pushl   %ebx
  .LCmpWordNext:
          movw    (%eax),%bx
          cmpw    (%edx),%bx
          leal    2(%eax),%eax            { lea keeps the flags of the cmpw intact }
          leal    2(%edx),%edx
          jne     .LCmpWordDiff
          decl    %ecx
          jne     .LCmpWordNext
  .LCmpWordDiff:
          movzwl  -2(%edx),%ecx           { Compare last position }
          movzwl  %bx,%eax
          subl    %ecx,%eax
          popl    %ebx
          ret
  .LCmpWordNone:
          xorl    %eax,%eax
          ret
  .LCmpWordBulk:
          pushl   %esi
          pushl   %edi
          pushl   %ebx
          cld
          movl    %eax,%edi               { edi = buf1 }
          movl    %edx,%esi               { esi = buf2 }
          movl    %ecx,%eax               { eax = words to go }
          movl    (%edi),%ebx             // Compare alignment bytes.
          cmpl    (%esi),%ebx
          jne     .LCmpWordRescan         // Aligning will go wrong already. Max 2 words will be scanned Branch NOW
          shll    $1,%eax                 { Convert word count to bytes }
          movl    %edi,%edx               { Align comparing is already done, so simply add }
          negl    %edx                    { calc bytes to align -%edi and 3 }
          andl    $3,%edx
          addl    %edx,%esi               { Skip max 3 bytes alignment }
          addl    %edx,%edi
          subl    %edx,%eax               { Subtract from number of bytes to go }
          movl    %eax,%ecx               { Make copy of bytes to go }
          andl    $3,%eax                 { Calc remainder (mod 4) }
          andl    $1,%edx                 { %edx is 1 if array not 2-aligned, 0 otherwise }
          shrl    $2,%ecx                 { divide bytes to go by 4, DWords to go }
          orl     %ecx,%ecx               { Sets zero flag if ecx=0 -> no cmp }
          rep                             { Compare entire DWords }
          cmpsl
          je      .LCmpWordUnalign        { All equal? then to the left over bytes }
          movl    $4,%eax                 { Not equal. Rescan the last 4 bytes bytewise }
          subl    %eax,%esi               { Go back one DWord }
          subl    %eax,%edi
          incl    %eax                    { if not odd then this does nothing, else it makes
                                            sure that adding %edx increases from 2 to 3 words }
  .LCmpWordUnalign:
          subl    %edx,%esi               { Subtract alignment }
          subl    %edx,%edi
          addl    %edx,%eax
          shrl    $1,%eax                 { bytes -> words }
  .LCmpWordRescan:
          movl    %eax,%ecx               { words still to (re)scan }
          orl     %eax,%eax               { prevent disaster in case %eax=0 }
          rep
          cmpsw
          movzwl  -2(%esi),%ecx
          movzwl  -2(%edi),%eax           // Compare failing (or equal) position
          subl    %ecx,%eax               // calculate end result.
          popl    %ebx
          popl    %edi
          popl    %esi
  end;
  {$endif FPC_SYSTEM_HAS_COMPAREWORD}
  {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
  {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  { Compares len 32-bit dwords of buf1 and buf2 (unsigned) and returns -1, 0
    or 1 depending on whether the first differing dword of buf1 is below,
    equal to or above the one of buf2. Returns 0 when len is 0.

    On entry: EAX = address of buf1, EDX = address of buf2, ECX = len (dwords).
    Up to 32 dwords are compared in a simple loop, longer buffers with
    rep cmpsl. }
  function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  asm
          cmpl    $32,%ecx                { empirical average value, on a Athlon XP the
                                            break even is at 12, on a Core 2 Duo > 100 }
          jg      .LCmpDWordBulk
          testl   %ecx,%ecx
          je      .LCmpDWordNone
          pushl   %ebx
  .LCmpDWordNext:
          movl    (%eax),%ebx
          cmpl    (%edx),%ebx
          leal    4(%eax),%eax            { lea keeps the flags of the cmpl intact }
          leal    4(%edx),%edx
          jne     .LCmpDWordDiff
          decl    %ecx
          jne     .LCmpDWordNext
  .LCmpDWordDiff:
          xorl    %eax,%eax
          movl    -4(%edx),%edx           // Compare failing (or equal) position
          subl    %edx,%ebx               // calculate end result.
          setb    %dl                     // dl = 1 when buf1 < buf2
          seta    %cl                     // cl = 1 when buf1 > buf2
          addb    %cl,%al
          subb    %dl,%al
          movsbl  %al,%eax                // sign extend -1/0/1
          popl    %ebx
          ret
  .LCmpDWordNone:
          xorl    %eax,%eax
          ret
  .LCmpDWordBulk:
          pushl   %esi
          pushl   %edi
          cld
          movl    %eax,%edi               { edi = buf1 }
          movl    %edx,%esi               { esi = buf2 }
          xorl    %eax,%eax
          rep                             { Compare entire DWords }
          cmpsl
          movl    -4(%edi),%edi           // Compare failing (or equal) position
          subl    -4(%esi),%edi           // calculate end result.
          setb    %dl                     // dl = 1 when buf1 < buf2
          seta    %cl                     // cl = 1 when buf1 > buf2
          addb    %cl,%al
          subb    %dl,%al
          movsbl  %al,%eax                // sign extend -1/0/1
          popl    %edi
          popl    %esi
  end;
  {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
  {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
  {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  { Returns the zero-based index of the first character equal to b in buf.
    The search stops after len characters or after the first #0 character,
    whichever comes first; -1 is returned when b was not found, which
    includes the case len = 0.

    On entry: EAX = address of buf, EDX = len, CL = b.
    ESI and EBX are preserved. }
  function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt; assembler;
  var
    saveesi,saveebx : longint;
  asm
          movl    %esi,saveesi
          movl    %ebx,saveebx
          // Can't use scasb, or will have to do it twice, think this
          // is faster for small "len"
          movl    %eax,%esi               // Load address
          movzbl  %cl,%ebx                // Load searchpattern
          testl   %edx,%edx
          // Nothing to search: report "not found". ecx still holds the raw
          // 'b' argument here, so it must not be returned as the index.
          je      .LNotFound
          xorl    %ecx,%ecx               // zero index in Buf
          xorl    %eax,%eax               // To make DWord compares possible
          .balign 4
  .LLoop:
          movb    (%esi),%al              // Load byte
          cmpb    %al,%bl
          je      .LFound                 // byte the same?
          incl    %ecx
          incl    %esi
          cmpl    %edx,%ecx               // Maximal distance reached?
          je      .LNotFound
          testl   %eax,%eax               // Nullchar = end of search?
          jne     .LLoop
  .LNotFound:
          movl    $-1,%ecx                // Not found return -1
  .LFound:
          movl    %ecx,%eax
          movl    saveesi,%esi
          movl    saveebx,%ebx
  end;
  {$endif FPC_SYSTEM_HAS_INDEXCHAR0}
  676. {****************************************************************************
  677. String
  678. ****************************************************************************}
  {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  { Copies the shortstring sstr into res, truncating it when it is longer than
    the limit passed in EDX.

    The length byte of sstr is read, clamped (unsigned) to the value in EDX,
    written as the length byte of res and followed by that many characters.
    NOTE(review): EDX is presumably the hidden high(res) argument of the
    open shortstring parameter - confirm against the compiler's calling
    convention. ESI and EDI are preserved. }
  procedure fpc_shortstr_to_shortstr(out res:shortstring; const sstr: shortstring);assembler;[public,alias:'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  var
    saveesi,saveedi : longint;
  asm
  {$ifdef FPC_PROFILE}
          push    %eax
          push    %edx
          push    %ecx
          call    mcount
          pop     %ecx
          pop     %edx
          pop     %eax
  {$endif FPC_PROFILE}
          movl    %edi,saveedi
          movl    %esi,saveesi
          cld
          movl    res,%edi
          movl    sstr,%esi
          movl    %edx,%ecx               { ecx = maximum length }
          xorl    %eax,%eax
          lodsb                           { eax = length of sstr }
          cmpl    %ecx,%eax
          jbe     .LCopyLenOk
          movl    %ecx,%eax               { truncate to the maximum }
  .LCopyLenOk:
          stosb                           { store the (clamped) length byte }
          cmpl    $7,%eax
          jl      .LCopyTail              { short string: bytes only }
          movl    %edi,%ecx               { Align on 32bits }
          negl    %ecx
          andl    $3,%ecx
          subl    %ecx,%eax
          rep
          movsb
          movl    %eax,%ecx
          andl    $3,%eax                 { eax = bytes left after the dwords }
          shrl    $2,%ecx                 { ecx = number of dwords }
          rep
          movsl
  .LCopyTail:
          movl    %eax,%ecx
          rep
          movsb
          movl    saveedi,%edi
          movl    saveesi,%esi
  end;
  { Copies the shortstring at sstr to dstr, truncating it to at most len
    characters.

    The length byte of the source is read, clamped (unsigned) to len, written
    as the first byte of the destination and followed by that many characters.
    EAX and ECX are saved and restored around the copy; ESI and EDI are
    declared as modified. }
  procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  begin
    asm
  {$ifdef FPC_PROFILE}
          push    %eax
          push    %edx
          push    %ecx
          call    mcount
          pop     %ecx
          pop     %edx
          pop     %eax
  {$endif FPC_PROFILE}
          pushl   %eax
          pushl   %ecx
          cld
          movl    dstr,%edi
          movl    sstr,%esi
          xorl    %eax,%eax
          movl    len,%ecx                { ecx = maximum length }
          lodsb                           { eax = length of the source string }
          cmpl    %ecx,%eax
          jbe     .LAssignLenOk
          movl    %ecx,%eax               { truncate to the maximum }
  .LAssignLenOk:
          stosb                           { store the (clamped) length byte }
          cmpl    $7,%eax
          jl      .LAssignTail            { short string: bytes only }
          movl    %edi,%ecx               { Align on 32bits }
          negl    %ecx
          andl    $3,%ecx
          subl    %ecx,%eax
          rep
          movsb
          movl    %eax,%ecx
          andl    $3,%eax                 { eax = bytes left after the dwords }
          shrl    $2,%ecx                 { ecx = number of dwords }
          rep
          movsl
  .LAssignTail:
          movl    %eax,%ecx
          rep
          movsb
          popl    %ecx
          popl    %eax
    end ['ESI','EDI'];
  end;
  {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  { Compares two shortstrings.

    The first min(length(left),length(right)) characters are compared. When a
    character differs the result is (left char - right char), both zero
    extended; when the common part is equal the result is
    length(left) - length(right). ESI, EDI and EBX are preserved. }
  function fpc_shortstr_compare(const left,right:shortstring): longint;assembler; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  var
    saveesi,saveedi,saveebx : longint;
  asm
  {$ifdef FPC_PROFILE}
          push    %eax
          push    %edx
          push    %ecx
          call    mcount
          pop     %ecx
          pop     %edx
          pop     %eax
  {$endif FPC_PROFILE}
          movl    %edi,saveedi
          movl    %esi,saveesi
          movl    %ebx,saveebx
          cld
          movl    right,%esi
          movl    left,%edi
          movzbl  (%esi),%eax             { eax = length(right) }
          movzbl  (%edi),%ebx             { ebx = length(left) }
          movl    %eax,%edx               { edx = length(right), kept for the result }
          incl    %esi                    { skip the length bytes }
          incl    %edi
          cmpl    %ebx,%eax
          jbe     .LCmpMinLen
          movl    %ebx,%eax               { eax = shorter of the two lengths }
  .LCmpMinLen:
          cmpl    $7,%eax
          jl      .LCmpBytes              { short strings: bytes only }
          movl    %edi,%ecx               { Align on 32bits }
          negl    %ecx
          andl    $3,%ecx
          subl    %ecx,%eax
          orl     %ecx,%ecx               { ZF=1 when nothing to align }
          rep
          cmpsb
          jne     .LCmpCharDiff
          movl    %eax,%ecx
          andl    $3,%eax                 { eax = bytes left after the dwords }
          shrl    $2,%ecx                 { ecx = number of dwords }
          orl     %ecx,%ecx               { ZF=1 when there are no dwords }
          rep
          cmpsl
          je      .LCmpBytes
          movl    $4,%eax                 { mismatch: rescan the last dword bytewise }
          subl    %eax,%esi
          subl    %eax,%edi
  .LCmpBytes:
          movl    %eax,%ecx
          orl     %eax,%eax               { ZF=1 when nothing is left to compare }
          rep
          cmpsb
          je      .LCmpResult
  .LCmpCharDiff:
          movzbl  -1(%esi),%edx           // Compare failing (or equal) position
          movzbl  -1(%edi),%ebx
  .LCmpResult:
          movl    %ebx,%eax               // Compare length or position
          subl    %edx,%eax
          movl    saveedi,%edi
          movl    saveesi,%esi
          movl    saveebx,%ebx
  end;
  {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  { Converts the zero terminated string p to the shortstring res.

    On entry: EAX = address of res, ECX = p. A nil p yields an empty string.
    ECX is then reused as a counter that starts at 1 and ends up as
    "characters copied + 1"; the final addb $255 turns it into the length
    byte. The source is read one aligned dword at a time and each dword is
    stored before it is checked for a terminating zero byte.
    NOTE(review): the limit passed in EDX is not consulted; the code relies on
    res having room for 255 characters, as the comment below states.
    EBX, ESI and EDI are preserved. }
  procedure fpc_pchar_to_shortstr(out res : shortstring;p:pchar);assembler;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  var
    saveres,saveebx,saveesi,saveedi : longint;
  asm
  {$ifdef FPC_PROFILE}
          push    %eax
          push    %edx
          push    %ecx
          call    mcount
          pop     %ecx
          pop     %edx
          pop     %eax
  {$endif FPC_PROFILE}
          movl    %ebx,saveebx
          movl    %esi,saveesi
          movl    %edi,saveedi
          movl    %ecx,%esi               // esi = source (p)
          movl    %eax,%edi               // edi = destination (res)
          movl    %edi,saveres            // remember where the length byte goes
          movl    $1,%ecx                 // counter = length + 1
          testl   %esi,%esi
          movl    %esi,%eax
          jz      .LPasDone               // p = nil -> empty string
          leal    3(%esi),%edx
          andl    $-4,%edx                // edx = p rounded up to a multiple of 4
          // skip length byte
          incl    %edi
          subl    %esi,%edx               // edx = bytes until the source is aligned
          jz      .LPasAligned
          // align source to multiple of 4 (not dest, because we can't read past
          // the end of the source, since that may be past the end of the heap
          // -> sigsegv!!)
  .LPasAlignLoop:
          movb    (%esi),%al
          incl    %esi
          testb   %al,%al
          jz      .LPasDone
          incl    %edi
          incb    %cl
          decb    %dl
          movb    %al,-1(%edi)            // mov keeps the flags of the decb intact
          jne     .LPasAlignLoop
          .balign 16
  .LPasAligned:
          movl    (%esi),%ebx
          addl    $4,%edi
          leal    0x0fefefeff(%ebx),%eax  // eax = dword - $01010101
          movl    %ebx,%edx
          addl    $4,%esi
          notl    %edx
          andl    %edx,%eax
          addl    $4,%ecx
          andl    $0x080808080,%eax       // non-zero when the dword contains a zero byte
          movl    %ebx,-4(%edi)           // store the dword; mov keeps the flags intact
          jnz     .LPasEndFound
          cmpl    $252,%ecx
          ja      .LPasPreEndLoop         // fewer than 4 characters of room left
          jmp     .LPasAligned
  .LPasEndFound:
          subl    $4,%ecx
          // this won't overwrite data since the result = 255 char string
          // and we never process more than the first 255 chars of p
          shrl    $8,%eax                 // CF = flag of byte 0
          jc      .LPasDone
          incl    %ecx
          shrl    $8,%eax                 // CF = flag of byte 1
          jc      .LPasDone
          incl    %ecx
          shrl    $8,%eax                 // CF = flag of byte 2
          jc      .LPasDone
          incl    %ecx
          jmp     .LPasDone
  .LPasPreEndLoop:
          testb   %cl,%cl                 // cl wrapped to 0: 255 characters copied
          jz      .LPasDone
          movl    (%esi),%eax
  .LPasEndLoop:
          testb   %al,%al
          jz      .LPasDone
          movb    %al,(%edi)
          shrl    $8,%eax
          incl    %edi
          incb    %cl
          jnz     .LPasEndLoop
  .LPasDone:
          movl    saveres,%edi
          addb    $255,%cl                // cl = cl - 1 = length
          movb    %cl,(%edi)
          movl    saveesi,%esi
          movl    saveedi,%edi
          movl    saveebx,%ebx
  end;
  {$endif FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  {$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  { Returns the number of characters before the terminating #0 of p.
    A nil pointer gives 0 (EAX is returned unchanged). The scan is done with
    repne scasb; EDI is preserved. }
  function fpc_pchar_length(p:pchar):sizeint;assembler;[public,alias:'FPC_PCHAR_LENGTH']; compilerproc;
  var
    saveedi : longint;
  asm
  {$ifdef FPC_PROFILE}
          push    %eax
          push    %edx
          push    %ecx
          call    mcount
          pop     %ecx
          pop     %edx
          pop     %eax
  {$endif FPC_PROFILE}
          test    %eax,%eax
          jz      .LPCharLenDone          { nil -> result 0 }
          movl    %edi,saveedi
          movl    %eax,%edi
          movl    $0xffffffff,%ecx        { no limit on the scan }
          xorl    %eax,%eax               { look for a zero byte }
          cld
          repne
          scasb
          movl    $0xfffffffe,%eax
          subl    %ecx,%eax               { length = $fffffffe - remaining count }
          movl    saveedi,%edi
  .LPCharLenDone:
  end;
  {$endif FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
  {$IFNDEF INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current value of EBP. }
  function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          movl    %ebp,%eax
  end;
  {$ENDIF not INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  { Returns the dword on top of the stack. The routine has no stack frame, so
    this is the return address of the call, i.e. the address of the
    instruction following the call to Get_pc_addr. }
  Function Get_pc_addr : Pointer;assembler;nostackframe;
  asm
          movl    (%esp),%eax
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Returns the dword stored at offset 4 of the frame framebp points to, or
    nil when framebp is nil. The addr parameter is not used. }
  function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;nostackframe;assembler;
  asm
          testl   %eax,%eax
          jz      .LNoFrame
          movl    4(%eax),%eax
  .LNoFrame:
  end;
  {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Returns the dword stored at offset 0 of the frame framebp points to, or
    nil when framebp is nil. The addr parameter is not used. }
  function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;nostackframe;assembler;
  asm
          testl   %eax,%eax
          jz      .LNoFrame
          movl    (%eax),%eax
  .LNoFrame:
  end;
  994. {$define FPC_SYSTEM_HAS_SPTR}
{ Returns the value of %esp as seen inside this frameless routine, i.e. the
  stack pointer right after the call pushed its return address. }
Function Sptr : Pointer;assembler;nostackframe;
asm
movl %esp,%eax
end;
  999. {****************************************************************************
  1000. Str()
  1001. ****************************************************************************}
  1002. {$if defined(disabled) and defined(regcall) }
  1003. {$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
  1004. {$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
{ Two overloads of int_str that share one conversion body. The longword
  variant sets up the registers and jumps to str_int_shortcut inside the
  longint variant.
  Register use as established by the code below:
    eax = value (absolute value after the sign handling)
    edx = 1 when a minus sign is needed, 0 otherwise (until the digit loops,
          where edx becomes the running quotient)
    edi = destination address (copied from edx on entry)
    esi = copied from ecx on entry, then used as remaining-room counter
    ecx = number of decimal digits
  NOTE(review): ecx on entry is presumably high(s) of the open string
  parameter - confirm against the compiler's calling convention. }
label str_int_shortcut;
procedure int_str(l:longword;out s:string);assembler;nostackframe;
asm
{ same register saves as the longint variant, whose epilogue pops them }
pushl %esi
pushl %edi
pushl %ebx
mov %edx,%edi
{ unsigned value: no sign character }
xor %edx,%edx
jmp str_int_shortcut
end;
procedure int_str(l:longint;out s:string);assembler;nostackframe;
{Optimized for speed, but balanced with size.}
{ digits[i] is the smallest value that needs i+1 decimal digits (entry 0 is 0) }
const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
100000,1000000,10000000,
100000000,1000000000);
asm
{$ifdef FPC_PROFILE}
push %eax
push %edx
push %ecx
call mcount
pop %ecx
pop %edx
pop %eax
{$endif FPC_PROFILE}
push %esi
push %edi
push %ebx
movl %edx,%edi
{ Calculate absolute value and put sign in edx}
{ cltd makes edx 0 or -1; xor+sub negates eax when edx=-1; negl turns -1 into 1 }
cltd
xorl %edx,%eax
subl %edx,%eax
negl %edx
str_int_shortcut:
movl %ecx,%esi
{Calculate amount of digits in ecx.}
{ ecx is cleared first so that a zero eax (bsr leaves ecx untouched in
  practice) gives a bit count of 0; (bits+1)*1233 shr 12 estimates the
  decimal digit count, corrected by the table compare below }
xorl %ecx,%ecx
bsrl %eax,%ecx
incl %ecx
imul $1233,%ecx
shr $12,%ecx
{$ifdef FPC_PIC}
call fpc_geteipasebx
{$ifdef darwin}
{ NOTE(review): this loads the dword stored at digits rather than its
  address, unlike the GOT path below - verify before enabling this code }
movl digits-.Lpic(%ebx),%ebx
{$else}
addl $_GLOBAL_OFFSET_TABLE_,%ebx
movl digits@GOT(%ebx),%ebx
{$endif}
cmpl (%ebx,%ecx,4),%eax
{$else}
cmpl digits(,%ecx,4),%eax
{$endif}
{ carry is clear when eax >= digits[ecx]; cmc+adc then adds 1 in that case }
cmc
adcl $0,%ecx {Nr. digits ready in ecx.}
{Write length & sign.}
{ bl = sign+digits is stored as length byte, bh = '-' lands in s[1];
  for non-negative values s[1] is overwritten by the first digit later }
lea (%edx,%ecx),%ebx
movb $45,%bh {movb $'-,%bh Not supported by our ATT reader.}
movw %bx,(%edi)
addl %edx,%edi
subl %edx,%esi
{Skip digits beyond string length.}
{ esi = room left after the sign minus digit count; no borrow means all fit.
  NOTE(review): the length byte stored above is not reduced when digits are
  skipped here - confirm this is intended. }
movl %eax,%edx
subl %ecx,%esi
jae .Lloop_write
.balign 4
.Lloop_skip:
{ drop the least significant digit: edx := edx div 10 }
movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
mull %edx
shrl $3,%edx
decl %ecx
jz .Ldone {If (l<0) and (high(s)=1) this jump is taken.}
incl %esi
jnz .Lloop_skip
{Write out digits.}
.balign 4
.Lloop_write:
movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
{Pre-add '0'}
leal 48(%edx),%ebx {leal $'0(,%edx),%ebx Not supported by our ATT reader.}
mull %edx
shrl $3,%edx
{ eax = 9*q, so ebx = x+48 - q - 9*q = (x mod 10) + '0' }
leal (%edx,%edx,8),%eax {x mod 10 = x-10*(x div 10)}
subl %edx,%ebx
subl %eax,%ebx
{ digits are stored from the last position towards the first }
movb %bl,(%edi,%ecx)
decl %ecx
jnz .Lloop_write
.Ldone:
popl %ebx
popl %edi
popl %esi
end;
  1099. {$endif}
  1100. {****************************************************************************
  1101. Bounds Check
  1102. ****************************************************************************}
  1103. { do a thread-safe inc/dec }
  1104. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
{ Decrements l with a lock prefix and returns true when the new value is 0.
  The address of l is read from %eax; the boolean result is left in %al. }
function cpudeclocked(var l : longint) : boolean;assembler;nostackframe;
asm
{ a locked instruction takes a lot of time, so callers should test }
{ whether locking is needed first; this routine itself always locks }
lock
decl (%eax)
setzb %al
end;
  1113. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
{ Increments l with a lock prefix. The address of l is read from %eax. }
procedure cpuinclocked(var l : longint);assembler;nostackframe;
asm
lock
incl (%eax)
end;
  1119. // inline SMP check and normal lock.
  1120. // the locked one is so slow, inlining doesn't matter.
  1121. function declocked(var l : longint) : boolean; inline;
  1122. begin
  1123. if not ismultithread then
  1124. begin
  1125. dec(l);
  1126. declocked:=l=0;
  1127. end
  1128. else
  1129. declocked:=cpudeclocked(l);
  1130. end;
  1131. procedure inclocked(var l : longint); inline;
  1132. begin
  1133. if not ismultithread then
  1134. inc(l)
  1135. else
  1136. cpuinclocked(l);
  1137. end;
{ Atomically subtracts 1 from Target and returns the new value.
  The address of Target is read from %eax. }
function InterLockedDecrement (var Target: longint) : longint; assembler;
asm
{ after the xchg: eax = -1 (addend), edx = address of Target }
movl $-1,%edx
xchgl %edx,%eax
lock
xaddl %eax, (%edx)
{ xadd left the previous value in eax; adjust it to the value now stored }
decl %eax
end;
{ Atomically adds 1 to Target and returns the new value.
  The address of Target is read from %eax. }
function InterLockedIncrement (var Target: longint) : longint; assembler;
asm
{ after the xchg: eax = 1 (addend), edx = address of Target }
movl $1,%edx
xchgl %edx,%eax
lock
xaddl %eax, (%edx)
{ xadd left the previous value in eax; adjust it to the value now stored }
incl %eax
end;
{ Stores Source in Target and returns the value Target held before.
  The address of Target is read from %eax and Source from %edx. }
function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
asm
{ xchg with a memory operand is locked by the CPU, so no lock prefix is used }
xchgl (%eax),%edx
movl %edx,%eax
end;
{ Atomically adds Source to Target and returns the value Target held before
  the addition. The address of Target is read from %eax and Source from %edx. }
function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
asm
{ after the xchg: eax = Source, edx = address of Target }
xchgl %eax,%edx
lock
xaddl %eax, (%edx)
end;
{ Atomically compares Target with Comperand and, when they are equal, stores
  NewValue in Target. Returns the value Target held before the operation.
  Registers read: %eax = address of Target, %edx = NewValue, %ecx = Comperand. }
function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler;
asm
{ cmpxchg compares against eax, so move Comperand there and the address to ecx }
xchgl %eax,%ecx
lock
cmpxchgl %edx, (%ecx)
end;
{ 64-bit variant of InterlockedCompareExchange built on cmpxchg8b: when the
  qword at Target equals Comperand it is replaced by NewValue. The value
  Target held before the operation is left in %edx:%eax.
  The address of Target is read from %eax; NewValue and Comperand are
  accessed by name. }
function InterlockedCompareExchange64(var Target: int64; NewValue: int64; Comperand: int64): int64; assembler;
asm
{ ebx and edi are needed as operands below, so save them first }
pushl %ebx
pushl %edi
movl %eax,%edi
{ cmpxchg8b expects the comparand in edx:eax and the new value in ecx:ebx }
movl Comperand+4,%edx
movl Comperand+0,%eax
movl NewValue+4,%ecx
movl NewValue+0,%ebx
lock cmpxchg8b (%edi)
pop %edi
pop %ebx
end;
  1184. {****************************************************************************
  1185. FPU
  1186. ****************************************************************************}
const
{ Internal constants for use in system unit }
{ FPU_* are single-bit flags in the low byte; FPU_ExceptionMask covers
  that whole byte }
FPU_Invalid = 1;
FPU_Denormal = 2;
FPU_DivisionByZero = 4;
FPU_Overflow = 8;
FPU_Underflow = $10;
FPU_StackUnderflow = $20;
FPU_StackOverflow = $40;
FPU_ExceptionMask = $ff;
{ use Default8087CW instead
fpucw : word = $1300 or FPU_StackUnderflow or FPU_Underflow or FPU_Denormal;
}
{ MM_Mask* are single bits 7..12, combined below into the value that
  SysInitFPU/SysResetFPU load with ldmxcsr }
MM_MaskInvalidOp = %0000000010000000;
MM_MaskDenorm = %0000000100000000;
MM_MaskDivZero = %0000001000000000;
MM_MaskOverflow = %0000010000000000;
MM_MaskUnderflow = %0000100000000000;
MM_MaskPrecision = %0001000000000000;
{ default MXCSR: underflow, precision and denormal exceptions masked;
  fpc_cpucodeinit assigns to it, so it is a writable typed constant }
mxcsr : dword = MM_MaskUnderflow or MM_MaskPrecision or MM_MaskDenorm;
  1207. {$define FPC_SYSTEM_HAS_SYSINITFPU}
{ Initialises the x87 FPU (fninit) and loads Default8087CW as control word.
  When has_sse_support is set, the unit-level mxcsr value is loaded as well.
  Finally sets softfloat_exception_mask to underflow, inexact and denormal. }
Procedure SysInitFPU;
var
{ these locals are so we don't have to hack pic code in the assembler }
localmxcsr: dword;
localfpucw: word;
begin
localfpucw:=Default8087CW;
asm
fninit
fldcw localfpucw
fwait
end;
if has_sse_support then
begin
localmxcsr:=mxcsr;
asm
{ setup sse exceptions }
ldmxcsr localmxcsr
end;
end;
softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
end;
  1230. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
{ Resets the x87 FPU (fninit) and reloads Default8087CW as control word.
  When has_sse_support is set, the unit-level mxcsr value is reloaded as
  well. Finally clears softfloat_exception_flags.
  Note that fwait is issued before fldcw here, whereas SysInitFPU issues it
  after fldcw. }
Procedure SysResetFPU;
var
{ these locals are so we don't have to hack pic code in the assembler }
localmxcsr: dword;
localfpucw: word;
begin
localfpucw:=Default8087CW;
asm
fninit
fwait
fldcw localfpucw
end;
if has_sse_support then
begin
localmxcsr:=mxcsr;
asm
{ setup sse exceptions }
ldmxcsr localmxcsr
end;
end;
softfloat_exception_flags:=0;
end;
{ because of the brain dead sse detection on x86, this test is postponed }
{ CPU-specific start-up: determines has_sse_support and has_mmx_support,
  then puts the FPU into its default state (and, when built with
  USE_FASTMOVE, calls setup_fastmove). }
procedure fpc_cpucodeinit;
begin
{ NOTE(review): the first assignment is immediately overwritten; presumably
  sse_support or the exception handler reads os_supports_sse - confirm
  before removing it }
os_supports_sse:=true;
os_supports_sse:=sse_support;
if os_supports_sse then
begin
{ sse_check is set only around the probe instruction below }
sse_check:=true;
asm
{ force an sse exception if no sse is supported, the exception handler sets
os_supports_sse to false then }
{ don't change this instruction, the code above depends on its size }
movaps %xmm7, %xmm6
end;
sse_check:=false;
end;
has_sse_support:=os_supports_sse;
{ don't let libraries influence the FPU cw set by the host program }
if has_sse_support and
IsLibrary then
mxcsr:=GetSSECSR;
has_mmx_support:=mmx_support;
SysResetFPU;
{ SysInitFPU is skipped when running as a library }
if not(IsLibrary) then
SysInitFPU;
{$ifdef USE_FASTMOVE}
setup_fastmove;
{$endif}
end;
  1282. {$if not defined(darwin) and defined(regcall) }
  1283. { darwin requires that the stack is aligned to 16 bytes when calling another function }
  1284. {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
  1285. {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
  1286. function fpc_freemem_x(p:pointer):ptrint; [external name 'FPC_FREEMEM_X'];
{ Releases one reference to the ansistring S and sets S to nil.
  The address of S is read from %eax. Nothing happens when S is already nil.
  A negative reference count is left untouched. Otherwise the count is
  decremented (with a locked decrement when ismultithread is non-zero) and
  the memory is handed to FPC_FREEMEM_X once the count reaches zero. }
Procedure fpc_AnsiStr_Decr_Ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF']; compilerproc; nostackframe; assembler;
asm
cmpl $0,(%eax)
je .Lquit
{ esi must survive the calls below, so it is saved and restored here }
pushl %esi
movl (%eax),%esi
subl $12,%esi // points to start of allocation
movl $0,(%eax) // s:=nil
cmpl $0,4(%esi) // exit if refcount<0
jl .Lj3596
{$ifdef FPC_PIC}
{ fetch ismultithread through the GOT; popl does not alter the flags set by cmp }
pushl %ebx
call fpc_geteipasebx
addl $_GLOBAL_OFFSET_TABLE_,%ebx
movl ismultithread@GOT(%ebx),%ebx
movl (%ebx),%ebx
cmp $0, %ebx
popl %ebx
{$else FPC_PIC}
cmpl $0,ismultithread
{$endif FPC_PIC}
jne .Lj3610
{ single-threaded: plain decrement, free when it reaches zero }
decl 4(%esi)
je .Lj3620
jmp .Lj3596
.Lj3610:
{ multi-threaded: cpudeclocked returns non-zero in al when the count hit zero }
leal 4(%esi),%eax
call cpudeclocked
testb %al,%al
je .Lj3596
.Lj3620:
movl %esi,%eax
call FPC_FREEMEM_X
.Lj3596:
popl %esi
.Lquit:
end;
  1324. function fpc_truely_ansistr_unique(Var S : Pointer): Pointer; forward;
  1325. {$define FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
{ Fast path of the ansistring uniqueness check: returns pointer(S) as is
  when S is nil or the dword at offset -8 (the reference count, per the
  quoted source comments below) equals 1; otherwise defers to
  fpc_truely_ansistr_unique and returns its result.
  The address of S is read from %eax. }
Function fpc_ansistr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_ANSISTR_UNIQUE']; compilerproc; nostackframe;assembler;
asm
// Var S located in register
// Var $result located in register
// keep the address of S in edx for the slow path
movl %eax,%edx
// [437] pointer(result) := pointer(s);
movl (%eax),%eax
// [438] If Pointer(S)=Nil then
testl %eax,%eax
je .Lj4031
.Lj4036:
// [440] if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
movl -8(%eax),%ecx
cmpl $1,%ecx
je .Lj4038
// [441] result:=fpc_truely_ansistr_unique(s);
movl %edx,%eax
call fpc_truely_ansistr_unique
.Lj4038:
.Lj4031:
// [442] end;
end;
  1348. {$endif FPC_HAS_FEATURE_ANSISTRINGS}
  1349. {$endif ndef darwin and defined(regcall) }
  1350. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  1351. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
{ Read barrier, implemented as a locked add of 0 to the dword at the top of
  the stack: memory contents are unchanged, the locked instruction provides
  the ordering. }
procedure ReadBarrier;assembler;nostackframe;
asm
lock
addl $0,0(%esp)
{ alternative: lfence on SSE capable CPUs }
end;
{ Data-dependency read barrier; intentionally empty on this target. }
procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
{ reads imply barrier on earlier reads depended on }
end;
{ Full read/write barrier, implemented as a locked add of 0 to the dword at
  the top of the stack: memory contents are unchanged, the locked
  instruction provides the ordering. }
procedure ReadWriteBarrier;assembler;nostackframe;
asm
lock
addl $0,0(%esp)
{ alternative: mfence on SSE capable CPUs }
end;
{ Write barrier; emits no instructions on this target. }
procedure WriteBarrier;assembler;nostackframe;
asm
{ no write reordering on intel CPUs (yet) }
end;
  1372. {$endif}
  1373. {$ifndef FPC_SYSTEM_HAS_BSX_QWORD}
  1374. {$define FPC_SYSTEM_HAS_BSX_QWORD}
  1375. function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1376. asm
  1377. bsfl 4(%esp),%eax
  1378. jnz .L2
  1379. .L1: bsfl 8(%esp),%eax
  1380. add $32,%eax
  1381. .L2:
  1382. end;
  1383. function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1384. asm
  1385. bsrl 8(%esp),%eax
  1386. jz .L1
  1387. add $32,%eax
  1388. jmp .L2
  1389. .L1: bsrl 4(%esp),%eax
  1390. .L2:
  1391. end;
  1392. {$endif}