i386.inc 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. intel i386+
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  { Define FPC_SYSTEM_STACKALIGNMENT16 for linux targets, unless VER3_0 is defined. }
  12. {$if not(defined(VER3_0)) and defined(linux)}
  13. {$define FPC_SYSTEM_STACKALIGNMENT16}
  14. {$endif not(defined(VER3_0)) and defined(linux)}
  15. {****************************************************************************
  16. Primitives
  17. ****************************************************************************}
  18. var
  19. os_supports_sse : boolean;
  20. { this variable is set to true while an SSE check is being executed, during which no SIGILL should be generated }
  21. sse_check : boolean;
  22. {$asmmode ATT}
  23. function cpuid_support : boolean;assembler;nostackframe;
  24. {
  25. Check if the ID-flag can be changed, if changed then CpuID is supported.
  26. Tested under go32v1 and Linux on c6x86 with CpuID enabled and disabled (PFV)
  The ID flag is bit 21 (mask 0x200000) of EFLAGS. The routine toggles it, reads
  EFLAGS back and returns true in al when the toggle stuck. The EFLAGS value
  found on entry is restored before returning.
  27. }
  28. asm
  29. pushfl { keep the original EFLAGS on the stack }
  30. movl (%esp),%eax { eax = copy of the original EFLAGS }
  31. xorl $0x200000,%eax { toggle the ID flag in the copy }
  32. pushl %eax
  33. popfl { try to load the modified value into EFLAGS }
  34. pushfl
  35. popl %eax { eax = EFLAGS as the CPU actually kept them }
  36. xorl (%esp),%eax { eax = bits that differ from the original }
  37. popfl { restore the original EFLAGS }
  38. testl $0x200000,%eax { did the ID flag change? }
  39. setnz %al { result = true when it did }
  40. end;
  { Unless this is a PIC build or a Move implementation already exists, define USE_FASTMOVE and include fastmove.inc (skipped when OLD_ASSEMBLER is defined). }
  41. {$ifndef FPC_PIC}
  42. {$ifndef FPC_SYSTEM_HAS_MOVE}
  43. {$ifndef OLD_ASSEMBLER}
  44. {$define USE_FASTMOVE}
  45. {$i fastmove.inc}
  46. {$endif not OLD_ASSEMBLER}
  47. {$endif FPC_SYSTEM_HAS_MOVE}
  48. {$endif FPC_PIC}
  49. {$define FPC_SYSTEM_HAS_FPC_CPUINIT}
  50. procedure fpc_cpuinit;
  { Intentionally empty: the statements below are kept only as a comment. }
  51. begin
  52. { because of the brain dead sse detection on x86, this test is postponed to fpc_cpucodeinit which
  53. must be implemented OS dependent (FK)
  54. has_sse_support:=sse_support;
  55. has_mmx_support:=mmx_support;
  56. setup_fastmove;
  57. }
  58. end;
  59. {$ifndef darwin}
  60. procedure fpc_geteipasebx; [public, alias: 'fpc_geteipasebx'];assembler; nostackframe;
  { Loads the dword at the top of the stack into ebx. With no stack frame and nothing
    pushed, that dword is the return address of the call, i.e. the caller's EIP. }
  61. asm
  62. movl (%esp),%ebx { ebx = return address of this call }
  63. end;
  64. procedure fpc_geteipasecx; [public, alias: 'fpc_geteipasecx'];assembler; nostackframe;
  { Loads the dword at the top of the stack into ecx. With no stack frame and nothing
    pushed, that dword is the return address of the call, i.e. the caller's EIP. }
  65. asm
  66. movl (%esp),%ecx { ecx = return address of this call }
  67. end;
  68. {$endif}
  69. {$ifndef FPC_SYSTEM_HAS_MOVE}
  70. {$define FPC_SYSTEM_HAS_MOVE}
  71. procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
  {
  Copies count bytes from source to dest; the two regions may overlap.
  Nothing is done when count<=0 or when source and dest are the same address.
  As used below, on entry eax = address of source, edx = address of dest, ecx = count.
  esi and edi are saved in locals and restored before returning.
  The copy runs upwards unless dest lies inside the source region, in which case it
  runs downwards with the direction flag set; the flag is cleared again afterwards.
  Moves of 15 bytes or more first copy up to 3 bytes so that the destination is
  4-byte aligned, then copy dwords, then the remaining 0..3 bytes.
  }
  72. var
  73. saveesi,saveedi : longint;
  74. asm
  75. movl %edi,saveedi
  76. movl %esi,saveesi
  77. movl %eax,%esi { esi = source }
  78. movl %edx,%edi { edi = dest }
  79. movl %ecx,%edx { edx = count }
  80. movl %edi,%eax { eax = dest, consumed by the direction test below }
  81. { check for zero or negative count }
  82. cmpl $0,%edx
  83. jle .LMoveEnd
  84. { Check for back or forward }
  85. sub %esi,%eax { eax = dest - source }
  86. jz .LMoveEnd { Do nothing when source=dest }
  87. jc .LFMove { Do forward, dest<source }
  88. cmp %edx,%eax
  89. jb .LBMove { Dest is in range of move, do backward }
  90. { Forward Copy }
  91. .LFMove:
  92. {$ifdef FPC_ENABLED_CLD}
  93. cld
  94. {$endif FPC_ENABLED_CLD}
  95. cmpl $15,%edx
  96. jl .LFMove1 { short move: plain byte copy }
  97. movl %edi,%ecx { Align on 32bits }
  98. negl %ecx
  99. andl $3,%ecx { ecx = bytes up to the next 4-byte boundary of dest }
  100. subl %ecx,%edx
  101. rep
  102. movsb
  103. movl %edx,%ecx
  104. andl $3,%edx { edx = trailing bytes left after the dword copy }
  105. shrl $2,%ecx { ecx = number of whole dwords }
  106. rep
  107. movsl
  108. .LFMove1:
  109. movl %edx,%ecx
  110. rep
  111. movsb
  112. jmp .LMoveEnd
  113. { Backward Copy }
  114. .LBMove:
  115. std { copy downwards, from the last byte to the first }
  116. addl %edx,%esi
  117. addl %edx,%edi
  118. movl %edi,%ecx { ecx = dest+count, one past the last destination byte }
  119. decl %esi { esi/edi = last byte of each region }
  120. decl %edi
  121. cmpl $15,%edx
  122. jl .LBMove1 { short move: plain byte copy }
  123. { Align on 32bits: moving downwards, the bytes above the last 4-byte boundary number (dest+count) and 3, so no negation here }
  124. andl $3,%ecx
  125. subl %ecx,%edx
  126. rep
  127. movsb
  128. movl %edx,%ecx
  129. andl $3,%edx { edx = leading bytes left after the dword copy }
  130. shrl $2,%ecx { ecx = number of whole dwords }
  131. subl $3,%esi { point at the lowest byte of the dword to copy }
  132. subl $3,%edi
  133. rep
  134. movsl
  135. addl $3,%esi { back to byte granularity }
  136. addl $3,%edi
  137. .LBMove1:
  138. movl %edx,%ecx
  139. rep
  140. movsb
  141. cld { leave the direction flag cleared }
  142. .LMoveEnd:
  143. movl saveedi,%edi
  144. movl saveesi,%esi
  145. end;
  146. {$endif FPC_SYSTEM_HAS_MOVE}
  147. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  148. {$define FPC_SYSTEM_HAS_FILLCHAR}
  149. Procedure FillChar(var x;count:SizeInt;value:byte);assembler; nostackframe;
  {
  Stores value into count consecutive bytes starting at x.
  As used below, on entry eax = address of x, edx = count, cl = value.
  Counts of 22 or less are written with a simple byte loop, a count<=0 writes
  nothing, larger counts use rep stosl for whole dwords plus rep stosb for the rest.
  }
  150. asm
  151. cmpl $22,%edx { empirically determined value on a Core 2 Duo Conroe }
  152. jg .LFillFull { more than 22 bytes: use the string-store path }
  153. orl %edx,%edx
  154. jle .LFillZero { count<=0: nothing to do }
  155. .LFillLoop:
  156. movb %cl,(%eax)
  157. incl %eax
  158. decl %edx
  159. jne .LFillLoop
  160. .LFillZero:
  161. ret
  162. .LFillFull:
  163. {$ifdef FPC_ENABLED_CLD}
  164. cld
  165. {$endif FPC_ENABLED_CLD}
  166. push %edi { edi is used as the store pointer, restored at the end }
  167. movl %eax,%edi { edi = destination }
  168. movzbl %cl,%eax
  169. movl %edx,%ecx
  170. imul $0x01010101,%eax { Expand al into a 4 subbytes of eax}
  171. shrl $2,%ecx { ecx = number of whole dwords }
  172. andl $3,%edx { edx = bytes left after the dwords }
  173. rep
  174. stosl
  175. movl %edx,%ecx
  176. .LFill1: { not targeted by any jump in this routine }
  177. rep
  178. stosb
  179. .LFillEnd: { not targeted by any jump in this routine }
  180. pop %edi
  181. end;
  182. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  183. {$ifndef FPC_SYSTEM_HAS_FILLWORD}
  184. {$define FPC_SYSTEM_HAS_FILLWORD}
  185. procedure fillword(var x;count : SizeInt;value : word);assembler;
  {
  Stores value into count consecutive 16-bit words starting at x.
  As used below, on entry eax = address of x, edx = count, cx = value.
  Nothing is written when count<=0. Words are written in pairs with rep stosl,
  followed by one stosw when count is odd. edi is saved in a local and restored.
  }
  186. var
  187. saveedi : longint;
  188. asm
  189. movl %edi,saveedi
  190. movl %eax,%edi { edi = destination }
  191. movzwl %cx,%eax { eax = value, zero extended }
  192. movl %edx,%ecx { ecx = count }
  193. { check for zero or negative count }
  194. cmpl $0,%ecx
  195. jle .LFillWordEnd
  196. movl %eax,%edx
  197. shll $16,%eax
  198. orl %edx,%eax { eax = value in both the low and the high word }
  199. movl %ecx,%edx { edx = count, kept for the odd-word test }
  200. shrl $1,%ecx { ecx = number of word pairs }
  201. {$ifdef FPC_ENABLED_CLD}
  202. cld
  203. {$endif FPC_ENABLED_CLD}
  204. rep
  205. stosl
  206. movl %edx,%ecx
  207. andl $1,%ecx { ecx = 1 when one odd word is left, else 0 }
  208. rep
  209. stosw
  210. .LFillWordEnd:
  211. movl saveedi,%edi
  212. end;
  213. {$endif FPC_SYSTEM_HAS_FILLWORD}
  214. {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
  215. {$define FPC_SYSTEM_HAS_FILLDWORD}
  216. procedure filldword(var x;count : SizeInt;value : dword);assembler;
  {
  Stores value into count consecutive 32-bit dwords starting at x using rep stosl.
  As used below, on entry eax = address of x, edx = count, ecx = value.
  Nothing is written when count<=0. edi is saved in a local and restored.
  }
  217. var
  218. saveedi : longint;
  219. asm
  220. movl %edi,saveedi
  221. movl %eax,%edi { edi = destination }
  222. movl %ecx,%eax { eax = value to store }
  223. movl %edx,%ecx { ecx = count }
  224. { check for zero or negative count }
  225. cmpl $0,%ecx
  226. jle .LFillDWordEnd
  227. {$ifdef FPC_ENABLED_CLD}
  228. cld
  229. {$endif FPC_ENABLED_CLD}
  230. rep
  231. stosl
  232. .LFillDWordEnd:
  233. movl saveedi,%edi
  234. end;
  235. {$endif FPC_SYSTEM_HAS_FILLDWORD}
  236. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  237. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  238. function IndexByte_Plain(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  {
  Searches the len bytes starting at buf for the first byte equal to b, using
  general purpose registers only.
  Returns the zero-based offset of the match, or -1 when there is none.
  As used below, on entry eax = address of buf, edx = len, cl = b.
  Inputs shorter than 4 bytes (unsigned compare) are scanned bytewise. Longer ones
  are scanned bytewise up to a 4-byte boundary and then a dword at a time.
  esi and edi are saved on the stack and restored.
  }
  239. asm
  240. push %esi
  241. push %edi
  242. push %eax { save initial value of 'buf' }
  243. cmp $4,%edx { less than 4 bytes, just test byte by byte. }
  244. jb .Ltail
  245. mov %cl,%ch { prepare pattern }
  246. movzwl %cx,%esi
  247. shl $16,%ecx
  248. or %esi,%ecx { ecx = b replicated into all four bytes }
  249. .Lalignloop:
  250. test $3,%al { align to 4 bytes if necessary }
  251. je .Laligned
  252. cmp %cl,(%eax)
  253. je .Lexit
  254. inc %eax
  255. dec %edx
  256. jmp .Lalignloop
  257. .balign 16 { Main loop, unrolled 4 times for speed }
  258. .Lloop:
  259. mov (%eax),%esi { load dword }
  260. xor %ecx,%esi { XOR with pattern, bytes equal to target are now 0 }
  261. lea -0x01010101(%esi),%edi
  262. xor %esi,%edi { (x-0x01010101) xor x }
  263. not %esi
  264. and $0x80808080,%esi
  265. and %edi,%esi { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
  266. jnz .Lfound { one of the bytes matches }
  267. mov 4(%eax),%esi { same test on the second dword }
  268. xor %ecx,%esi
  269. lea -0x01010101(%esi),%edi
  270. xor %esi,%edi
  271. not %esi
  272. and $0x80808080,%esi
  273. and %edi,%esi
  274. jnz .Lfound4
  275. mov 8(%eax),%esi { same test on the third dword }
  276. xor %ecx,%esi
  277. lea -0x01010101(%esi),%edi
  278. xor %esi,%edi
  279. not %esi
  280. and $0x80808080,%esi
  281. and %edi,%esi
  282. jnz .Lfound8
  283. mov 12(%eax),%esi { same test on the fourth dword }
  284. xor %ecx,%esi
  285. lea -0x01010101(%esi),%edi
  286. xor %esi,%edi
  287. not %esi
  288. and $0x80808080,%esi
  289. and %edi,%esi
  290. jnz .Lfound12
  291. add $16,%eax
  292. .Laligned:
  293. sub $16,%edx
  294. jae .Lloop { At least 16 bytes remaining }
  295. { Process remaining bytes (<16 left at this point) }
  296. { length is offset by -16 at this point }
  297. .Lloop2:
  298. cmp $4-16,%edx { < 4 bytes left? }
  299. jb .Ltail
  300. mov (%eax),%esi
  301. xor %ecx,%esi
  302. lea -0x01010101(%esi),%edi
  303. xor %esi,%edi
  304. not %esi
  305. and $0x80808080,%esi
  306. and %edi,%esi
  307. jne .Lfound
  308. add $4,%eax
  309. sub $4,%edx
  310. jmp .Lloop2
  311. .Ltail: { Less than 4 bytes remaining, check one by one }
  312. and $3, %edx { the low two bits are the remaining count, also when the length is offset by -16 }
  313. jz .Lnotfound
  314. .Lloop3:
  315. cmp %cl,(%eax)
  316. je .Lexit
  317. inc %eax
  318. dec %edx
  319. jnz .Lloop3
  320. .Lnotfound:
  321. or $-1,%eax { result = -1 }
  322. jmp .Lexit1
  323. { add missing source pointer increments }
  324. .Lfound12:
  325. add $4,%eax
  326. .Lfound8:
  327. add $4,%eax
  328. .Lfound4:
  329. add $4,%eax
  330. .Lfound: { esi flags the matching byte, probe its bytes from lowest to highest }
  331. test $0xff,%esi
  332. jnz .Lexit
  333. inc %eax
  334. test $0xff00,%esi
  335. jnz .Lexit
  336. inc %eax
  337. test $0xff0000,%esi
  338. jnz .Lexit
  339. inc %eax
  340. .Lexit:
  341. sub (%esp),%eax { result = match address - initial 'buf' }
  342. .Lexit1:
  343. pop %ecx { removes initial 'buf' value }
  344. pop %edi
  345. pop %esi
  346. end;
  347. function IndexByte_SSE2(const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  {
  Searches the len bytes starting at buf for the first byte equal to b, comparing
  16 bytes at a time with SSE2 instructions.
  Returns the zero-based offset of the match, or -1 when there is none or len is 0.
  As used below, on entry eax = address of buf, edx = len, cl = b.
  All 16-byte loads are made from 16-byte aligned addresses. The first chunk may
  start before buf and the last may extend past buf+len; matches outside the
  requested range are masked off or rejected by the length check.
  ebx is saved on the stack and restored.
  }
  348. asm
  349. test %edx, %edx
  350. je .LNothing { len = 0 }
  351. push %ebx
  352. mov %eax, %ebx { ebx = buf }
  353. and $-16, %eax { eax = buf rounded down to a 16-byte boundary }
  354. pxor %xmm1, %xmm1
  355. movd %ecx, %xmm1
  356. punpcklbw %xmm1, %xmm1
  357. punpcklwd %xmm1, %xmm1
  358. pshufd $0, %xmm1, %xmm1 { xmm1 = b in all 16 bytes }
  359. lea 16(%eax), %ecx
  360. movdqa %xmm1, %xmm0
  361. pcmpeqb (%eax), %xmm0 { compare the first aligned chunk }
  362. sub %ebx, %ecx { ecx = number of bytes from buf to the end of the first chunk }
  363. pmovmskb %xmm0, %eax
  364. sal %cl, %eax { these three instructions clear the mask bits of the bytes in front of buf }
  365. xor %ax, %ax
  366. shr %cl, %eax
  367. jz .L16xAligned_Test { no match in the first chunk, ecx = offset of the next chunk }
  368. sub $16, %ecx { ecx = offset of the first chunk relative to buf, zero or negative }
  369. .LFound:
  370. bsf %eax, %eax { index of the first matching byte within the chunk }
  371. add %ecx, %eax { eax = offset of the match relative to buf }
  372. pop %ebx
  373. cmp %edx, %eax
  374. jnb .LNothing { the match lies at or beyond len }
  375. ret
  376. .balign 16
  377. .L16xAligned_Body:
  378. movdqa %xmm1, %xmm0
  379. pcmpeqb (%ebx,%ecx), %xmm0 { compare the chunk at buf+ecx }
  380. pmovmskb %xmm0, %eax
  381. test %eax, %eax
  382. jne .LFound
  383. add $16, %ecx
  384. .L16xAligned_Test:
  385. cmp %edx, %ecx
  386. jb .L16xAligned_Body { continue while the chunk offset is below len }
  387. pop %ebx
  388. .LNothing:
  389. mov $-1, %eax
  390. end;
  391. function IndexByte_Dispatch(const buf;len:SizeInt;b:byte):SizeInt; forward;
  392. var
  { Routine called by IndexByte. It starts out pointing at the dispatcher, which replaces it when called. }
  393. IndexByte_Impl: function(const buf;len:SizeInt;b:byte):SizeInt = @IndexByte_Dispatch;
  { Stores the implementation matching has_sse2_support in IndexByte_Impl and forwards the current call to it. }
  394. function IndexByte_Dispatch(const buf;len:SizeInt;b:byte):SizeInt;
  395. begin
  396. if not has_sse2_support then
  397. IndexByte_Impl:=@IndexByte_Plain
  398. else
  399. IndexByte_Impl:=@IndexByte_SSE2;
  400. IndexByte_Dispatch:=IndexByte_Impl(buf,len,b);
  401. end;
  402. function IndexByte(const buf;len:SizeInt;b:byte):SizeInt;
  { Forwards to whatever routine IndexByte_Impl currently points at and returns its result. }
  403. begin
  404. IndexByte:=IndexByte_Impl(buf,len,b);
  405. end;
  406. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  407. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  408. {$define FPC_SYSTEM_HAS_INDEXWORD}
  409. function IndexWord_Plain(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  {
  Searches the len 16-bit words starting at buf for the first one equal to b.
  Returns the zero-based word index of the match, or -1 when there is none.
  As used below, on entry eax = address of buf, edx = len, cx = b.
  A len above 1073741823 (unsigned compare, so negative values too) is treated as
  unbounded: the search then only stops at a match or once the pointer is back at buf.
  }
  410. asm
  411. push %eax { save the initial value of buf }
  412. cmp $1073741823, %edx
  413. ja .LUnbounded { len*2 would not fit in 32 bits }
  414. lea (%eax,%edx,2), %edx { edx = end of the buffer }
  415. cmp %edx, %eax
  416. je .LNotFound { len = 0 }
  417. .LWordwise_Body:
  418. cmp %cx, (%eax)
  419. je .LFound
  420. add $2, %eax
  421. cmp %edx, %eax
  422. jne .LWordwise_Body
  423. .LNotFound:
  424. pop %eax { drop the saved buf }
  425. mov $-1, %eax
  426. ret
  427. .LFound:
  428. pop %edx { edx = initial buf }
  429. sub %edx, %eax
  430. shr $1, %eax { byte offset -> word index }
  431. ret
  432. .LUnbounded:
  433. mov %eax, %edx { end := start, so the end test cannot hit before the pointer wraps around }
  434. jmp .LWordwise_Body
  435. end;
  436. function IndexWord_SSE2(const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  {
  Searches the len 16-bit words starting at buf for the first one equal to b,
  comparing 16 bytes at a time with SSE2 instructions.
  Returns the zero-based word index of the match, or -1 when there is none or len is 0.
  As used below, on entry eax = address of buf, edx = len, cx = b.
  All 16-byte loads are made from 16-byte aligned addresses. The first chunk may
  start before buf and the last may extend past the end; such matches are masked
  off or rejected by the length check.
  Two paths exist: buf at an even address (words coincide with the word lanes of
  a chunk, compared with pcmpeqw) and buf at an odd address (every word straddles
  two lanes, handled with a bytewise compare against a byte-swapped pattern).
  edi, esi and ebx are saved on the stack and restored.
  }
  437. asm
  438. test %edx, %edx
  439. je .LInstantNothing { len = 0 }
  440. push %edi
  441. movd %ecx, %xmm0
  442. push %esi
  443. mov %eax, %esi
  444. push %ebx
  445. and $-0x10, %esi { esi = buf rounded down to a 16-byte boundary }
  446. punpcklwd %xmm0, %xmm0
  447. movdqa (%esi), %xmm2 { xmm2 = first aligned chunk }
  448. sub %eax, %esi { esi = byte offset of the first chunk relative to buf, zero or negative }
  449. mov %edx, %edi { edi = len }
  450. pshufd $0, %xmm0, %xmm0 { xmm0 = b in all 8 words }
  451. lea 16(%esi), %edx { edx = number of bytes from buf to the end of the first chunk }
  452. mov %eax, %ebx { ebx = buf }
  453. movdqa %xmm0, %xmm1
  454. mov %edx, %ecx
  455. test $1, %al
  456. jnz .LUnaligned { buf is at an odd address }
  457. pcmpeqw %xmm0, %xmm2
  458. pmovmskb %xmm2, %eax
  459. shl %cl, %eax { shl / xor / shr clear the mask bits of the bytes in front of buf }
  460. xor %ax, %ax
  461. shr $1, %edx { edx = number of words from buf to the end of the first chunk }
  462. shr %cl, %eax
  463. jz .LLoopTest
  464. lea -8(%edx), %ecx { ecx = word index of the first chunk relative to buf }
  465. .LMatch:
  466. bsf %eax, %eax { byte position of the first match within the chunk }
  467. shr $1, %eax { -> word position }
  468. add %ecx, %eax { eax = word index relative to buf }
  469. cmp %edi, %eax
  470. jnb .LNothing { the match lies at or beyond len }
  471. pop %ebx
  472. pop %esi
  473. pop %edi
  474. ret
  475. .balign 16
  476. .LLoop:
  477. movdqa (%ebx,%edx,2), %xmm0 { chunk starting at word index edx }
  478. mov %edx, %ecx { ecx = word index of this chunk, used by .LMatch }
  479. add $8, %edx
  480. pcmpeqw %xmm1, %xmm0
  481. pmovmskb %xmm0, %eax
  482. test %eax, %eax
  483. jne .LMatch
  484. .LLoopTest:
  485. cmp %edi, %edx
  486. jb .LLoop { continue while the chunk index is below len }
  487. .LNothing:
  488. pop %ebx
  489. pop %esi
  490. pop %edi
  491. .LInstantNothing:
  492. mov $-1, %eax
  493. ret
  494. .LUnaligned:
  495. psllw $8, %xmm1
  496. add %edi, %edi { edi = len in bytes }
  497. psrlw $8, %xmm0
  498. por %xmm1, %xmm0 { xmm0 = pattern with the two bytes of b swapped in every word lane }
  499. pcmpeqb %xmm0, %xmm2 { bytewise compare of the first chunk }
  500. movdqa %xmm0, %xmm1 { keep the swapped pattern for the loop }
  501. pmovmskb %xmm2, %eax
  502. shl %cl, %eax { shl / xor / shr clear the mask bits of the bytes in front of buf }
  503. xor %ax, %ax
  504. shr %cl, %eax
  505. lea (%eax,%eax), %ecx { ecx = mask shifted left by one, its bit 16 is carried into the next chunk }
  506. and %ecx, %eax { bit k stays set when bytes k-1 and k both match }
  507. and $0x5555, %eax { keep even k only: the word starts at the odd byte k-1 }
  508. je .LUnalignedLoopTest
  509. .LUnalignedMatch:
  510. bsf %eax, %eax { position of the high byte of the first matching word }
  511. add %esi, %eax { -> byte offset relative to buf }
  512. cmp %edi, %eax
  513. jnb .LNothing { the match lies at or beyond the end }
  514. pop %ebx
  515. shr $1, %eax { byte offset -> word index }
  516. pop %esi
  517. pop %edi
  518. ret
  519. .balign 16
  520. .LUnalignedLoop:
  521. movdqa (%ebx,%edx), %xmm0 { chunk at byte offset edx }
  522. shr $16, %ecx { bit 0 = did the last byte of the previous chunk match }
  523. mov %edx, %esi { esi = byte offset of this chunk, used by .LUnalignedMatch }
  524. add $16, %edx
  525. pcmpeqb %xmm1, %xmm0
  526. pmovmskb %xmm0, %eax
  527. add %eax, %eax
  528. or %eax, %ecx { ecx = this mask shifted left by one, plus the carried bit }
  529. mov %ecx, %eax
  530. shr $1, %eax { eax = this mask }
  531. and %ecx, %eax { bit k stays set when byte k and the byte before it both match }
  532. and $0x5555, %eax
  533. jne .LUnalignedMatch
  534. .LUnalignedLoopTest:
  535. cmp %edi, %edx
  536. jb .LUnalignedLoop { continue while the chunk offset is below the length in bytes }
  537. pop %ebx
  538. pop %esi
  539. pop %edi
  540. mov $-1, %eax
  541. end;
  542. function IndexWord_Dispatch(const buf;len:SizeInt;b:word):SizeInt; forward;
  543. var
  { Routine called by IndexWord. It starts out pointing at the dispatcher, which replaces it when called. }
  544. IndexWord_Impl: function(const buf;len:SizeInt;b:word):SizeInt = @IndexWord_Dispatch;
  { Stores the implementation matching has_sse2_support in IndexWord_Impl and forwards the current call to it. }
  545. function IndexWord_Dispatch(const buf;len:SizeInt;b:word):SizeInt;
  546. begin
  547. if not has_sse2_support then
  548. IndexWord_Impl:=@IndexWord_Plain
  549. else
  550. IndexWord_Impl:=@IndexWord_SSE2;
  551. IndexWord_Dispatch:=IndexWord_Impl(buf,len,b);
  552. end;
  553. function IndexWord(const buf;len:SizeInt;b:word):SizeInt; inline;
  { Forwards to whatever routine IndexWord_Impl currently points at and returns its result. }
  554. begin
  555. IndexWord:=IndexWord_Impl(buf,len,b);
  556. end;
  557. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  558. {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
  559. {$define FPC_SYSTEM_HAS_INDEXDWORD}
  560. function IndexDWord_Plain(Const buf;len:SizeInt;b:DWord):SizeInt; assembler; nostackframe;
  {
  Searches the len 32-bit dwords starting at buf for the first one equal to b.
  Returns the zero-based dword index of the match, or -1 when there is none.
  As used below, on entry eax = address of buf, edx = len, ecx = b.
  A len above 536870911 (unsigned compare, so negative values too) is treated as
  unbounded: the search then only stops at a match or once the pointer is back at buf.
  }
  561. asm
  562. push %eax { save the initial value of buf }
  563. cmp $536870911, %edx
  564. ja .LUnbounded { len*4 would not fit in 32 bits }
  565. lea (%eax,%edx,4), %edx { edx = end of the buffer }
  566. cmp %edx, %eax
  567. je .LNotFound { len = 0 }
  568. .LDWordwise_Body:
  569. cmp %ecx, (%eax)
  570. je .LFound
  571. add $4, %eax
  572. cmp %edx, %eax
  573. jne .LDWordwise_Body
  574. .LNotFound:
  575. pop %eax { drop the saved buf }
  576. mov $-1, %eax
  577. ret
  578. .LFound:
  579. pop %edx { edx = initial buf }
  580. sub %edx, %eax
  581. shr $2, %eax { byte offset -> dword index }
  582. ret
  583. .LUnbounded:
  584. mov %eax, %edx { end := start, so the end test cannot hit before the pointer wraps around }
  585. jmp .LDWordwise_Body
  586. end;
  587. function IndexDWord_SSE2(const buf;len:SizeInt;b:DWord):SizeInt; assembler; nostackframe;
  {
  Searches the len 32-bit dwords starting at buf for the first one equal to b,
  comparing four dwords at a time with SSE2 instructions.
  Returns the zero-based dword index of the match, or -1 when there is none.
  As used below, on entry eax = address of buf, edx = len, ecx = b.
  Inputs with fewer than 4 elements, and those with a len above 536870911
  (unsigned compare), are scanned one dword at a time. Otherwise whole groups of
  4 are compared with unaligned loads, and a len that is not a multiple of 4 is
  finished with one more load of the last 16 bytes of the buffer, which re-checks
  some elements already seen.
  esi, ebx and, on the vector path, edi are saved on the stack and restored.
  }
  588. asm
  589. push %esi
  590. lea (%eax,%edx,4), %esi { esi = end of the buffer }
  591. push %ebx
  592. mov %eax, %ebx { ebx = buf }
  593. cmp $536870911, %edx
  594. ja .LUnbounded { len*4 would not fit in 32 bits }
  595. and $-4, %edx { len rounded down to a multiple of 4 }
  596. jz .LDWordwise_Test { fewer than 4 elements }
  597. push %edi
  598. shl $2, %edx
  599. movd %ecx, %xmm2
  600. add %eax, %edx { edx = end of the whole groups of 4 }
  601. pshufd $0, %xmm2, %xmm1 { xmm1 = b in all 4 dwords }
  602. .balign 16
  603. .L4x_Body:
  604. movdqu (%eax), %xmm0
  605. pcmpeqd %xmm1, %xmm0
  606. pmovmskb %xmm0, %edi
  607. test %edi, %edi
  608. jnz .L4x_Found
  609. .L4x_Next: { not targeted by any jump in this routine }
  610. add $16, %eax
  611. cmp %eax, %edx
  612. jne .L4x_Body
  613. cmp %esi, %eax
  614. je .LNothing { len was a multiple of 4, everything has been checked }
  615. lea -16(%esi), %eax { re-check the last 16 bytes of the buffer }
  616. movdqu (%eax), %xmm0
  617. pcmpeqd %xmm1, %xmm0
  618. pmovmskb %xmm0, %edi
  619. test %edi, %edi
  620. jnz .L4x_Found
  621. .LNothing:
  622. pop %edi
  623. pop %ebx
  624. pop %esi
  625. mov $-1, %eax
  626. ret
  627. .balign 16
  628. .L4x_Found:
  629. bsf %edi, %edi { byte position of the first matching dword within the 16 bytes }
  630. add %edi, %eax { eax = address of the match }
  631. pop %edi
  632. .LDWordwise_Found:
  633. sub %ebx, %eax
  634. shr $2, %eax { byte offset -> dword index }
  635. pop %ebx
  636. pop %esi
  637. ret
  638. .balign 16
  639. .LDWordwise_Body:
  640. cmp %ecx, (%eax)
  641. je .LDWordwise_Found
  642. add $4, %eax
  643. .LDWordwise_Test:
  644. cmp %esi, %eax
  645. jne .LDWordwise_Body
  646. mov $-1, %eax
  647. pop %ebx
  648. pop %esi
  649. ret
  650. .LUnbounded:
  651. mov %eax, %esi { end := start, so the end test cannot hit before the pointer wraps around }
  652. jmp .LDWordwise_Body
  653. end;
  654. function IndexDWord_Dispatch(const buf;len:SizeInt;b:DWord):SizeInt; forward;
  655. var
  { Routine called by IndexDWord. It starts out pointing at the dispatcher, which replaces it when called. }
  656. IndexDWord_Impl: function(const buf;len:SizeInt;b:DWord):SizeInt = @IndexDWord_Dispatch;
  { Stores the implementation matching has_sse2_support in IndexDWord_Impl and forwards the current call to it. }
  657. function IndexDWord_Dispatch(const buf;len:SizeInt;b:DWord):SizeInt;
  658. begin
  659. if not has_sse2_support then
  660. IndexDWord_Impl:=@IndexDWord_Plain
  661. else
  662. IndexDWord_Impl:=@IndexDWord_SSE2;
  663. IndexDWord_Dispatch:=IndexDWord_Impl(buf,len,b);
  664. end;
  665. function IndexDWord(const buf;len:SizeInt;b:DWord):SizeInt;
  { Forwards to whatever routine IndexDWord_Impl currently points at and returns its result. }
  666. begin
  667. IndexDWord:=IndexDWord_Impl(buf,len,b);
  668. end;
  669. {$endif FPC_SYSTEM_HAS_INDEXDWORD}
  670. {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  671. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  672. function CompareByte_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  {
  Compares the first len bytes of buf1 and buf2 as unsigned bytes, using general
  purpose registers only.
  Returns 0 when they are equal, a positive value when buf1 is greater at the first
  difference and a negative value when it is smaller. The magnitude depends on the
  path taken: the dword path returns 1 or -1, the bytewise paths return the
  difference of the two bytes.
  As used below, on entry eax = address of buf1, edx = address of buf2, ecx = len.
  A len of 6 or less (signed compare) is handled bytewise only. Longer inputs are
  compared bytewise until buf1 is 4-byte aligned, then a dword at a time, then
  bytewise for the rest.
  esi and, on the long path, ebx are saved on the stack and restored.
  }
  673. asm
  674. sub %eax, %edx { edx = buf2 - buf1, so (reg,%edx) addresses the buf2 counterpart of a buf1 pointer }
  675. cmp $6, %ecx
  676. push %esi
  677. lea (%eax,%ecx), %esi { esi = end of buf1 }
  678. jle .LBytewiseTail_Prepare { flags still come from the cmp above }
  679. push %ebx
  680. lea 3(%eax), %ebx
  681. and $-4, %ebx { ebx = buf1 rounded up to a 4-byte boundary }
  682. cmp %ebx, %eax
  683. jne .LBytewiseHead_Body { buf1 is not aligned yet }
  684. .L4x_Prepare:
  685. mov %esi, %eax
  686. and $-4, %eax { eax = end of buf1 rounded down to a 4-byte boundary }
  687. jmp .L4x_Body
  688. .balign 16
  689. .L4x_Next:
  690. add $4, %ebx
  691. cmp %ebx, %eax
  692. je .LBytewiseTail_PrepareFromHeadAnd4x
  693. .L4x_Body:
  694. mov (%ebx,%edx), %ecx { ecx = dword from buf2 }
  695. cmp %ecx, (%ebx)
  696. je .L4x_Next
  697. mov (%ebx), %eax { eax = dword from buf1 }
  { reverse the byte order of both dwords so that the byte at the lowest address becomes the most significant one }
  698. {$ifdef CPUX86_HAS_BSWAP}
  699. bswap %ecx
  700. {$else}
  701. rol $8, %cx
  702. rol $16, %ecx
  703. rol $8, %cx
  704. {$endif}
  705. pop %ebx
  706. pop %esi
  707. {$ifdef CPUX86_HAS_BSWAP}
  708. bswap %eax
  709. {$else}
  710. rol $8, %ax
  711. rol $16, %eax
  712. rol $8, %ax
  713. {$endif}
  714. cmp %eax, %ecx { carry is set when the buf2 value is below the buf1 value }
  715. sbb %eax, %eax { eax = -1 with carry, else 0 }
  716. and $2, %eax
  717. sub $1, %eax { result = 1 with carry, else -1 }
  718. ret
  719. .LBytewiseHead_Next:
  720. add $1, %eax
  721. cmp %eax, %ebx
  722. je .L4x_Prepare { reached the aligned address }
  723. .LBytewiseHead_Body:
  724. movzbl (%eax,%edx), %ecx { ecx = byte from buf2 }
  725. cmp (%eax), %cl
  726. je .LBytewiseHead_Next
  727. pop %ebx
  728. jmp .LBytesDiffer
  729. .LBytewiseTail_PrepareFromHeadAnd4x:
  730. pop %ebx
  731. .LBytewiseTail_Prepare:
  732. cmp %esi, %eax
  733. jne .LBytewiseTail_Body
  734. .LNothingFound:
  735. xor %eax, %eax { result = 0 }
  736. pop %esi
  737. ret
  738. .LBytewiseTail_Next:
  739. add $1, %eax
  740. cmp %eax, %esi
  741. je .LNothingFound
  742. .LBytewiseTail_Body:
  743. movzbl (%eax,%edx), %ecx { ecx = byte from buf2 }
  744. cmp (%eax), %cl
  745. je .LBytewiseTail_Next
  746. .LBytesDiffer:
  747. movzbl (%eax), %eax { eax = byte from buf1 }
  748. pop %esi
  749. sub %ecx, %eax { result = buf1 byte - buf2 byte }
  750. end;
  751. function CompareByte_SSE2(const buf1, buf2; len: SizeInt): SizeInt; assembler; nostackframe;
  {
  Compares the first len bytes of buf1 and buf2 as unsigned bytes, 16 bytes at a
  time with SSE2 instructions where possible.
  Returns 0 when they are equal, a positive value when buf1 is greater at the first
  difference and a negative value when it is smaller. The magnitude depends on the
  path taken: the 16-byte paths return the difference of the two bytes, the dword
  and bytewise paths return 1 or -1.
  As used below, on entry eax = address of buf1, edx = address of buf2, ecx = len.
  Paths:
  - len of 3 or less (signed compare): bytewise loop.
  - len of 16 or more: unaligned 16-byte compares, finished with one more compare
  of the last 16 bytes when len is not a multiple of 16.
  - len of 4 to 15: a single 16-byte compare that reads past the end, but only when
  neither read crosses a 4096-byte boundary; otherwise dwords, then bytes.
  esi and, except on the shortest path, ebx are saved on the stack and restored.
  }
  752. asm
  753. cmp $3, %ecx
  754. push %esi
  755. lea (%eax,%ecx), %esi { esi = buf1 end }
  756. jle .LBytewise_Test { flags still come from the cmp above }
  757. push %ebx
  758. and $-16, %ecx
  759. lea (%eax,%ecx), %ebx { ebx = end of full XMMs in buf1 }
  760. cmp %ebx, %eax
  761. jne .L16x_Body { at least one full XMM }
  762. lea 15(%ebx), %eax { check if tails don't cross page boundaries and can be over-read to XMMs }
  763. lea 15(%edx), %ecx
  764. xor %ebx, %eax
  765. xor %edx, %ecx
  766. or %ecx, %eax { bits in which the first and the last byte address of either read differ }
  767. cmp $4095, %eax
  768. ja .LCantOverReadBoth
  769. movdqu (%ebx), %xmm0
  770. movdqu (%edx), %xmm2
  771. pcmpeqb %xmm2, %xmm0
  772. pmovmskb %xmm0, %eax
  773. xor $65535, %eax { set bits now mark the bytes that differ }
  774. jz .LReturnEAX { all 16 bytes equal, eax = 0 is the result }
  775. bsf %eax, %ecx { ecx = index of the first differing byte }
  776. add %ecx, %ebx
  777. cmp %esi, %ebx { ignore over-read garbage bytes }
  778. jnb .L16x_Nothing
  779. movzbl (%ebx), %eax
  780. movzbl (%edx,%ecx), %edx
  781. sub %edx, %eax { result = buf1 byte - buf2 byte }
  782. .LReturnEAX:
  783. pop %ebx
  784. pop %esi
  785. ret
  786. .balign 16
  787. .L16x_Body:
  788. movdqu (%edx), %xmm0
  789. movdqu (%eax), %xmm1
  790. pcmpeqb %xmm1, %xmm0
  791. pmovmskb %xmm0, %ecx
  792. xor $65535, %ecx { set bits now mark the bytes that differ }
  793. jnz .L16x_Found
  794. add $16, %eax
  795. add $16, %edx
  796. cmp %eax, %ebx
  797. jne .L16x_Body
  798. cmp %ebx, %esi
  799. je .L16x_Nothing { len was a multiple of 16 }
  800. sub %eax, %edx { edx = buf2 - buf1 }
  801. lea -16(%esi), %eax { eax = last 16 bytes of buf1 }
  802. add %eax, %edx { edx = the matching position in buf2 }
  803. movdqu (%edx), %xmm0
  804. movdqu (%eax), %xmm1
  805. pcmpeqb %xmm1, %xmm0
  806. pmovmskb %xmm0, %ecx
  807. xor $65535, %ecx
  808. jnz .L16x_Found
  809. .L16x_Nothing:
  810. pop %ebx
  811. xor %eax, %eax { result = 0 }
  812. pop %esi
  813. ret
  814. .balign 16
  815. .L16x_Found:
  816. bsf %ecx, %ecx { ecx = index of the first differing byte }
  817. pop %ebx
  818. movzbl (%eax,%ecx), %eax
  819. movzbl (%edx,%ecx), %edx
  820. pop %esi
  821. sub %edx, %eax { result = buf1 byte - buf2 byte }
  822. ret
  823. .LCantOverReadBoth:
  824. mov %esi, %eax
  825. sub %ebx, %eax
  826. and $-4, %eax
  827. add %ebx, %eax { eax = end of the whole dwords in buf1 }
  828. cmp %eax, %ebx
  829. je .LPopEbxAndGoBytewise
  830. .L4x_Body:
  831. mov (%ebx), %ecx { ecx = dword from buf1 }
  832. cmp (%edx), %ecx
  833. jne .L4x_Found
  834. add $4, %ebx
  835. add $4, %edx
  836. cmp %ebx, %eax
  837. jne .L4x_Body
  838. .LPopEbxAndGoBytewise:
  839. pop %ebx
  840. .LBytewise_Test:
  841. cmp %esi, %eax
  842. je .LBytewise_Nothing
  843. .LBytewise_Body:
  844. movzbl (%edx), %ecx { ecx = byte from buf2 }
  845. cmp (%eax), %cl { carry is set when the buf2 byte is below the buf1 byte }
  846. jne .LDoSbb
  847. add $1, %eax
  848. add $1, %edx
  849. cmp %esi, %eax
  850. jne .LBytewise_Body
  851. .LBytewise_Nothing:
  852. xor %eax, %eax { result = 0 }
  853. pop %esi
  854. ret
  855. .L4x_Found:
  856. mov (%edx), %eax { eax = dword from buf2 }
  857. bswap %ecx { reverse both so that the byte at the lowest address becomes the most significant one }
  858. bswap %eax
  859. cmp %ecx, %eax { carry is set when the buf2 value is below the buf1 value }
  860. pop %ebx
  861. .LDoSbb:
  862. sbb %eax, %eax { eax = -1 with carry, else 0 }
  863. and $2, %eax
  864. sub $1, %eax { result = 1 with carry, else -1 }
  865. pop %esi
  866. end;
  867. function CompareByte_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;
  868. var
  { Routine called by CompareByte. It starts out pointing at the dispatcher, which replaces it when called. }
  869. CompareByte_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareByte_Dispatch;
  { Stores the implementation matching has_sse2_support in CompareByte_Impl and forwards the current call to it. }
  870. function CompareByte_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
  871. begin
  872. if not has_sse2_support then
  873. CompareByte_Impl:=@CompareByte_Plain
  874. else
  875. CompareByte_Impl:=@CompareByte_SSE2;
  876. CompareByte_Dispatch:=CompareByte_Impl(buf1, buf2, len);
  877. end;
  878. function CompareByte(const buf1, buf2; len: SizeInt): SizeInt;
  { Forwards to whatever routine CompareByte_Impl currently points at and returns its result. }
  879. begin
  880. CompareByte:=CompareByte_Impl(buf1, buf2, len);
  881. end;
  882. {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  883. {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
  884. {$define FPC_SYSTEM_HAS_COMPAREWORD}
  885. function CompareWord_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  {
  Compares the first len 16-bit words of buf1 and buf2 as unsigned words, using
  general purpose registers only.
  Returns 0 when they are equal, 1 when buf1 is greater at the first differing word
  and -1 when it is smaller.
  As used below, on entry eax = address of buf1, edx = address of buf2, ecx = len.
  A len above 1073741823 (unsigned compare, so negative values too) is treated as
  unbounded: the compare then only stops at a difference or once the pointer is
  back at buf1. A len of 3 or less is handled one word at a time. Longer inputs
  compare one leading word when buf1 is not 4-byte aligned, then two words at a
  time, then any remaining word.
  esi and, on the long path, ebx are saved on the stack and restored.
  }
  886. asm
  887. sub %eax, %edx { edx = buf2 - buf1, so (%edx,%eax) addresses the buf2 counterpart of eax }
  888. push %esi
  889. cmp $1073741823, %ecx
  890. ja .LUnbounded { len*2 would not fit in 32 bits }
  891. cmp $3, %ecx
  892. lea (%eax,%ecx,2), %esi { esi = end of buf1 }
  893. jle .LWordwise_Test { flags still come from the cmp above }
  894. push %ebx
  895. test $3, %al
  896. je .LPtrUintWise_Prepare { buf1 is 4-byte aligned }
  897. movzwl (%edx,%eax), %ebx { compare one leading word first }
  898. cmp (%eax), %bx
  899. jne .LPopEbxAndDoSbb
  900. add $2, %eax
  901. sub $1, %ecx
  902. .LPtrUintWise_Prepare:
  903. and $-2, %ecx { number of words rounded down to an even count }
  904. lea (%eax,%ecx,2), %ecx { ecx = end of the part compared two words at a time }
  905. .balign 16
  906. .LPtrUintWise_Next:
  907. mov (%edx,%eax), %ebx { ebx = two words from buf2 }
  908. cmp (%eax), %ebx
  909. jne .LPtrUintsDiffer
  910. add $4, %eax
  911. cmp %eax, %ecx
  912. jne .LPtrUintWise_Next
  913. pop %ebx
  914. .LWordwise_Test:
  915. cmp %esi, %eax
  916. je .LNothingFound
  917. .LWordwise_Body:
  918. movzwl (%edx,%eax), %ecx { ecx = word from buf2 }
  919. cmp (%eax), %cx { carry is set when the buf2 word is below the buf1 word }
  920. jne .LDoSbb
  921. add $2, %eax
  922. cmp %esi, %eax
  923. jne .LWordwise_Body
  924. .LNothingFound:
  925. xor %eax, %eax { result = 0 }
  926. pop %esi
  927. ret
  928. .LPtrUintsDiffer:
  929. cmp (%eax), %bx { does the first of the two words differ? }
  930. jne .LPopEbxAndDoSbb
  931. shr $16, %ebx
  932. cmp 2(%eax), %bx { otherwise let the second word set the flags }
  933. .LPopEbxAndDoSbb:
  934. pop %ebx
  935. .LDoSbb:
  936. sbb %eax, %eax { eax = -1 with carry, else 0 }
  937. and $2, %eax
  938. sub $1, %eax { result = 1 with carry, else -1 }
  939. pop %esi
  940. ret
  941. .LUnbounded:
  942. mov %eax, %esi { end := start, so the end test cannot hit before the pointer wraps around }
  943. jmp .LWordwise_Body
  944. end;
  { Compares len 16-bit words of buf1 and buf2, 8 words (16 bytes) at a time with SSE2.
    Returns 0 when no differing word is found; otherwise -1 or +1, derived from
    the carry flag of the compare of the first differing word.
    Register use visible below: eax = cursor in buf1, edx = cursor in buf2,
    ecx = len (later scratch), ebx = end of buf1, esi = end of the full 16-byte blocks. }
  945. function CompareWord_SSE2(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  946. asm
  947. push %ebx
  // len above $3FFFFFFF (unsigned compare, so negative len as well) takes the unbounded path
  948. cmp $1073741823, %ecx
  949. ja .LUnbounded
  950. lea (%eax,%ecx,2), %ebx { ebx = buf1 end }
  951. cmp $3, %ecx
  952. jle .LWordwise_Test
  953. push %esi
  954. and $-8, %ecx
  955. lea (%eax,%ecx,2), %esi { esi = end of full XMMs in buf1 }
  956. cmp %esi, %eax
  957. jne .L8x_Body
  // Fewer than 8 words (4..7): a 16-byte load would read past the end of the data.
  // The check below tests whether adding 15 to either pointer changes any bit above
  // the low 12, i.e. whether such a load would cross a 4096-byte boundary.
  958. lea 15(%esi), %eax
  959. lea 15(%edx), %ecx
  960. xor %esi, %eax
  961. xor %edx, %ecx
  962. or %ecx, %eax
  963. cmp $4095, %eax
  964. ja .LCantOverReadBoth
  965. movdqu (%esi), %xmm0
  966. movdqu (%edx), %xmm2
  967. pcmpeqw %xmm2, %xmm0
  968. pmovmskb %xmm0, %eax
  // after the xor, set bits mark bytes that belong to differing words; zero means all equal
  969. xor $65535, %eax
  970. jz .LReturnEAX
  971. bsf %eax, %eax
  // a difference located at or beyond the end of buf1 is outside the compared range
  972. lea (%esi,%eax), %ecx
  973. cmp %ebx, %ecx
  974. jnb .LNothing
  975. movzwl (%esi,%eax), %ebx
  976. cmp %bx, (%edx,%eax)
  977. .L8x_DoSbb:
  978. pop %esi
  979. .LWordwise_DoSbb:
  // pop does not change flags. CF set (buf2 word below buf1 word, unsigned) gives +1, CF clear gives -1
  980. pop %ebx
  981. sbb %eax, %eax
  982. and $2, %eax
  983. sub $1, %eax
  984. ret
  985. .balign 16
  986. .L8x_Body:
  // main loop: compare 16 bytes of each buffer per iteration
  987. movdqu (%edx), %xmm0
  988. movdqu (%eax), %xmm1
  989. pcmpeqw %xmm1, %xmm0
  990. pmovmskb %xmm0, %ecx
  991. xor $65535, %ecx
  992. jnz .L8x_Found
  993. add $16, %eax
  994. add $16, %edx
  995. cmp %eax, %esi
  996. jne .L8x_Body
  997. cmp %esi, %ebx
  998. je .LNothing
  // tail: re-position both cursors so the last 16-byte load ends exactly at the end of buf1
  // (it overlaps data that was already compared)
  999. sub %eax, %edx
  1000. lea -16(%ebx), %eax
  1001. add %eax, %edx
  1002. movdqu (%edx), %xmm0
  1003. movdqu (%eax), %xmm1
  1004. pcmpeqw %xmm1, %xmm0
  1005. pmovmskb %xmm0, %ecx
  1006. xor $65535, %ecx
  1007. jnz .L8x_Found
  1008. .LNothing:
  1009. xor %eax, %eax
  1010. .LReturnEAX:
  1011. pop %esi
  1012. pop %ebx
  1013. ret
  1014. .L8x_Found:
  // ecx := byte offset of the first differing word; compare it to set the flags for the sbb
  1015. bsf %ecx, %ecx
  1016. movzwl (%eax,%ecx), %eax
  1017. cmp %ax, (%edx,%ecx)
  1018. jmp .L8x_DoSbb
  1019. .LCantOverReadBoth:
  // fall back to the word-by-word loop; esi is restored first because that loop only pops ebx
  1020. mov %esi, %eax
  1021. pop %esi
  1022. .LWordwise_Body:
  1023. movzwl (%eax), %ecx
  1024. cmp %cx, (%edx)
  1025. jne .LWordwise_DoSbb
  1026. .LWordwise_Next:
  1027. add $2, %eax
  1028. add $2, %edx
  1029. .LWordwise_Test:
  1030. cmp %ebx, %eax
  1031. jne .LWordwise_Body
  1032. xor %eax, %eax
  1033. pop %ebx
  1034. ret
  1035. .LUnbounded:
  // ebx := eax: the end test of the wordwise loop can only hit after eax has wrapped around
  1036. mov %eax, %ebx
  1037. jmp .LWordwise_Body
  1038. end;
  { Lazy CPU dispatch for CompareWord.
    CompareWord_Impl initially points at CompareWord_Dispatch; the first call
    replaces it with CompareWord_SSE2 or CompareWord_Plain, depending on
    has_sse2_support, and then forwards the call through the new pointer. }
  function CompareWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;

  var
    CompareWord_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareWord_Dispatch;

  function CompareWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    { select the implementation; later calls go straight through CompareWord_Impl }
    if not has_sse2_support then
      CompareWord_Impl:=@CompareWord_Plain
    else
      CompareWord_Impl:=@CompareWord_SSE2;
    CompareWord_Dispatch:=CompareWord_Impl(buf1, buf2, len);
  end;
  { Public entry point: forwards to the routine CompareWord_Impl currently points at. }
  function CompareWord(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    CompareWord:=CompareWord_Impl(buf1, buf2, len);
  end;
  1054. {$endif FPC_SYSTEM_HAS_COMPAREWORD}
  1055. {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
  1056. {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  { Compares len 32-bit dwords of buf1 and buf2 one dword at a time.
    Returns 0 when no differing dword is found; otherwise -1 or +1, derived
    from the carry flag of the compare of the first differing dword.
    Register use visible below: eax = cursor in buf1, edx = cursor in buf2,
    ecx = len (later scratch), ebx = end of buf1. }
  1057. function CompareDWord_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  1058. asm
  // len >= $20000000 (unsigned compare, so negative len as well) takes the unbounded path;
  // push does not change the flags tested by jnb
  1059. cmp $536870912, %ecx
  1060. push %ebx
  1061. jnb .LUnbounded
  1062. lea (%eax,%ecx,4), %ebx
  1063. cmp %ebx, %eax
  1064. je .LNothing
  1065. .balign 16
  1066. .LDwordwise_Body:
  1067. mov (%edx), %ecx
  1068. cmp (%eax), %ecx
  1069. jne .LDoSbb
  1070. add $4, %eax
  1071. add $4, %edx
  1072. cmp %eax, %ebx
  1073. jne .LDwordwise_Body
  1074. .LNothing:
  1075. xor %eax, %eax
  1076. pop %ebx
  1077. ret
  1078. .LDoSbb:
  // pop does not change flags. CF set (buf2 dword below buf1 dword, unsigned) gives +1, CF clear gives -1
  1079. pop %ebx
  1080. sbb %eax, %eax
  1081. and $2, %eax
  1082. sub $1, %eax
  1083. ret
  1084. .LUnbounded:
  // ebx := eax: the end test of the loop can only hit after eax has wrapped around
  1085. mov %eax, %ebx
  1086. jmp .LDwordwise_Body
  1087. end;
  { Compares len 32-bit dwords of buf1 and buf2, 4 dwords (16 bytes) at a time with SSE2.
    Returns 0 when no differing dword is found; otherwise -1 or +1, derived
    from the carry flag of the compare of the first differing dword.
    Register use visible below: eax = cursor in buf1, edx = cursor in buf2,
    ecx = len, then end of the full 16-byte blocks, esi = end of buf1, ebx = scratch. }
  1088. function CompareDWord_SSE2(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  1089. asm
  1090. push %esi
  // len >= $20000000 (unsigned compare, so negative len as well) takes the unbounded path
  1091. cmp $536870912, %ecx
  1092. jnb .LUnbounded
  1093. lea (%eax,%ecx,4), %esi { esi = buf1 end }
  // fewer than 4 dwords: use the dword-by-dword loop
  1094. cmp $3, %ecx
  1095. jle .LDWordwise_Test
  1096. push %ebx
  1097. and $-4, %ecx
  1098. lea (%eax,%ecx,4), %ecx { ecx = end of full XMMs in buf1 }
  1099. .balign 16
  1100. .L4x_Body:
  1101. movdqu (%edx), %xmm0
  1102. movdqu (%eax), %xmm1
  1103. pcmpeqd %xmm1, %xmm0
  1104. pmovmskb %xmm0, %ebx
  // after the xor, set bits mark bytes that belong to differing dwords
  1105. xor $65535, %ebx
  1106. jnz .L4x_Found
  1107. add $16, %eax
  1108. add $16, %edx
  1109. cmp %eax, %ecx
  1110. jne .L4x_Body
  1111. cmp %esi, %ecx
  1112. je .LNothing
  // tail: re-position both cursors so the last 16-byte load ends exactly at the end of buf1
  // (it overlaps data that was already compared)
  1113. sub %eax, %edx
  1114. lea -16(%esi), %eax
  1115. add %eax, %edx
  1116. movdqu (%edx), %xmm0
  1117. movdqu (%eax), %xmm1
  1118. pcmpeqd %xmm1, %xmm0
  1119. pmovmskb %xmm0, %ebx
  1120. xor $65535, %ebx
  1121. jnz .L4x_Found
  1122. .LNothing:
  1123. pop %ebx
  1124. pop %esi
  1125. xor %eax, %eax
  1126. ret
  1127. .balign 16
  1128. .LDWordwise_Body:
  1129. mov (%eax), %ecx
  1130. cmp %ecx, (%edx)
  1131. jne .LDoSbb
  1132. add $4, %eax
  1133. add $4, %edx
  1134. .LDWordwise_Test:
  1135. cmp %esi, %eax
  1136. jne .LDWordwise_Body
  1137. xor %eax, %eax
  1138. pop %esi
  1139. ret
  1140. .L4x_Found:
  // ebx := byte offset of the first differing dword; compare it to set the flags for the sbb
  1141. bsf %ebx, %ebx
  1142. mov (%eax,%ebx), %eax
  1143. cmp %eax, (%edx,%ebx)
  1144. pop %ebx
  1145. .LDoSbb:
  // pop does not change flags. CF set (buf2 dword below buf1 dword, unsigned) gives +1, CF clear gives -1
  1146. pop %esi
  1147. sbb %eax, %eax
  1148. and $2, %eax
  1149. sub $1, %eax
  1150. ret
  1151. .LUnbounded:
  // esi := eax: the end test of the dword loop can only hit after eax has wrapped around
  1152. mov %eax, %esi
  1153. jmp .LDWordwise_Body
  1154. end;
  { Lazy CPU dispatch for CompareDWord.
    CompareDWord_Impl initially points at CompareDWord_Dispatch; the first call
    replaces it with CompareDWord_SSE2 or CompareDWord_Plain, depending on
    has_sse2_support, and then forwards the call through the new pointer. }
  function CompareDWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;

  var
    CompareDWord_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareDWord_Dispatch;

  function CompareDWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    { select the implementation; later calls go straight through CompareDWord_Impl }
    if not has_sse2_support then
      CompareDWord_Impl:=@CompareDWord_Plain
    else
      CompareDWord_Impl:=@CompareDWord_SSE2;
    CompareDWord_Dispatch:=CompareDWord_Impl(buf1, buf2, len);
  end;
  { Public entry point: forwards to the routine CompareDWord_Impl currently points at. }
  function CompareDWord(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    CompareDWord:=CompareDWord_Impl(buf1, buf2, len);
  end;
  1170. {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
  1171. {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
  1172. {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  { Returns the zero-based index of the first byte equal to b within the first
    len bytes of buf, or -1 when there is none. The scan also ends with -1 once
    a #0 byte that is not itself a match has been passed.
    len = 0 returns -1 (previously the content of ecx, i.e. the b argument,
    was returned because the index register had not been cleared yet).
    Register use: eax = address of buf, edx = len, cl = b on entry;
    esi = cursor, ebx = search byte, ecx = index inside the loop. }
  function IndexChar0(Const buf;len:SizeInt;b:AnsiChar):SizeInt; assembler;
  var
    saveesi,saveebx : longint;
  asm
          movl    %esi,saveesi
          movl    %ebx,saveebx
  // Can't use scasb, or will have to do it twice, think this
  // is faster for small "len"
          movl    %eax,%esi          // Load address
          movzbl  %cl,%ebx           // Load searchpattern
          testl   %edx,%edx
          je      .LNotFound         // len = 0: nothing to scan, ecx does not hold an index yet
          xorl    %ecx,%ecx          // zero index in Buf
          xorl    %eax,%eax          // To make DWord compares possible
          .balign 4
  .LLoop:
          movb    (%esi),%al         // Load byte
          cmpb    %al,%bl
          je      .LFound            // byte the same?
          incl    %ecx
          incl    %esi
          cmpl    %edx,%ecx          // Maximal distance reached?
          je      .LNotFound
          testl   %eax,%eax          // Nullchar = end of search?
          jne     .LLoop
  .LNotFound:
          movl    $-1,%ecx           // Not found return -1
  .LFound:
          movl    %ecx,%eax
          movl    saveesi,%esi
          movl    saveebx,%ebx
  end;
  1205. {$endif FPC_SYSTEM_HAS_INDEXCHAR0}
  1206. {****************************************************************************
  1207. String
  1208. ****************************************************************************}
  1209. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  1210. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  { Copies the shortstring sstr into res.
    The copied length is the length byte of sstr, limited to the value that
    arrives in edx (presumably the hidden high(res) argument - confirm against
    the compiler's calling convention). esi and edi are saved in locals and
    restored before returning. }
  1211. procedure fpc_shortstr_to_shortstr(out res:shortstring; const sstr: shortstring);assembler;[public,alias:'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  1212. var
  1213. saveesi,saveedi : longint;
  1214. asm
  1215. {$ifdef FPC_PROFILE}
  1216. push %eax
  1217. push %edx
  1218. push %ecx
  1219. call mcount
  1220. pop %ecx
  1221. pop %edx
  1222. pop %eax
  1223. {$endif FPC_PROFILE}
  1224. movl %edi,saveedi
  1225. movl %esi,saveesi
  1226. {$ifdef FPC_ENABLED_CLD}
  1227. cld
  1228. {$endif FPC_ENABLED_CLD}
  1229. movl res,%edi
  1230. movl sstr,%esi
  1231. movl %edx,%ecx
  // eax := source length byte, limited (unsigned) to ecx
  1232. xorl %eax,%eax
  1233. lodsb
  1234. cmpl %ecx,%eax
  1235. jbe .LStrCopy1
  1236. movl %ecx,%eax
  1237. .LStrCopy1:
  // store the resulting length byte, then copy eax characters
  1238. stosb
  // fewer than 7 characters: plain byte copy
  1239. cmpl $7,%eax
  1240. jl .LStrCopy2
  1241. movl %edi,%ecx { Align on 32bits }
  1242. negl %ecx
  1243. andl $3,%ecx
  1244. subl %ecx,%eax
  1245. rep
  1246. movsb
  // copy eax div 4 dwords; the remaining eax mod 4 bytes are copied at .LStrCopy2
  1247. movl %eax,%ecx
  1248. andl $3,%eax
  1249. shrl $2,%ecx
  1250. rep
  1251. movsl
  1252. .LStrCopy2:
  1253. movl %eax,%ecx
  1254. rep
  1255. movsb
  1256. movl saveedi,%edi
  1257. movl saveesi,%esi
  1258. end;
  { Copies the shortstring at sstr to dstr.
    The copied length is the length byte of the source, limited (unsigned) to len.
    eax and ecx are pushed and popped around the copy; esi and edi are declared
    as modified in the register list of the asm statement. }
  1259. procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  1260. begin
  1261. asm
  1262. {$ifdef FPC_PROFILE}
  1263. push %eax
  1264. push %edx
  1265. push %ecx
  1266. call mcount
  1267. pop %ecx
  1268. pop %edx
  1269. pop %eax
  1270. {$endif FPC_PROFILE}
  1271. pushl %eax
  1272. pushl %ecx
  1273. {$ifdef FPC_ENABLED_CLD}
  1274. cld
  1275. {$endif FPC_ENABLED_CLD}
  1276. movl dstr,%edi
  1277. movl sstr,%esi
  1278. xorl %eax,%eax
  1279. movl len,%ecx
  // eax := source length byte, limited (unsigned) to len
  1280. lodsb
  1281. cmpl %ecx,%eax
  1282. jbe .LStrCopy1
  1283. movl %ecx,%eax
  1284. .LStrCopy1:
  // store the resulting length byte, then copy eax characters
  1285. stosb
  // fewer than 7 characters: plain byte copy
  1286. cmpl $7,%eax
  1287. jl .LStrCopy2
  1288. movl %edi,%ecx { Align on 32bits }
  1289. negl %ecx
  1290. andl $3,%ecx
  1291. subl %ecx,%eax
  1292. rep
  1293. movsb
  // copy eax div 4 dwords; the remaining eax mod 4 bytes are copied at .LStrCopy2
  1294. movl %eax,%ecx
  1295. andl $3,%eax
  1296. shrl $2,%ecx
  1297. rep
  1298. movsl
  1299. .LStrCopy2:
  1300. movl %eax,%ecx
  1301. rep
  1302. movsb
  1303. popl %ecx
  1304. popl %eax
  1305. end ['ESI','EDI'];
  1306. end;
  1307. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  1308. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  1309. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  { Compares two shortstrings.
    The first min(length(left),length(right)) characters are compared. At the
    first difference the result is (left byte) - (right byte); when that common
    part is equal the result is length(left) - length(right).
    Register use visible below: esi = right, edi = left, ebx = left length,
    edx = right length, eax = number of characters still to compare. }
  1310. function fpc_shortstr_compare(const left,right:shortstring): longint;assembler; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  1311. var
  1312. saveesi,saveedi,saveebx : longint;
  1313. asm
  1314. {$ifdef FPC_PROFILE}
  1315. push %eax
  1316. push %edx
  1317. push %ecx
  1318. call mcount
  1319. pop %ecx
  1320. pop %edx
  1321. pop %eax
  1322. {$endif FPC_PROFILE}
  1323. movl %edi,saveedi
  1324. movl %esi,saveesi
  1325. movl %ebx,saveebx
  1326. {$ifdef FPC_ENABLED_CLD}
  1327. cld
  1328. {$endif FPC_ENABLED_CLD}
  1329. movl right,%esi
  1330. movl left,%edi
  // load both length bytes and step past them; eax := the smaller length
  1331. movzbl (%esi),%eax
  1332. movzbl (%edi),%ebx
  1333. movl %eax,%edx
  1334. incl %esi
  1335. incl %edi
  1336. cmpl %ebx,%eax
  1337. jbe .LStrCmp1
  1338. movl %ebx,%eax
  1339. .LStrCmp1:
  // fewer than 7 characters: plain byte compare
  1340. cmpl $7,%eax
  1341. jl .LStrCmp2
  1342. movl %edi,%ecx { Align on 32bits }
  1343. negl %ecx
  1344. andl $3,%ecx
  1345. subl %ecx,%eax
  // orl sets ZF when ecx = 0, so the jne below is not taken when repe performs no compare
  1346. orl %ecx,%ecx
  1347. repe
  1348. cmpsb
  1349. jne .LStrCmp3
  1350. movl %eax,%ecx
  1351. andl $3,%eax
  1352. shrl $2,%ecx
  1353. orl %ecx,%ecx
  1354. repe
  1355. cmpsl
  1356. je .LStrCmp2
  // a dword differed: step both pointers back by 4 and locate the byte with the byte compare below
  1357. movl $4,%eax
  1358. subl %eax,%esi
  1359. subl %eax,%edi
  1360. .LStrCmp2:
  1361. movl %eax,%ecx
  1362. orl %eax,%eax
  1363. repe
  1364. cmpsb
  1365. je .LStrCmp4
  1366. .LStrCmp3:
  1367. movzbl -1(%esi),%edx // Compare failing (or equal) position
  1368. movzbl -1(%edi),%ebx
  1369. .LStrCmp4:
  1370. movl %ebx,%eax // Compare length or position
  1371. subl %edx,%eax
  1372. movl saveedi,%edi
  1373. movl saveesi,%esi
  1374. movl saveebx,%ebx
  1375. end;
  1376. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  1377. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  1378. {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  { Converts the zero-terminated string p into the shortstring res.
    A nil p gives an empty string. At most 255 characters are copied; the value
    arriving in edx is overwritten without being read, so the routine relies on
    res having room for 255 characters (see the original note at .LStrPasEndFound).
    Register use visible below: esi = source cursor, edi = destination cursor,
    ecx = 1 + number of characters copied so far, saveres = address of res. }
  1379. procedure fpc_pchar_to_shortstr(out res : shortstring;p:PAnsiChar);assembler;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  1380. var
  1381. saveres,saveebx,saveesi,saveedi : longint;
  1382. asm
  1383. {$ifdef FPC_PROFILE}
  1384. push %eax
  1385. push %edx
  1386. push %ecx
  1387. call mcount
  1388. pop %ecx
  1389. pop %edx
  1390. pop %eax
  1391. {$endif FPC_PROFILE}
  1392. movl %ebx,saveebx
  1393. movl %esi,saveesi
  1394. movl %edi,saveedi
  1395. movl %ecx,%esi
  1396. movl %eax,%edi
  1397. movl %edi,saveres
  1398. movl $1,%ecx
  // mov does not change flags, so jz still tests esi (p = nil)
  1399. testl %esi,%esi
  1400. movl %esi,%eax
  1401. jz .LStrPasDone
  // edx := number of bytes up to the next 4-byte boundary of the source (0..3)
  1402. leal 3(%esi),%edx
  1403. andl $-4,%edx
  1404. // skip length byte
  1405. incl %edi
  1406. subl %esi,%edx
  1407. jz .LStrPasAligned
  1408. // align source to multiple of 4 (not dest, because we can't read past
  1409. // the end of the source, since that may be past the end of the heap
  1410. // -> sigsegv!!)
  1411. .LStrPasAlignLoop:
  1412. movb (%esi),%al
  1413. incl %esi
  1414. testb %al,%al
  1415. jz .LStrPasDone
  1416. incl %edi
  1417. incb %cl
  // the movb below does not change flags, so jne tests the result of decb
  1418. decb %dl
  1419. movb %al,-1(%edi)
  1420. jne .LStrPasAlignLoop
  1421. .balign 16
  1422. .LStrPasAligned:
  // Copy one dword and test it for a zero byte:
  // (v + 0xfefefeff) and (not v) and 0x80808080 is nonzero when v contains a zero byte.
  // The mov instructions in between do not change the flags tested by jnz.
  1423. movl (%esi),%ebx
  1424. addl $4,%edi
  1425. leal 0x0fefefeff(%ebx),%eax
  1426. movl %ebx,%edx
  1427. addl $4,%esi
  1428. notl %edx
  1429. andl %edx,%eax
  1430. addl $4,%ecx
  1431. andl $0x080808080,%eax
  1432. movl %ebx,-4(%edi)
  1433. jnz .LStrPasEndFound
  // once ecx exceeds 252 a further full dword could pass 255 characters: finish byte by byte
  1434. cmpl $252,%ecx
  1435. ja .LStrPasPreEndLoop
  1436. jmp .LStrPasAligned
  1437. .LStrPasEndFound:
  // undo the count for this dword, then add one per non-zero byte preceding the terminator;
  // each shrl moves the marker bit of the next byte into CF
  1438. subl $4,%ecx
  1439. // this won't overwrite data since the result = 255 AnsiChar string
  1440. // and we never process more than the first 255 chars of p
  1441. shrl $8,%eax
  1442. jc .LStrPasDone
  1443. incl %ecx
  1444. shrl $8,%eax
  1445. jc .LStrPasDone
  1446. incl %ecx
  1447. shrl $8,%eax
  1448. jc .LStrPasDone
  1449. incl %ecx
  1450. jmp .LStrPasDone
  1451. .LStrPasPreEndLoop:
  // cl = 0 here means ecx = 256, i.e. 255 characters are already copied
  1452. testb %cl,%cl
  1453. jz .LStrPasDone
  1454. movl (%esi),%eax
  1455. .LStrPasEndLoop:
  // copy the bytes of the last dword until a zero byte is met or cl wraps to 0
  1456. testb %al,%al
  1457. jz .LStrPasDone
  1458. movb %al,(%edi)
  1459. shrl $8,%eax
  1460. incl %edi
  1461. incb %cl
  1462. jnz .LStrPasEndLoop
  1463. .LStrPasDone:
  // length byte := cl - 1 (adding 255 modulo 256)
  1464. movl saveres,%edi
  1465. addb $255,%cl
  1466. movb %cl,(%edi)
  1467. movl saveesi,%esi
  1468. movl saveedi,%edi
  1469. movl saveebx,%ebx
  1470. end;
  1471. {$endif FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  1472. {$IFNDEF INTERNAL_BACKTRACE}
  1473. {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current value of ebp. }
  1474. function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  1475. asm
  1476. movl %ebp,%eax
  1477. end;
  1478. {$ENDIF not INTERNAL_BACKTRACE}
  1479. {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  { Returns the dword at the top of the stack. The routine has no stack frame,
    so on entry that is the return address of the call to Get_pc_addr. }
  1480. Function Get_pc_addr : Pointer;assembler;nostackframe;
  1481. asm
  1482. movl (%esp),%eax
  1483. end;
  1484. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Returns the pointer stored at framebp+4 (the return address slot of the
    frame), or nil when framebp is nil. On win32 nil is also returned for a
    framebp outside the range Sptr..StackTop. addr is not used. }
  function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;
  {$if defined(win32)}
  { Windows has StackTop always properly set }
  begin
    Result:=nil;
    if assigned(framebp) and (framebp<=StackTop) and (framebp>=Sptr) then
      Result:=PPointer(framebp+4)^;
  end;
  {$else defined(win32)}
  nostackframe;assembler;
  asm
          testl   %eax,%eax          // framebp = nil: return nil
          jz      .Lg_a_null
          movl    4(%eax),%eax
  .Lg_a_null:
  end;
  {$endif defined(win32)}
  1503. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Returns the pointer stored at framebp (the saved frame pointer of the
    caller), or nil when framebp is nil. On win32 nil is also returned for a
    framebp outside the range Sptr..StackTop. addr is not used. }
  function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;
  {$if defined(win32)}
  { Windows has StackTop always properly set }
  begin
    Result:=nil;
    if assigned(framebp) and (framebp<=StackTop) and (framebp>=Sptr) then
      Result:=PPointer(framebp)^;
  end;
  {$else defined(win32)}
  nostackframe;assembler;
  asm
          testl   %eax,%eax          // framebp = nil: return nil
          jz      .Lgnf_null
          movl    (%eax),%eax
  .Lgnf_null:
  end;
  {$endif defined(win32)}
  1522. {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the value of esp as seen inside this frameless routine. }
  1523. Function Sptr : Pointer;assembler;nostackframe;
  1524. asm
  1525. movl %esp,%eax
  1526. end;
  1527. {****************************************************************************
  1528. Str()
  1529. ****************************************************************************}
  1530. {$if defined(disabled) and defined(regcall) }
  1531. {$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
  1532. {$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
  1533. label str_int_shortcut;
  { Unsigned variant of int_str. Only compiled when the enclosing conditional
    (defined(disabled) and defined(regcall)) is true.
    Saves esi, edi and ebx, moves edx to edi, clears edx (the sign) and jumps
    into the shared code of the longint variant at str_int_shortcut. }
  1534. procedure int_str(l:longword;out s:shortstring);assembler;nostackframe;
  1535. asm
  1536. pushl %esi
  1537. pushl %edi
  1538. pushl %ebx
  1539. mov %edx,%edi
  1540. xor %edx,%edx
  1541. jmp str_int_shortcut
  1542. end;
  { Signed variant of int_str: writes the decimal representation of l into s.
    Only compiled when the enclosing conditional (defined(disabled) and
    defined(regcall)) is true.
    Register use visible below: eax = value, edi = address of s, edx = 1 for a
    negative value else 0, esi = copy of ecx on entry (presumably high(s) -
    confirm), ecx = number of digits. }
  1543. procedure int_str(l:longint;out s:shortstring);assembler;nostackframe;
  1544. {Optimized for speed, but balanced with size.}
  1545. const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
  1546. 100000,1000000,10000000,
  1547. 100000000,1000000000);
  1548. asm
  1549. {$ifdef FPC_PROFILE}
  1550. push %eax
  1551. push %edx
  1552. push %ecx
  1553. call mcount
  1554. pop %ecx
  1555. pop %edx
  1556. pop %eax
  1557. {$endif FPC_PROFILE}
  1558. push %esi
  1559. push %edi
  1560. push %ebx
  1561. movl %edx,%edi
  1562. { Calculate absolute value and put sign in edx}
  1563. cltd
  1564. xorl %edx,%eax
  1565. subl %edx,%eax
  1566. negl %edx
  1567. str_int_shortcut:
  1568. movl %ecx,%esi
  1569. {Calculate amount of digits in ecx.}
  // (bit index of the highest set bit + 1) * 1233 shr 12 estimates the digit count
  // (1233/4096 is close to log10(2)); the compare with the digits table corrects it by one
  1570. xorl %ecx,%ecx
  1571. bsrl %eax,%ecx
  1572. incl %ecx
  1573. imul $1233,%ecx
  1574. shr $12,%ecx
  1575. {$ifdef FPC_PIC}
  1576. call fpc_geteipasebx
  1577. {$ifdef darwin}
  1578. movl digits-.Lpic(%ebx),%ebx
  1579. {$else}
  1580. addl $_GLOBAL_OFFSET_TABLE_,%ebx
  1581. movl digits@GOT(%ebx),%ebx
  1582. {$endif}
  1583. cmpl (%ebx,%ecx,4),%eax
  1584. {$else}
  1585. cmpl digits(,%ecx,4),%eax
  1586. {$endif}
  1587. cmc
  1588. adcl $0,%ecx {Nr. digits ready in ecx.}
  1589. {Write length & sign.}
  // bl = sign + digit count (the length byte), bh = '-'; both are stored with one word write
  1590. lea (%edx,%ecx),%ebx
  1591. movb $45,%bh {movb $'-,%bh Not supported by our ATT reader.}
  1592. movw %bx,(%edi)
  1593. addl %edx,%edi
  1594. subl %edx,%esi
  1595. {Skip digits beyond string length.}
  1596. movl %eax,%edx
  1597. subl %ecx,%esi
  1598. jae .Lloop_write
  1599. .balign 4
  1600. .Lloop_skip:
  1601. movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
  1602. mull %edx
  1603. shrl $3,%edx
  1604. decl %ecx
  1605. jz .Ldone {If (l<0) and (high(s)=1) this jump is taken.}
  1606. incl %esi
  1607. jnz .Lloop_skip
  1608. {Write out digits.}
  1609. .balign 4
  1610. .Lloop_write:
  1611. movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
  1612. {Pre-add '0'}
  1613. leal 48(%edx),%ebx {leal $'0(,%edx),%ebx Not supported by our ATT reader.}
  1614. mull %edx
  1615. shrl $3,%edx
  // eax := 9*q and ebx := 48 + x - q - 9*q, i.e. the ASCII code of x mod 10
  1616. leal (%edx,%edx,8),%eax {x mod 10 = x-10*(x div 10)}
  1617. subl %edx,%ebx
  1618. subl %eax,%ebx
  1619. movb %bl,(%edi,%ecx)
  1620. decl %ecx
  1621. jnz .Lloop_write
  1622. .Ldone:
  1623. popl %ebx
  1624. popl %edi
  1625. popl %esi
  1626. end;
  1627. {$endif}
  1628. {****************************************************************************
  1629. Bounds Check
  1630. ****************************************************************************}
  1631. { do a thread-safe inc/dec }
  1632. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  { Decrements l with a lock prefix and returns true when the result is zero
    (al is set from ZF of the decrement). }
  1633. function cpudeclocked(var l : longint) : boolean;assembler;nostackframe;
  1634. asm
  1635. lock
  1636. decl (%eax)
  1637. setzb %al
  1638. end;
  1639. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { Increments l with a lock prefix. }
  1640. procedure cpuinclocked(var l : longint);assembler;nostackframe;
  1641. asm
  1642. lock
  1643. incl (%eax)
  1644. end;
  // Inline SMP check: a plain decrement is used while ismultithread is false.
  // The locked variant is slow enough that inlining it would not matter.
  // Returns true when l has reached zero.
  function declocked(var l : longint) : boolean; inline;
  begin
    if ismultithread then
      declocked:=cpudeclocked(l)
    else
      begin
        dec(l);
        declocked:=(l=0);
      end;
  end;
  { Increments l: a plain inc while ismultithread is false, otherwise the
    lock-prefixed cpuinclocked. }
  procedure inclocked(var l : longint); inline;
  begin
    if ismultithread then
      cpuinclocked(l)
    else
      inc(l);
  end;
  { Atomically subtracts 1 from Target and returns the new value.
    After the xchg edx holds the address and eax holds -1; lock xadd leaves the
    old value in eax, which is then decremented to give the new value. }
  1664. function InterLockedDecrement (var Target: longint) : longint; assembler;
  1665. asm
  1666. movl $-1,%edx
  1667. xchgl %edx,%eax
  1668. lock
  1669. xaddl %eax, (%edx)
  1670. decl %eax
  1671. end;
  { Atomically adds 1 to Target and returns the new value.
    After the xchg edx holds the address and eax holds 1; lock xadd leaves the
    old value in eax, which is then incremented to give the new value. }
  1672. function InterLockedIncrement (var Target: longint) : longint; assembler;
  1673. asm
  1674. movl $1,%edx
  1675. xchgl %edx,%eax
  1676. lock
  1677. xaddl %eax, (%edx)
  1678. incl %eax
  1679. end;
  { Stores Source in Target and returns the previous value of Target.
    No lock prefix is written: xchg with a memory operand is locked by the CPU. }
  1680. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  1681. asm
  1682. xchgl (%eax),%edx
  1683. movl %edx,%eax
  1684. end;
  { Atomically adds Source to Target and returns the previous value of Target
    (lock xadd leaves the old memory value in eax). }
  1685. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  1686. asm
  1687. xchgl %eax,%edx
  1688. lock
  1689. xaddl %eax, (%edx)
  1690. end;
  { Atomically stores NewValue in Target when Target equals Comperand.
    Returns the value Target held before the operation: cmpxchg compares eax
    with the memory operand and loads eax from memory when they differ.
    After the xchg eax holds Comperand and ecx the address of Target. }
  1691. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler;
  1692. asm
  1693. xchgl %eax,%ecx
  1694. lock
  1695. cmpxchgl %edx, (%ecx)
  1696. end;
  { 64-bit compare-and-exchange built on lock cmpxchg8b.
    edx:eax is loaded with Comperand and ecx:ebx with NewValue; edi holds the
    address of Target. The result is left in edx:eax by cmpxchg8b, i.e. the
    value Target held before the operation. ebx and edi are preserved. }
  1697. function InterlockedCompareExchange64(var Target: int64; NewValue: int64; Comperand: int64): int64; assembler;
  1698. asm
  1699. pushl %ebx
  1700. pushl %edi
  1701. movl %eax,%edi
  1702. movl Comperand+4,%edx
  1703. movl Comperand+0,%eax
  1704. movl NewValue+4,%ecx
  1705. movl NewValue+0,%ebx
  1706. lock cmpxchg8b (%edi)
  1707. pop %edi
  1708. pop %ebx
  1709. end;
  1710. {****************************************************************************
  1711. FPU
  1712. ****************************************************************************}
  1713. const
  1714. { Internal constants for use in system unit }
  { FPU_*: one bit per exception kind (bits 0..6); FPU_ExceptionMask covers the low byte }
  1715. FPU_Invalid = 1;
  1716. FPU_Denormal = 2;
  1717. FPU_DivisionByZero = 4;
  1718. FPU_Overflow = 8;
  1719. FPU_Underflow = $10;
  1720. FPU_StackUnderflow = $20;
  1721. FPU_StackOverflow = $40;
  1722. FPU_ExceptionMask = $ff;
  { MM_*: one bit per exception kind (bits 0..5); MM_ExceptionMask covers those six bits.
    NOTE(review): presumably the exception flag bits of MXCSR - confirm }
  1723. MM_Invalid = 1;
  1724. MM_Denormal = 2;
  1725. MM_DivisionByZero = 4;
  1726. MM_Overflow = 8;
  1727. MM_Underflow = $10;
  1728. MM_Precicion = $20;
  1729. MM_ExceptionMask = $3f;
  { MM_Mask*: bits 7..12, in the same order as the six MM_* flags above.
    NOTE(review): presumably the exception mask bits of MXCSR - confirm }
  1730. MM_MaskInvalidOp = %0000000010000000;
  1731. MM_MaskDenorm = %0000000100000000;
  1732. MM_MaskDivZero = %0000001000000000;
  1733. MM_MaskOverflow = %0000010000000000;
  1734. MM_MaskUnderflow = %0000100000000000;
  1735. MM_MaskPrecision = %0001000000000000;
  1736. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { Intentionally empty in this file: no FPU initialisation is performed here. }
  1737. Procedure SysInitFPU;
  1738. begin
  1739. end;
  1740. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { Resets the x87 FPU (fninit) and loads Default8087CW as its control word.
    When has_sse_support is set, DefaultMXCSR is loaded into MXCSR as well. }
  1741. Procedure SysResetFPU;
  1742. var
  1743. { these locals are so we don't have to hack pic code in the assembler }
  1744. localmxcsr: dword;
  1745. localfpucw: word;
  1746. begin
  1747. localfpucw:=Default8087CW;
  1748. asm
  1749. fninit
  1750. fwait
  1751. fldcw localfpucw
  1752. end;
  1753. if has_sse_support then
  1754. begin
  1755. localmxcsr:=DefaultMXCSR;
  1756. asm
  1757. { setup sse exceptions }
  1758. {$ifndef OLD_ASSEMBLER}
  1759. ldmxcsr localmxcsr
  1760. {$else OLD_ASSEMBLER}
  { OLD_ASSEMBLER: the value is placed on the stack and ldmxcsr (%esp) is emitted as raw bytes }
  1761. mov localmxcsr,%eax
  1762. subl $4,%esp
  1763. mov %eax,(%esp)
  1764. //ldmxcsr (%esp)
  1765. .byte 0x0f,0xae,0x14,0x24
  1766. addl $4,%esp
  1767. {$endif OLD_ASSEMBLER}
  1768. end;
  1769. end;
  1770. end;
  { Detects MMX/SSE/SSE2/SSE3 support and initialises the FPU state.
    Because of the brain dead sse detection on x86, this test is postponed:
    besides the CPUID flag, an SSE instruction is executed while sse_check is
    set, and the exception handler clears os_supports_sse when it faults.
    Afterwards SysResetFPU is called (for libraries, the current control
    words of the host are taken over as defaults first). }
  procedure fpc_cpucodeinit;
    var
      _ecx,_edx : longint;
    begin
      if cpuid_support then
        begin
          { CPUID leaf 1 returns the feature flags in edx and ecx; cpuid writes
            eax, ebx, ecx and edx, so all four are declared as modified }
          asm
            movl $1,%eax
            cpuid
            movl %edx,_edx
            movl %ecx,_ecx
          end ['eax','ebx','ecx','edx'];
          { edx bit 23: MMX }
          has_mmx_support:=(_edx and $800000)<>0;
          { edx bit 25: SSE }
          if ((_edx and $2000000)<>0) then
            begin
              os_supports_sse:=true;
              sse_check:=true;
              asm
                { force an sse exception if no sse is supported, the exception handler sets
                  os_supports_sse to false then }
                { don't change this instruction, the code above depends on its size }
  {$ifdef OLD_ASSEMBLER}
                .byte 0x0f,0x28,0xf7
  {$else}
                movaps %xmm7, %xmm6
  {$endif OLD_ASSEMBLER}
              end;
              sse_check:=false;
              has_sse_support:=os_supports_sse;
            end;
          if has_sse_support then
            begin
              { edx bit 26: SSE2 }
              has_sse2_support:=((_edx and $4000000)<>0);
              { ecx bit 0: SSE3 (bit 9, $200, is the SSSE3 flag) }
              has_sse3_support:=((_ecx and $1)<>0);
            end;
        end;

      { don't let libraries influence the FPU cw set by the host program }
      if IsLibrary then
        begin
          Default8087CW:=Get8087CW;
          if has_sse_support then
            DefaultMXCSR:=GetMXCSR;
        end;

      SysResetFPU;
  {$ifdef USE_FASTMOVE}
      setup_fastmove;
  {$endif}
    end;
  1820. {$if not defined(darwin) and defined(regcall) }
  1821. { darwin requires that the stack is aligned to 16 bytes when calling another function }
  1822. {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
  1823. {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
  { Releases one reference to the ansistring S and sets S to nil.
    Nothing else happens when S is nil already or when the reference count at
    offset -8 is negative. Otherwise the count is decremented (with a lock
    prefix when ismultithread is non-zero) and, when it reaches zero, the block
    starting 12 bytes before the string data is passed to FPC_FREEMEM. }
  1824. Procedure fpc_AnsiStr_Decr_Ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF']; compilerproc; nostackframe; assembler;
  1825. asm
  1826. movl (%eax),%edx
  1827. testl %edx,%edx
  1828. jz .Lquit
  1829. movl $0,(%eax) // s:=nil
  1830. cmpl $0,-8(%edx) // exit if refcount<0
  1831. jl .Lquit
  1832. {$ifdef FPC_PIC}
  1833. call fpc_geteipasecx
  1834. addl $_GLOBAL_OFFSET_TABLE_,%ecx
  1835. movl ismultithread@GOT(%ecx),%ecx
  1836. cmpl $0,(%ecx)
  1837. {$else FPC_PIC}
  1838. cmpl $0,ismultithread
  1839. {$endif FPC_PIC}
  1840. je .Lskiplock
  1841. .byte 0xF0 // LOCK prefix, jumped over if IsMultiThread = false. FPC assembler does not accept disjoint LOCK mnemonic.
  1842. .Lskiplock:
  1843. decl -8(%edx)
  1844. jz .Lfree
  1845. .Lquit:
  1846. ret
  1847. .Lfree:
  1848. leal -12(%edx),%eax // points to start of allocation
  1849. { freemem is not an assembler leaf function like fpc_geteipasecx, so it
  1850. needs to be called with proper stack alignment }
  1851. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1852. leal -12(%esp),%esp
  1853. call FPC_FREEMEM
  1854. leal 12(%esp),%esp
  1855. {$else FPC_SYSTEM_STACKALIGNMENT16}
  1856. jmp FPC_FREEMEM // can perform a tail call
  1857. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1858. end;
  1859. function fpc_truely_ansistr_unique(Var S : Pointer): Pointer; forward;
  1860. {$define FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
  { Returns the string pointer stored in S.
    For a nil string or a reference count (at offset -8) equal to 1 the stored
    pointer is returned as is; otherwise fpc_truely_ansistr_unique(S) is called
    and its result is returned. edx keeps the address of S across the checks. }
  1861. Function fpc_ansistr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_ANSISTR_UNIQUE']; compilerproc; nostackframe;assembler;
  1862. asm
  1863. // Var S located in register
  1864. // Var $result located in register
  1865. movl %eax,%edx
  1866. // [437] pointer(result) := pointer(s);
  1867. movl (%eax),%eax
  1868. // [438] If Pointer(S)=Nil then
  1869. testl %eax,%eax
  1870. je .Lj4031
  1871. .Lj4036:
  1872. // [440] if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
  1873. movl -8(%eax),%ecx
  1874. cmpl $1,%ecx
  1875. je .Lj4038
  1876. // [441] result:=fpc_truely_ansistr_unique(s);
  1877. movl %edx,%eax
  1878. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1879. leal -12(%esp),%esp
  1880. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1881. call fpc_truely_ansistr_unique
  1882. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1883. leal 12(%esp),%esp
  1884. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1885. .Lj4038:
  1886. .Lj4031:
  1887. // [442] end;
  1888. end;
  1889. {$endif FPC_HAS_FEATURE_ANSISTRINGS}
  1890. {$endif ndef darwin and defined(regcall) }
  1891. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  1892. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  { Read barrier: lfence when CPUX86_HAS_SSE2 is defined, otherwise a
    lock-prefixed add of 0 to the dword at the top of the stack. }
  1893. procedure ReadBarrier;assembler;nostackframe;
  1894. asm
  1895. {$ifdef CPUX86_HAS_SSE2}
  1896. lfence
  1897. {$else CPUX86_HAS_SSE2}
  1898. lock
  1899. addl $0,0(%esp)
  1900. {$endif CPUX86_HAS_SSE2}
  1901. end;
  { Intentionally empty; see the note in the body. }
  1902. procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1903. begin
  1904. { reads imply barrier on earlier reads depended on }
  1905. end;
  { Full barrier: mfence when CPUX86_HAS_SSE2 is defined, otherwise a
    lock-prefixed add of 0 to the dword at the top of the stack. }
  1906. procedure ReadWriteBarrier;assembler;nostackframe;
  1907. asm
  1908. {$ifdef CPUX86_HAS_SSE2}
  1909. mfence
  1910. {$else CPUX86_HAS_SSE2}
  1911. lock
  1912. addl $0,0(%esp)
  1913. {$endif CPUX86_HAS_SSE2}
  1914. end;
  { Write barrier: sfence when CPUX86_HAS_SSEUNIT is defined; without that
    define the body is empty. }
  1915. procedure WriteBarrier;assembler;nostackframe;
  1916. asm
  1917. {$ifdef CPUX86_HAS_SSEUNIT}
  1918. sfence
  1919. {$endif CPUX86_HAS_SSEUNIT}
  1920. end;
  1921. {$endif}
  1922. {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
  1923. {$define FPC_SYSTEM_HAS_BSF_QWORD}
  { Returns the index (0..63) of the lowest set bit of AValue, or 255 when
    AValue is zero. The value is read from the stack: low dword at 4(%esp),
    high dword at 8(%esp). }
  1924. function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1925. asm
  // bsf sets ZF when its source is zero; a non-zero low dword gives the result directly
  1926. bsfl 4(%esp),%eax
  1927. jnz .L2
  1928. .L1:
  1929. bsfl 8(%esp),%eax
  1930. jnz .L3
  // both halves zero: 223 + 32 = 255
  1931. movl $223,%eax
  1932. .L3:
  1933. addl $32,%eax
  1934. .L2:
  1935. end;
  1936. {$endif FPC_SYSTEM_HAS_BSF_QWORD}
  1937. {$ifndef FPC_SYSTEM_HAS_BSR_QWORD}
  1938. {$define FPC_SYSTEM_HAS_BSR_QWORD}
  { Returns the index (0..63) of the highest set bit of AValue, or 255 when
    AValue is zero. The value is read from the stack: low dword at 4(%esp),
    high dword at 8(%esp). }
  1939. function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1940. asm
  // bsr sets ZF when its source is zero; a non-zero high dword gives index + 32
  1941. bsrl 8(%esp),%eax
  1942. jz .L1
  1943. add $32,%eax
  1944. jmp .L2
  1945. .L1:
  1946. bsrl 4(%esp),%eax
  1947. jnz .L2
  1948. movl $255,%eax
  1949. .L2:
  1950. end;
  1951. {$endif FPC_SYSTEM_HAS_BSR_QWORD}
  1952. {$ifndef FPC_SYSTEM_HAS_SAR_QWORD}
  1953. {$define FPC_SYSTEM_HAS_SAR_QWORD}
  { Arithmetic shift right of the 64-bit AValue by (Shift and 63) bits.
    AValue is read from the stack (low dword at 4(%esp), high dword at
    8(%esp)), Shift arrives in al; the result is returned in edx:eax. }
  1954. function fpc_SarInt64(Const AValue : Int64;const Shift : Byte): Int64; [Public,Alias:'FPC_SARINT64']; compilerproc; assembler; nostackframe;
  1955. asm
  1956. movb %al,%cl
  1957. movl 8(%esp),%edx
  1958. movl 4(%esp),%eax
  1959. andb $63,%cl
  1960. cmpb $32,%cl
  1961. jnb .L1
  // shift count below 32: shift the pair, filling eax from edx
  1962. shrdl %cl,%edx,%eax
  1963. sarl %cl,%edx
  1964. jmp .Lexit
  1965. .L1:
  // shift count 32..63: the low result is the high dword shifted by (count - 32),
  // the high result is filled with the sign
  1966. movl %edx,%eax
  1967. sarl $31,%edx
  1968. andb $31,%cl
  1969. sarl %cl,%eax
  1970. .Lexit:
  1971. end;
  1972. {$endif FPC_SYSTEM_HAS_SAR_QWORD}