i386.inc 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team.
  4. Processor dependent implementation for the system unit for
  5. intel i386+
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  { Define FPC_SYSTEM_STACKALIGNMENT16 when targeting linux, unless the compiler defines VER3_0. }
  12. {$if not(defined(VER3_0)) and defined(linux)}
  13. {$define FPC_SYSTEM_STACKALIGNMENT16}
  14. {$endif not(defined(VER3_0)) and defined(linux)}
  15. {****************************************************************************
  16. Primitives
  17. ****************************************************************************}
  18. var
  { NOTE(review): presumably records whether the operating system supports SSE; it is only declared here and not assigned in this part of the file - confirm against the OS specific startup code. }
  19. os_supports_sse : boolean;
  20. { Set to true while an SSE check is being executed, so that no SIGILL should be generated. }
  21. sse_check : boolean;
  { The inline assembler of the routines that follow is written in AT&T syntax. }
  22. {$asmmode ATT}
  { cpuid_support: returns true when bit 21 of EFLAGS (mask 0x200000, the ID flag) can be toggled, which means the CPUID instruction is available. The original EFLAGS value is restored before returning. }
  23. function cpuid_support : boolean;assembler;nostackframe;
  24. {
  25. Check if the ID-flag can be changed, if changed then CpuID is supported.
  26. Tested under go32v1 and Linux on c6x86 with CpuID enabled and disabled (PFV)
  27. }
  28. asm
  29. pushfl { keep the original EFLAGS on the stack }
  30. movl (%esp),%eax { eax = original EFLAGS }
  31. xorl $0x200000,%eax { flip the ID flag }
  32. pushl %eax
  33. popfl { try to load the modified value into EFLAGS }
  34. pushfl
  35. popl %eax { eax = EFLAGS as actually kept by the CPU }
  36. xorl (%esp),%eax { eax = bits that differ from the original EFLAGS }
  37. popfl { restore the original EFLAGS }
  38. testl $0x200000,%eax { did the ID flag change? }
  39. setnz %al { result = true when the flag could be toggled }
  40. end;
  41. {$ifndef FPC_PIC}
  42. {$ifndef FPC_SYSTEM_HAS_MOVE}
  43. {$ifndef OLD_ASSEMBLER}
  44. {$define USE_FASTMOVE}
  45. {$i fastmove.inc}
  46. {$endif not OLD_ASSEMBLER}
  47. {$endif FPC_SYSTEM_HAS_MOVE}
  48. {$endif FPC_PIC}
  49. {$define FPC_SYSTEM_HAS_FPC_CPUINIT}
  { fpc_cpuinit: intentionally does nothing on i386; the feature detection that used to live here is kept below as commented-out code only. }
  50. procedure fpc_cpuinit;
  51. begin
  52. { Because of the brain-dead SSE detection on x86, this test is postponed to fpc_cpucodeinit, which
  53. must be implemented in an OS-dependent way (FK)
  54. has_sse_support:=sse_support;
  55. has_mmx_support:=mmx_support;
  56. setup_fastmove;
  57. }
  58. end;
  59. {$ifndef darwin}
  { fpc_geteipasebx: loads the dword at the top of the stack into EBX. The routine has no stack frame, so on entry that dword is the return address, i.e. the address of the instruction following the call. NOTE(review): presumably used by position-independent code to obtain its own address - confirm against the callers. }
  60. procedure fpc_geteipasebx; [public, alias: 'fpc_geteipasebx'];assembler; nostackframe;
  61. asm
  62. movl (%esp),%ebx { ebx = return address of this call }
  63. end;
  { fpc_geteipasecx: loads the dword at the top of the stack into ECX. The routine has no stack frame, so on entry that dword is the return address, i.e. the address of the instruction following the call. }
  64. procedure fpc_geteipasecx; [public, alias: 'fpc_geteipasecx'];assembler; nostackframe;
  65. asm
  66. movl (%esp),%ecx { ecx = return address of this call }
  67. end;
  68. {$endif}
  69. {$ifndef FPC_SYSTEM_HAS_MOVE}
  70. {$define FPC_SYSTEM_HAS_MOVE}
  { Move: copies count bytes from source to dest.

    Overlapping ranges are handled: when dest lies inside [source, source+count)
    the copy runs backward (from the last byte down), otherwise forward.
    A count of zero or less, or source = dest, copies nothing.

    Register use inside the routine (taken from the first instructions):
      eax = address of source, edx = address of dest, ecx = count on entry;
      esi/edi are used as the string-instruction pointers and are restored
      from the locals saveesi/saveedi before returning.

    Copies of 15 bytes or more first copy up to 3 single bytes so that the
    destination pointer is 4-byte aligned, then copy dwords, then the rest. }
  procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
  var
    saveesi,saveedi : longint;
  asm
          movl    %edi,saveedi
          movl    %esi,saveesi
          movl    %eax,%esi           { esi = source }
          movl    %edx,%edi           { edi = dest }
          movl    %ecx,%edx           { edx = count }
          movl    %edi,%eax
          { check for zero or negative count }
          cmpl    $0,%edx
          jle     .LMoveEnd
          { Check for back or forward }
          sub     %esi,%eax           { eax = dest - source }
          jz      .LMoveEnd           { Do nothing when source=dest }
          jc      .LFMove             { Do forward, dest<source }
          cmp     %edx,%eax
          jb      .LBMove             { Dest is in range of move, do backward }
          { Forward Copy }
  .LFMove:
  {$ifdef FPC_ENABLED_CLD}
          cld
  {$endif FPC_ENABLED_CLD}
          cmpl    $15,%edx
          jl      .LFMove1            { short copy: bytes only }
          movl    %edi,%ecx           { Align on 32bits }
          negl    %ecx
          andl    $3,%ecx             { ecx = bytes up to the next 4-byte boundary of dest }
          subl    %ecx,%edx
          rep
          movsb
          movl    %edx,%ecx
          andl    $3,%edx             { edx = trailing bytes }
          shrl    $2,%ecx             { ecx = whole dwords }
          rep
          movsl
  .LFMove1:
          movl    %edx,%ecx
          rep
          movsb
          jmp     .LMoveEnd
          { Backward Copy }
  .LBMove:
          std
          addl    %edx,%esi
          addl    %edx,%edi
          movl    %edi,%ecx           { ecx = dest+count, one past the last destination byte }
          decl    %esi                { esi/edi = last byte of source/dest }
          decl    %edi
          cmpl    $15,%edx
          jl      .LBMove1            { short copy: bytes only }
          { Align on 32bits. Going downward, the number of bytes above the
            highest 4-byte boundary is (dest+count) and 3; the end address must
            not be negated here as it is in the forward direction. }
          andl    $3,%ecx
          subl    %ecx,%edx
          rep
          movsb
          movl    %edx,%ecx
          andl    $3,%edx             { edx = remaining low bytes }
          shrl    $2,%ecx             { ecx = whole dwords }
          subl    $3,%esi             { point at the lowest byte of the next dword }
          subl    $3,%edi
          rep
          movsl
          addl    $3,%esi             { back to byte granularity }
          addl    $3,%edi
  .LBMove1:
          movl    %edx,%ecx
          rep
          movsb
          cld                         { leave the direction flag cleared }
  .LMoveEnd:
          movl    saveedi,%edi
          movl    saveesi,%esi
  end;
  146. {$endif FPC_SYSTEM_HAS_MOVE}
  147. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  148. {$define FPC_SYSTEM_HAS_FILLCHAR}
  { FillChar: stores count copies of the byte value starting at x. As used by the code below, on entry eax = address of x, edx = count and cl = value. Counts of 22 or less are written by a simple byte loop; larger counts use rep stosl for whole dwords followed by rep stosb for the remaining 0..3 bytes. A count of zero or less stores nothing. }
  149. Procedure FillChar(var x;count:SizeInt;value:byte);assembler; nostackframe;
  150. asm
  151. cmpl $22,%edx { empirically determined value on a Core 2 Duo Conroe }
  152. jg .LFillFull
  153. orl %edx,%edx { set the flags from count without changing it }
  154. jle .LFillZero { nothing to do for count <= 0 }
  155. .LFillLoop: { short fill: one byte per iteration }
  156. movb %cl,(%eax)
  157. incl %eax
  158. decl %edx
  159. jne .LFillLoop
  160. .LFillZero:
  161. ret
  162. .LFillFull:
  163. {$ifdef FPC_ENABLED_CLD}
  164. cld
  165. {$endif FPC_ENABLED_CLD}
  166. push %edi { edi serves as the store pointer and is popped again at the end }
  167. movl %eax,%edi { edi = destination }
  168. movzbl %cl,%eax { eax = value, zero-extended }
  169. movl %edx,%ecx
  170. imul $0x01010101,%eax { replicate the byte into all 4 bytes of eax }
  171. shrl $2,%ecx { ecx = number of whole dwords }
  172. andl $3,%edx { edx = leftover bytes, 0..3 }
  173. rep
  174. stosl
  175. movl %edx,%ecx
  176. .LFill1: { label is not referenced within this routine }
  177. rep
  178. stosb
  179. .LFillEnd: { label is not referenced within this routine }
  180. pop %edi
  181. end;
  182. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  183. {$ifndef FPC_SYSTEM_HAS_FILLWORD}
  184. {$define FPC_SYSTEM_HAS_FILLWORD}
  { fillword: stores count copies of the 16-bit value starting at x. As used by the code below, on entry eax = address of x, edx = count (in words) and cx = value. Pairs of words are written as dwords with rep stosl; an odd final word is written with stosw. A count of zero or less stores nothing. edi is preserved through the local saveedi. }
  185. procedure fillword(var x;count : SizeInt;value : word);assembler;
  186. var
  187. saveedi : longint;
  188. asm
  189. movl %edi,saveedi
  190. movl %eax,%edi { edi = destination }
  191. movzwl %cx,%eax { eax = value, zero-extended }
  192. movl %edx,%ecx { ecx = count }
  193. { check for zero or negative count }
  194. cmpl $0,%ecx
  195. jle .LFillWordEnd
  196. movl %eax,%edx
  197. shll $16,%eax
  198. orl %edx,%eax { eax = value in both the low and the high word }
  199. movl %ecx,%edx { edx = count, kept for the odd-word test }
  200. shrl $1,%ecx { ecx = number of dwords, i.e. word pairs }
  201. {$ifdef FPC_ENABLED_CLD}
  202. cld
  203. {$endif FPC_ENABLED_CLD}
  204. rep
  205. stosl
  206. movl %edx,%ecx
  207. andl $1,%ecx { ecx = 1 when count is odd, else 0 }
  208. rep
  209. stosw
  210. .LFillWordEnd:
  211. movl saveedi,%edi
  212. end;
  213. {$endif FPC_SYSTEM_HAS_FILLWORD}
  214. {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
  215. {$define FPC_SYSTEM_HAS_FILLDWORD}
  { filldword: stores count copies of the 32-bit value starting at x using rep stosl. As used by the code below, on entry eax = address of x, edx = count (in dwords) and ecx = value. A count of zero or less stores nothing. edi is preserved through the local saveedi. }
  216. procedure filldword(var x;count : SizeInt;value : dword);assembler;
  217. var
  218. saveedi : longint;
  219. asm
  220. movl %edi,saveedi
  221. movl %eax,%edi { edi = destination }
  222. movl %ecx,%eax { eax = value to store }
  223. movl %edx,%ecx { ecx = count }
  224. { check for zero or negative count }
  225. cmpl $0,%ecx
  226. jle .LFillDWordEnd
  227. {$ifdef FPC_ENABLED_CLD}
  228. cld
  229. {$endif FPC_ENABLED_CLD}
  230. rep
  231. stosl
  232. .LFillDWordEnd:
  233. movl saveedi,%edi
  234. end;
  235. {$endif FPC_SYSTEM_HAS_FILLDWORD}
  236. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  237. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  { IndexByte_Plain: searches the first len bytes of buf for the byte b without using SSE. Returns the zero-based offset of the first match, or -1 when there is none. On entry eax = address of buf, edx = len, cl = b. len is compared unsigned, so a negative len acts as a very large length. The pointer is first aligned to 4 bytes, then dwords are tested four at a time with a has-a-zero-byte bit trick, then single dwords, then the last 0..3 bytes. esi and edi are saved and restored on the stack. }
  238. function IndexByte_Plain(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  239. asm
  240. push %esi
  241. push %edi
  242. push %eax { save initial value of 'buf' }
  243. cmp $4,%edx { less than 4 bytes, just test byte by byte. }
  244. jb .Ltail
  245. mov %cl,%ch { prepare pattern }
  246. movzwl %cx,%esi
  247. shl $16,%ecx
  248. or %esi,%ecx { ecx = b replicated into all four bytes }
  249. .Lalignloop:
  250. test $3,%al { align to 4 bytes if necessary }
  251. je .Laligned
  252. cmp %cl,(%eax)
  253. je .Lexit
  254. inc %eax
  255. dec %edx
  256. jmp .Lalignloop
  257. .balign 16 { Main loop, unrolled 4 times for speed }
  258. .Lloop:
  259. mov (%eax),%esi { load dword }
  260. xor %ecx,%esi { XOR with pattern, bytes equal to target are now 0 }
  261. lea -0x01010101(%esi),%edi
  262. xor %esi,%edi { (x-0x01010101) xor x }
  263. not %esi
  264. and $0x80808080,%esi
  265. and %edi,%esi { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
  266. jnz .Lfound { one of the bytes matches }
  267. mov 4(%eax),%esi { same test for the second dword }
  268. xor %ecx,%esi
  269. lea -0x01010101(%esi),%edi
  270. xor %esi,%edi
  271. not %esi
  272. and $0x80808080,%esi
  273. and %edi,%esi
  274. jnz .Lfound4
  275. mov 8(%eax),%esi { same test for the third dword }
  276. xor %ecx,%esi
  277. lea -0x01010101(%esi),%edi
  278. xor %esi,%edi
  279. not %esi
  280. and $0x80808080,%esi
  281. and %edi,%esi
  282. jnz .Lfound8
  283. mov 12(%eax),%esi { same test for the fourth dword }
  284. xor %ecx,%esi
  285. lea -0x01010101(%esi),%edi
  286. xor %esi,%edi
  287. not %esi
  288. and $0x80808080,%esi
  289. and %edi,%esi
  290. jnz .Lfound12
  291. add $16,%eax
  292. .Laligned:
  293. sub $16,%edx
  294. jae .Lloop { Still more than 16 bytes remaining }
  295. { Process remaining bytes (<16 left at this point) }
  296. { length is offset by -16 at this point }
  297. .Lloop2:
  298. cmp $4-16,%edx { < 4 bytes left? }
  299. jb .Ltail
  300. mov (%eax),%esi
  301. xor %ecx,%esi
  302. lea -0x01010101(%esi),%edi
  303. xor %esi,%edi
  304. not %esi
  305. and $0x80808080,%esi
  306. and %edi,%esi
  307. jne .Lfound
  308. add $4,%eax
  309. sub $4,%edx
  310. jmp .Lloop2
  311. .Ltail: { Less than 4 bytes remaining, check one by one }
  312. and $3, %edx { also removes the -16 offset, which is a multiple of 4 }
  313. jz .Lnotfound
  314. .Lloop3:
  315. cmp %cl,(%eax)
  316. je .Lexit
  317. inc %eax
  318. dec %edx
  319. jnz .Lloop3
  320. .Lnotfound:
  321. or $-1,%eax { result = -1 }
  322. jmp .Lexit1
  323. { add missing source pointer increments }
  324. .Lfound12:
  325. add $4,%eax
  326. .Lfound8:
  327. add $4,%eax
  328. .Lfound4:
  329. add $4,%eax
  330. .Lfound: { esi holds the match mask of the dword at eax; locate the lowest matching byte }
  331. test $0xff,%esi
  332. jnz .Lexit
  333. inc %eax
  334. test $0xff00,%esi
  335. jnz .Lexit
  336. inc %eax
  337. test $0xff0000,%esi
  338. jnz .Lexit
  339. inc %eax
  340. .Lexit:
  341. sub (%esp),%eax { result = match address - initial buf }
  342. .Lexit1:
  343. pop %ecx { removes initial 'buf' value }
  344. pop %edi
  345. pop %esi
  346. end;
  { IndexByte_SSE2: searches the first len bytes of buf for the byte b using 16-byte SSE2 compares. Returns the zero-based offset of the first match, or -1 when there is none or len is zero. On entry eax = address of buf, edx = len, cl = b. All loads are 16-byte aligned, so bytes before buf and bytes past buf+len inside the same aligned chunks are read as well; matches before buf are masked out and matches at or beyond len are rejected by the final unsigned compare. }
  347. function IndexByte_SSE2(const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  348. asm
  349. test %edx, %edx
  350. je .LNothing
  351. push %ebx
  352. mov %eax, %ebx { ebx = buf }
  353. and $-16, %eax { eax = buf rounded down to 16 bytes }
  354. pxor %xmm1, %xmm1
  355. movd %ecx, %xmm1
  356. punpcklbw %xmm1, %xmm1
  357. punpcklwd %xmm1, %xmm1
  358. pshufd $0, %xmm1, %xmm1 { xmm1 = b in all 16 bytes }
  359. lea 16(%eax), %ecx
  360. movdqa %xmm1, %xmm0
  361. pcmpeqb (%eax), %xmm0
  362. sub %ebx, %ecx { ecx = 16 - misalignment = bytes of the first chunk that belong to buf }
  363. pmovmskb %xmm0, %eax
  364. sal %cl, %eax { these three instructions clear the mask bits of bytes in front of buf }
  365. xor %ax, %ax
  366. shr %cl, %eax
  367. jz .L16xAligned_Test
  368. sub $16, %ecx { ecx = offset of the first chunk relative to buf, zero or negative }
  369. .LFound:
  370. bsf %eax, %eax { position of the first match within the chunk }
  371. add %ecx, %eax { offset relative to buf }
  372. pop %ebx
  373. cmp %edx, %eax
  374. jnb .LNothing { match lies at or beyond len: not found }
  375. ret
  376. .balign 16
  377. .L16xAligned_Body:
  378. movdqa %xmm1, %xmm0
  379. pcmpeqb (%ebx,%ecx), %xmm0 { ecx = offset of the next aligned chunk relative to buf }
  380. pmovmskb %xmm0, %eax
  381. test %eax, %eax
  382. jne .LFound
  383. add $16, %ecx
  384. .L16xAligned_Test:
  385. cmp %edx, %ecx
  386. jb .L16xAligned_Body { continue while the chunk offset is below len }
  387. pop %ebx
  388. .LNothing:
  389. mov $-1, %eax
  390. end;
  { Declared ahead of its body because it is the initial value of IndexByte_Impl. }
  function IndexByte_Dispatch(const buf;len:SizeInt;b:byte):SizeInt; forward;

  var
    { Implementation that IndexByte calls. It starts out as the dispatcher,
      which replaces it with a concrete routine on the first call. }
    IndexByte_Impl: function(const buf;len:SizeInt;b:byte):SizeInt = @IndexByte_Dispatch;

  {$define has_i386_IndexByte_Impl} { used in assembler to manually inline IndexByte }

  { IndexByte_Dispatch: binds IndexByte_Impl to the SSE2 routine when
    has_sse2_support is set and to the plain routine otherwise, then carries
    out the requested search through the routine just selected. }
  function IndexByte_Dispatch(const buf;len:SizeInt;b:byte):SizeInt;
  begin
    if not has_sse2_support then
      IndexByte_Impl:=@IndexByte_Plain
    else
      IndexByte_Impl:=@IndexByte_SSE2;
    IndexByte_Dispatch:=IndexByte_Impl(buf,len,b);
  end;
  { IndexByte: forwards the search for byte b in the first len bytes of buf to
    the routine currently stored in IndexByte_Impl and returns its result. }
  function IndexByte(const buf;len:SizeInt;b:byte):SizeInt;
  begin
    IndexByte:=IndexByte_Impl(buf,len,b);
  end;
  407. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  408. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  409. {$define FPC_SYSTEM_HAS_INDEXWORD}
  { IndexWord_Plain: searches the first len 16-bit words of buf for the value b, one word at a time. Returns the zero-based word index of the first match, or -1 when there is none. On entry eax = address of buf, edx = len, cx = b. A len above 1073741823 (compared unsigned, so this includes negative values) is treated as unbounded: the end pointer is set equal to the start pointer, so the loop only stops on a match or when the pointer wraps around to buf again. }
  410. function IndexWord_Plain(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  411. asm
  412. push %eax { keep the initial buf address for the index calculation }
  413. cmp $1073741823, %edx
  414. ja .LUnbounded
  415. lea (%eax,%edx,2), %edx { edx = end address = buf + 2*len }
  416. cmp %edx, %eax
  417. je .LNotFound { len = 0 }
  418. .LWordwise_Body:
  419. cmp %cx, (%eax)
  420. je .LFound
  421. add $2, %eax
  422. cmp %edx, %eax
  423. jne .LWordwise_Body
  424. .LNotFound:
  425. pop %eax { discard the saved buf address }
  426. mov $-1, %eax
  427. ret
  428. .LFound:
  429. pop %edx { edx = initial buf address }
  430. sub %edx, %eax
  431. shr $1, %eax { byte offset -> word index }
  432. ret
  433. .LUnbounded:
  434. mov %eax, %edx { end pointer = start pointer }
  435. jmp .LWordwise_Body
  436. end;
  { IndexWord_SSE2: searches the first len 16-bit words of buf for the value b using 16-byte SSE2 compares. Returns the zero-based word index of the first match, or -1 when there is none or len is zero. On entry eax = address of buf, edx = len, cx = b. All loads are 16-byte aligned, so bytes before buf and past the end inside the same aligned chunks are read as well; matches before buf are masked out and matches at or beyond len are rejected. Two paths exist: buf at an even address compares whole words with pcmpeqw; buf at an odd address compares bytes against a byte-swapped pattern and combines the masks of neighbouring bytes, carrying the last byte of a chunk over into the next one. edi, esi and ebx are saved and restored on the stack. }
  437. function IndexWord_SSE2(const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  438. asm
  439. test %edx, %edx
  440. je .LInstantNothing
  441. push %edi
  442. movd %ecx, %xmm0
  443. push %esi
  444. mov %eax, %esi
  445. push %ebx
  446. and $-0x10, %esi { esi = buf rounded down to 16 bytes }
  447. punpcklwd %xmm0, %xmm0
  448. movdqa (%esi), %xmm2 { xmm2 = first aligned chunk }
  449. sub %eax, %esi { esi = offset of the first chunk relative to buf, zero or negative }
  450. mov %edx, %edi { edi = len }
  451. pshufd $0, %xmm0, %xmm0 { xmm0 = b in all 8 words }
  452. lea 16(%esi), %edx { edx = bytes of the first chunk that belong to buf }
  453. mov %eax, %ebx { ebx = buf }
  454. movdqa %xmm0, %xmm1
  455. mov %edx, %ecx
  456. test $1, %al
  457. jnz .LUnaligned { buf is at an odd address }
  458. pcmpeqw %xmm0, %xmm2
  459. pmovmskb %xmm2, %eax
  460. shl %cl, %eax { these shifts together with the xor clear the mask bits of bytes in front of buf }
  461. xor %ax, %ax
  462. shr $1, %edx { edx = number of words covered by the first chunk }
  463. shr %cl, %eax
  464. jz .LLoopTest
  465. lea -8(%edx), %ecx { ecx = word offset of the first chunk relative to buf }
  466. .LMatch:
  467. bsf %eax, %eax { byte position of the match within the chunk }
  468. shr $1, %eax { -> word position }
  469. add %ecx, %eax { word index relative to buf }
  470. cmp %edi, %eax
  471. jnb .LNothing { match lies at or beyond len }
  472. pop %ebx
  473. pop %esi
  474. pop %edi
  475. ret
  476. .balign 16
  477. .LLoop:
  478. movdqa (%ebx,%edx,2), %xmm0 { edx = word offset of the next aligned chunk }
  479. mov %edx, %ecx
  480. add $8, %edx
  481. pcmpeqw %xmm1, %xmm0
  482. pmovmskb %xmm0, %eax
  483. test %eax, %eax
  484. jne .LMatch
  485. .LLoopTest:
  486. cmp %edi, %edx
  487. jb .LLoop
  488. .LNothing:
  489. pop %ebx
  490. pop %esi
  491. pop %edi
  492. .LInstantNothing:
  493. mov $-1, %eax
  494. ret
  495. .LUnaligned:
  496. psllw $8, %xmm1
  497. add %edi, %edi { edi = length in bytes }
  498. psrlw $8, %xmm0
  499. por %xmm1, %xmm0 { xmm0 = pattern with the two bytes of b swapped in every word }
  500. pcmpeqb %xmm0, %xmm2
  501. movdqa %xmm0, %xmm1
  502. pmovmskb %xmm2, %eax
  503. shl %cl, %eax { clear the mask bits of bytes in front of buf }
  504. xor %ax, %ax
  505. shr %cl, %eax
  506. lea (%eax,%eax), %ecx { ecx = byte mask shifted left by one; bit 16 carries byte 15 into the next chunk }
  507. and %ecx, %eax { bit i stays set when byte i and byte i-1 both matched }
  508. and $0x5555, %eax { keep even positions only: the high byte of an odd-aligned word }
  509. je .LUnalignedLoopTest
  510. .LUnalignedMatch:
  511. bsf %eax, %eax
  512. add %esi, %eax { byte offset of the high byte of the match relative to buf }
  513. cmp %edi, %eax
  514. jnb .LNothing { match lies at or beyond the byte length }
  515. pop %ebx
  516. shr $1, %eax { byte offset -> word index }
  517. pop %esi
  518. pop %edi
  519. ret
  520. .balign 16
  521. .LUnalignedLoop:
  522. movdqa (%ebx,%edx), %xmm0 { edx = byte offset of the next aligned chunk }
  523. shr $16, %ecx { bit 0 = whether byte 15 of the previous chunk matched }
  524. mov %edx, %esi
  525. add $16, %edx
  526. pcmpeqb %xmm1, %xmm0
  527. pmovmskb %xmm0, %eax
  528. add %eax, %eax
  529. or %eax, %ecx { ecx = shifted byte mask including the carried-in bit }
  530. mov %ecx, %eax
  531. shr $1, %eax
  532. and %ecx, %eax
  533. and $0x5555, %eax
  534. jne .LUnalignedMatch
  535. .LUnalignedLoopTest:
  536. cmp %edi, %edx
  537. jb .LUnalignedLoop
  538. pop %ebx
  539. pop %esi
  540. pop %edi
  541. mov $-1, %eax
  542. end;
  { Declared ahead of its body because it is the initial value of IndexWord_Impl. }
  function IndexWord_Dispatch(const buf;len:SizeInt;b:word):SizeInt; forward;

  var
    { Implementation that IndexWord calls. It starts out as the dispatcher,
      which replaces it with a concrete routine on the first call. }
    IndexWord_Impl: function(const buf;len:SizeInt;b:word):SizeInt = @IndexWord_Dispatch;

  { IndexWord_Dispatch: binds IndexWord_Impl to the SSE2 routine when
    has_sse2_support is set and to the plain routine otherwise, then carries
    out the requested search through the routine just selected. }
  function IndexWord_Dispatch(const buf;len:SizeInt;b:word):SizeInt;
  begin
    if not has_sse2_support then
      IndexWord_Impl:=@IndexWord_Plain
    else
      IndexWord_Impl:=@IndexWord_SSE2;
    IndexWord_Dispatch:=IndexWord_Impl(buf,len,b);
  end;
  { IndexWord: forwards the search for word b in the first len words of buf to
    the routine currently stored in IndexWord_Impl and returns its result. }
  function IndexWord(const buf;len:SizeInt;b:word):SizeInt; inline;
  begin
    IndexWord:=IndexWord_Impl(buf,len,b);
  end;
  558. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  559. {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
  560. {$define FPC_SYSTEM_HAS_INDEXDWORD}
  { IndexDWord_Plain: searches the first len 32-bit values of buf for b, one dword at a time. Returns the zero-based dword index of the first match, or -1 when there is none. On entry eax = address of buf, edx = len, ecx = b. A len above 536870911 (compared unsigned, so this includes negative values) is treated as unbounded: the end pointer is set equal to the start pointer, so the loop only stops on a match or when the pointer wraps around to buf again. }
  561. function IndexDWord_Plain(Const buf;len:SizeInt;b:DWord):SizeInt; assembler; nostackframe;
  562. asm
  563. push %eax { keep the initial buf address for the index calculation }
  564. cmp $536870911, %edx
  565. ja .LUnbounded
  566. lea (%eax,%edx,4), %edx { edx = end address = buf + 4*len }
  567. cmp %edx, %eax
  568. je .LNotFound { len = 0 }
  569. .LDWordwise_Body:
  570. cmp %ecx, (%eax)
  571. je .LFound
  572. add $4, %eax
  573. cmp %edx, %eax
  574. jne .LDWordwise_Body
  575. .LNotFound:
  576. pop %eax { discard the saved buf address }
  577. mov $-1, %eax
  578. ret
  579. .LFound:
  580. pop %edx { edx = initial buf address }
  581. sub %edx, %eax
  582. shr $2, %eax { byte offset -> dword index }
  583. ret
  584. .LUnbounded:
  585. mov %eax, %edx { end pointer = start pointer }
  586. jmp .LDWordwise_Body
  587. end;
  { IndexDWord_SSE2: searches the first len 32-bit values of buf for b. Returns the zero-based dword index of the first match, or -1 when there is none. On entry eax = address of buf, edx = len, ecx = b. With at least 4 elements, groups of 4 dwords are compared with unaligned 16-byte loads; a remainder of 1..3 elements is covered by one more 16-byte load that ends exactly at the end of the buffer and therefore overlaps the previous group. Fewer than 4 elements, and a len above 536870911 (compared unsigned, treated as unbounded), are handled one dword at a time. esi, ebx and edi are saved and restored on the stack. }
  588. function IndexDWord_SSE2(const buf;len:SizeInt;b:DWord):SizeInt; assembler; nostackframe;
  589. asm
  590. push %esi
  591. lea (%eax,%edx,4), %esi { esi = end address = buf + 4*len }
  592. push %ebx
  593. mov %eax, %ebx { ebx = buf }
  594. cmp $536870911, %edx
  595. ja .LUnbounded
  596. and $-4, %edx { edx = len rounded down to a multiple of 4 }
  597. jz .LDWordwise_Test { fewer than 4 elements }
  598. push %edi
  599. shl $2, %edx
  600. movd %ecx, %xmm2
  601. add %eax, %edx { edx = end of the full groups of 4 dwords }
  602. pshufd $0, %xmm2, %xmm1 { xmm1 = b in all 4 dwords }
  603. .balign 16
  604. .L4x_Body:
  605. movdqu (%eax), %xmm0
  606. pcmpeqd %xmm1, %xmm0
  607. pmovmskb %xmm0, %edi
  608. test %edi, %edi
  609. jnz .L4x_Found
  610. .L4x_Next: { label is not referenced within this routine }
  611. add $16, %eax
  612. cmp %eax, %edx
  613. jne .L4x_Body
  614. cmp %esi, %eax
  615. je .LNothing { len was a multiple of 4: everything has been checked }
  616. lea -16(%esi), %eax { re-check the last 16 bytes of the buffer to cover the remainder }
  617. movdqu (%eax), %xmm0
  618. pcmpeqd %xmm1, %xmm0
  619. pmovmskb %xmm0, %edi
  620. test %edi, %edi
  621. jnz .L4x_Found
  622. .LNothing:
  623. pop %edi
  624. pop %ebx
  625. pop %esi
  626. mov $-1, %eax
  627. ret
  628. .balign 16
  629. .L4x_Found:
  630. bsf %edi, %edi { byte offset of the first matching dword within the 16 bytes }
  631. add %edi, %eax { eax = address of the match }
  632. pop %edi
  633. .LDWordwise_Found:
  634. sub %ebx, %eax
  635. shr $2, %eax { byte offset -> dword index }
  636. pop %ebx
  637. pop %esi
  638. ret
  639. .balign 16
  640. .LDWordwise_Body:
  641. cmp %ecx, (%eax)
  642. je .LDWordwise_Found
  643. add $4, %eax
  644. .LDWordwise_Test:
  645. cmp %esi, %eax
  646. jne .LDWordwise_Body
  647. mov $-1, %eax
  648. pop %ebx
  649. pop %esi
  650. ret
  651. .LUnbounded:
  652. mov %eax, %esi { end pointer = start pointer }
  653. jmp .LDWordwise_Body
  654. end;
  { Declared ahead of its body because it is the initial value of IndexDWord_Impl. }
  function IndexDWord_Dispatch(const buf;len:SizeInt;b:DWord):SizeInt; forward;

  var
    { Implementation that IndexDWord calls. It starts out as the dispatcher,
      which replaces it with a concrete routine on the first call. }
    IndexDWord_Impl: function(const buf;len:SizeInt;b:DWord):SizeInt = @IndexDWord_Dispatch;

  { IndexDWord_Dispatch: binds IndexDWord_Impl to the SSE2 routine when
    has_sse2_support is set and to the plain routine otherwise, then carries
    out the requested search through the routine just selected. }
  function IndexDWord_Dispatch(const buf;len:SizeInt;b:DWord):SizeInt;
  begin
    if not has_sse2_support then
      IndexDWord_Impl:=@IndexDWord_Plain
    else
      IndexDWord_Impl:=@IndexDWord_SSE2;
    IndexDWord_Dispatch:=IndexDWord_Impl(buf,len,b);
  end;
  { IndexDWord: forwards the search for dword b in the first len dwords of buf
    to the routine currently stored in IndexDWord_Impl and returns its result. }
  function IndexDWord(const buf;len:SizeInt;b:DWord):SizeInt;
  begin
    IndexDWord:=IndexDWord_Impl(buf,len,b);
  end;
  670. {$endif FPC_SYSTEM_HAS_INDEXDWORD}
  671. {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  672. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  { CompareByte_Plain: compares len bytes of buf1 and buf2 without using SSE. Returns 0 when all bytes are equal, a positive value when buf1 holds the larger byte at the first difference and a negative value otherwise. The bytewise paths return the difference of the two bytes, the dword path returns +1 or -1. On entry eax = address of buf1, edx = address of buf2, ecx = len. A len of 6 or less (signed compare) is handled bytewise; longer buffers are compared bytewise until buf1 is 4-byte aligned, then one dword at a time, then bytewise for the tail. Throughout the routine edx holds buf2 - buf1, so that (eax,edx) addresses the byte of buf2 that corresponds to (eax). }
  673. function CompareByte_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  674. asm
  675. sub %eax, %edx { edx = buf2 - buf1 }
  676. cmp $6, %ecx
  677. push %esi
  678. lea (%eax,%ecx), %esi { esi = end of buf1 }
  679. jle .LBytewiseTail_Prepare
  680. push %ebx
  681. lea 3(%eax), %ebx
  682. and $-4, %ebx { ebx = buf1 rounded up to a multiple of 4 }
  683. cmp %ebx, %eax
  684. jne .LBytewiseHead_Body
  685. .L4x_Prepare:
  686. mov %esi, %eax
  687. and $-4, %eax { eax = end of buf1 rounded down to a multiple of 4 }
  688. jmp .L4x_Body
  689. .balign 16
  690. .L4x_Next:
  691. add $4, %ebx
  692. cmp %ebx, %eax
  693. je .LBytewiseTail_PrepareFromHeadAnd4x
  694. .L4x_Body:
  695. mov (%ebx,%edx), %ecx { ecx = dword of buf2 }
  696. cmp %ecx, (%ebx)
  697. je .L4x_Next
  698. mov (%ebx), %eax { eax = dword of buf1 }
  699. {$ifdef CPUX86_HAS_BSWAP}
  700. bswap %ecx
  701. {$else}
  702. rol $8, %cx { byte swap of ecx built from three rotates }
  703. rol $16, %ecx
  704. rol $8, %cx
  705. {$endif}
  706. pop %ebx
  707. pop %esi
  708. {$ifdef CPUX86_HAS_BSWAP}
  709. bswap %eax
  710. {$else}
  711. rol $8, %ax { byte swap of eax built from three rotates }
  712. rol $16, %eax
  713. rol $8, %ax
  714. {$endif}
  715. cmp %eax, %ecx { after the byte swap the first differing byte is the most significant one; carry is set when buf2 is below buf1 }
  716. sbb %eax, %eax { these three instructions turn the carry flag into +1 (carry set) or -1 (carry clear) }
  717. and $2, %eax
  718. sub $1, %eax
  719. ret
  720. .LBytewiseHead_Next:
  721. add $1, %eax
  722. cmp %eax, %ebx
  723. je .L4x_Prepare
  724. .LBytewiseHead_Body:
  725. movzbl (%eax,%edx), %ecx { ecx = byte of buf2 }
  726. cmp (%eax), %cl
  727. je .LBytewiseHead_Next
  728. pop %ebx
  729. jmp .LBytesDiffer
  730. .LBytewiseTail_PrepareFromHeadAnd4x:
  731. pop %ebx
  732. .LBytewiseTail_Prepare:
  733. cmp %esi, %eax
  734. jne .LBytewiseTail_Body
  735. .LNothingFound:
  736. xor %eax, %eax { result = 0: no difference }
  737. pop %esi
  738. ret
  739. .LBytewiseTail_Next:
  740. add $1, %eax
  741. cmp %eax, %esi
  742. je .LNothingFound
  743. .LBytewiseTail_Body:
  744. movzbl (%eax,%edx), %ecx { ecx = byte of buf2 }
  745. cmp (%eax), %cl
  746. je .LBytewiseTail_Next
  747. .LBytesDiffer:
  748. movzbl (%eax), %eax { eax = byte of buf1 }
  749. pop %esi
  750. sub %ecx, %eax { result = byte of buf1 - byte of buf2 }
  751. end;
  { CompareByte_SSE2: compares len bytes of buf1 and buf2 using SSE2. Returns 0 when all bytes are equal, a positive value when buf1 holds the larger byte at the first difference and a negative value otherwise. The 16-byte paths return the difference of the two bytes, the dword and bytewise paths return +1 or -1. On entry eax = address of buf1, edx = address of buf2, ecx = len. A len of 3 or less (signed compare) is handled bytewise. A len of 4..15 is handled by one over-reading 16-byte compare when neither 16-byte read crosses a 4096-byte boundary, otherwise by dword and byte compares. A len of 16 or more is compared in 16-byte steps, followed by one 16-byte compare ending exactly at the end of the buffers. esi and ebx are saved and restored on the stack. }
  752. function CompareByte_SSE2(const buf1, buf2; len: SizeInt): SizeInt; assembler; nostackframe;
  753. asm
  754. cmp $3, %ecx
  755. push %esi
  756. lea (%eax,%ecx), %esi { esi = buf1 end }
  757. jle .LBytewise_Test
  758. push %ebx
  759. and $-16, %ecx
  760. lea (%eax,%ecx), %ebx { ebx = end of full XMMs in buf1 }
  761. cmp %ebx, %eax
  762. jne .L16x_Body
  763. lea 15(%ebx), %eax { check if tails don't cross page boundaries and can be over-read to XMMs }
  764. lea 15(%edx), %ecx
  765. xor %ebx, %eax
  766. xor %edx, %ecx
  767. or %ecx, %eax
  768. cmp $4095, %eax { above 4095 means that a bit from bit 12 upward differs between a start and its end }
  769. ja .LCantOverReadBoth
  770. movdqu (%ebx), %xmm0
  771. movdqu (%edx), %xmm2
  772. pcmpeqb %xmm2, %xmm0
  773. pmovmskb %xmm0, %eax
  774. xor $65535, %eax { eax = mask of differing bytes }
  775. jz .LReturnEAX { all 16 bytes equal: result = 0 }
  776. bsf %eax, %ecx { ecx = position of the first difference }
  777. add %ecx, %ebx
  778. cmp %esi, %ebx { ignore over-read garbage bytes }
  779. jnb .L16x_Nothing
  780. movzbl (%ebx), %eax
  781. movzbl (%edx,%ecx), %edx
  782. sub %edx, %eax { result = byte of buf1 - byte of buf2 }
  783. .LReturnEAX:
  784. pop %ebx
  785. pop %esi
  786. ret
  787. .balign 16
  788. .L16x_Body:
  789. movdqu (%edx), %xmm0
  790. movdqu (%eax), %xmm1
  791. pcmpeqb %xmm1, %xmm0
  792. pmovmskb %xmm0, %ecx
  793. xor $65535, %ecx { ecx = mask of differing bytes }
  794. jnz .L16x_Found
  795. add $16, %eax
  796. add $16, %edx
  797. cmp %eax, %ebx
  798. jne .L16x_Body
  799. cmp %ebx, %esi
  800. je .L16x_Nothing { len was a multiple of 16: everything has been compared }
  801. sub %eax, %edx { edx = buf2 - buf1 }
  802. lea -16(%esi), %eax { last 16 bytes of buf1 }
  803. add %eax, %edx { matching position in buf2 }
  804. movdqu (%edx), %xmm0
  805. movdqu (%eax), %xmm1
  806. pcmpeqb %xmm1, %xmm0
  807. pmovmskb %xmm0, %ecx
  808. xor $65535, %ecx
  809. jnz .L16x_Found
  810. .L16x_Nothing:
  811. pop %ebx
  812. xor %eax, %eax { result = 0: no difference }
  813. pop %esi
  814. ret
  815. .balign 16
  816. .L16x_Found:
  817. bsf %ecx, %ecx { ecx = position of the first difference }
  818. pop %ebx
  819. movzbl (%eax,%ecx), %eax
  820. movzbl (%edx,%ecx), %edx
  821. pop %esi
  822. sub %edx, %eax { result = byte of buf1 - byte of buf2 }
  823. ret
  824. .LCantOverReadBoth:
  825. mov %esi, %eax
  826. sub %ebx, %eax
  827. and $-4, %eax
  828. add %ebx, %eax { eax = end of the whole dwords in buf1 }
  829. cmp %eax, %ebx
  830. je .LPopEbxAndGoBytewise
  831. .L4x_Body:
  832. mov (%ebx), %ecx { ecx = dword of buf1 }
  833. cmp (%edx), %ecx
  834. jne .L4x_Found
  835. add $4, %ebx
  836. add $4, %edx
  837. cmp %ebx, %eax
  838. jne .L4x_Body
  839. .LPopEbxAndGoBytewise:
  840. pop %ebx
  841. .LBytewise_Test:
  842. cmp %esi, %eax
  843. je .LBytewise_Nothing
  844. .LBytewise_Body:
  845. movzbl (%edx), %ecx { ecx = byte of buf2 }
  846. cmp (%eax), %cl { carry is set when the byte of buf2 is below the byte of buf1 }
  847. jne .LDoSbb
  848. add $1, %eax
  849. add $1, %edx
  850. cmp %esi, %eax
  851. jne .LBytewise_Body
  852. .LBytewise_Nothing:
  853. xor %eax, %eax { result = 0: no difference }
  854. pop %esi
  855. ret
  856. .L4x_Found:
  857. mov (%edx), %eax { eax = dword of buf2 }
  858. bswap %ecx
  859. bswap %eax
  860. cmp %ecx, %eax { after the byte swap the first differing byte is the most significant one; carry is set when buf2 is below buf1 }
  861. pop %ebx
  862. .LDoSbb:
  863. sbb %eax, %eax { these three instructions turn the carry flag into +1 (carry set) or -1 (carry clear) }
  864. and $2, %eax
  865. sub $1, %eax
  866. pop %esi
  867. end;
  { Declared ahead of its body because it is the initial value of CompareByte_Impl. }
  function CompareByte_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;

  var
    { Implementation that CompareByte calls. It starts out as the dispatcher,
      which replaces it with a concrete routine on the first call. }
    CompareByte_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareByte_Dispatch;

  { CompareByte_Dispatch: binds CompareByte_Impl to the SSE2 routine when
    has_sse2_support is set and to the plain routine otherwise, then carries
    out the requested comparison through the routine just selected. }
  function CompareByte_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    if not has_sse2_support then
      CompareByte_Impl:=@CompareByte_Plain
    else
      CompareByte_Impl:=@CompareByte_SSE2;
    CompareByte_Dispatch:=CompareByte_Impl(buf1, buf2, len);
  end;
  { CompareByte: forwards the comparison of len bytes of buf1 and buf2 to the
    routine currently stored in CompareByte_Impl and returns its result. }
  function CompareByte(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    CompareByte:=CompareByte_Impl(buf1, buf2, len);
  end;
  883. {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  884. {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
  885. {$define FPC_SYSTEM_HAS_COMPAREWORD}
  { CompareWord_Plain: compares len 16-bit words of buf1 and buf2 without using SSE. Returns 0 when all words are equal, +1 when buf1 holds the larger word (unsigned) at the first difference and -1 otherwise. On entry eax = address of buf1, edx = address of buf2, ecx = len. A len above 1073741823 (compared unsigned, so this includes negative values) is treated as unbounded: the end pointer is set equal to the start pointer and the wordwise loop runs until a difference is found or the pointer wraps around. A len of 3 or less is compared wordwise; longer buffers are compared one dword at a time, after one leading word when the low two bits of buf1 are not zero, with the remaining words compared wordwise. Throughout the routine edx holds buf2 - buf1. }
  886. function CompareWord_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  887. asm
  888. sub %eax, %edx { edx = buf2 - buf1 }
  889. push %esi
  890. cmp $1073741823, %ecx
  891. ja .LUnbounded
  892. cmp $3, %ecx
  893. lea (%eax,%ecx,2), %esi { esi = end of buf1 }
  894. jle .LWordwise_Test
  895. push %ebx
  896. test $3, %al
  897. je .LPtrUintWise_Prepare
  898. movzwl (%edx,%eax), %ebx { compare one leading word first }
  899. cmp (%eax), %bx
  900. jne .LPopEbxAndDoSbb
  901. add $2, %eax
  902. sub $1, %ecx
  903. .LPtrUintWise_Prepare:
  904. and $-2, %ecx { even number of words = whole dwords }
  905. lea (%eax,%ecx,2), %ecx { ecx = end of the dword part in buf1 }
  906. .balign 16
  907. .LPtrUintWise_Next:
  908. mov (%edx,%eax), %ebx { ebx = dword of buf2 }
  909. cmp (%eax), %ebx
  910. jne .LPtrUintsDiffer
  911. add $4, %eax
  912. cmp %eax, %ecx
  913. jne .LPtrUintWise_Next
  914. pop %ebx
  915. .LWordwise_Test:
  916. cmp %esi, %eax
  917. je .LNothingFound
  918. .LWordwise_Body:
  919. movzwl (%edx,%eax), %ecx { ecx = word of buf2 }
  920. cmp (%eax), %cx { carry is set when the word of buf2 is below the word of buf1 }
  921. jne .LDoSbb
  922. add $2, %eax
  923. cmp %esi, %eax
  924. jne .LWordwise_Body
  925. .LNothingFound:
  926. xor %eax, %eax { result = 0: no difference }
  927. pop %esi
  928. ret
  929. .LPtrUintsDiffer: { find out which of the two words of the dword differs }
  930. cmp (%eax), %bx
  931. jne .LPopEbxAndDoSbb
  932. shr $16, %ebx
  933. cmp 2(%eax), %bx
  934. .LPopEbxAndDoSbb:
  935. pop %ebx
  936. .LDoSbb:
  937. sbb %eax, %eax { these three instructions turn the carry flag into +1 (carry set) or -1 (carry clear) }
  938. and $2, %eax
  939. sub $1, %eax
  940. pop %esi
  941. ret
  942. .LUnbounded:
  943. mov %eax, %esi { end pointer = start pointer }
  944. jmp .LWordwise_Body
  945. end;
  { CompareWord_SSE2: compares buf1 and buf2 in 16-bit units, 16 bytes at a time where possible.
    Registers as used by the code: eax = buf1, edx = buf2, ecx = len (count of words).
    Result in eax: 0 when no difference was found, otherwise 1 if the first differing buf1 word is
    larger (unsigned) than the buf2 word and -1 if it is smaller.
    A len above 1073741823 (unsigned test) takes .LUnbounded, which falls back to the word loop with
    the end pointer set to the start pointer. }
  946. function CompareWord_SSE2(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  947. asm
  948. push %ebx
  949. cmp $1073741823, %ecx
  950. ja .LUnbounded
  951. lea (%eax,%ecx,2), %ebx { ebx = buf1 end }
  952. cmp $3, %ecx
  953. jle .LWordwise_Test
  954. push %esi
  955. and $-8, %ecx
  956. lea (%eax,%ecx,2), %esi { esi = end of full XMMs in buf1 }
  957. cmp %esi, %eax
  958. jne .L8x_Body
  { No full 16-byte block (4 to 7 words). A 16-byte load is only attempted when, for both pointers,
    address and address+15 agree in all bits above the low 12. }
  959. lea 15(%esi), %eax
  960. lea 15(%edx), %ecx
  961. xor %esi, %eax
  962. xor %edx, %ecx
  963. or %ecx, %eax
  964. cmp $4095, %eax
  965. ja .LCantOverReadBoth
  966. movdqu (%esi), %xmm0
  967. movdqu (%edx), %xmm2
  968. pcmpeqw %xmm2, %xmm0
  969. pmovmskb %xmm0, %eax
  970. xor $65535, %eax { set bits now mark differing bytes }
  971. jz .LReturnEAX { all 16 bytes equal: eax is already 0 }
  972. bsf %eax, %eax { byte offset of the first differing word }
  973. lea (%esi,%eax), %ecx
  974. cmp %ebx, %ecx
  975. jnb .LNothing { the difference lies at or beyond buf1 end, so it does not count }
  976. movzwl (%esi,%eax), %ebx
  977. cmp %bx, (%edx,%eax) { flags of buf2 word minus buf1 word }
  978. .L8x_DoSbb:
  979. pop %esi
  980. .LWordwise_DoSbb:
  981. pop %ebx
  { Carry set means the buf2 word is below the buf1 word: the three instructions below yield 1 then, -1 otherwise. }
  982. sbb %eax, %eax
  983. and $2, %eax
  984. sub $1, %eax
  985. ret
  986. .balign 16
  987. .L8x_Body:
  988. movdqu (%edx), %xmm0
  989. movdqu (%eax), %xmm1
  990. pcmpeqw %xmm1, %xmm0
  991. pmovmskb %xmm0, %ecx
  992. xor $65535, %ecx
  993. jnz .L8x_Found
  994. add $16, %eax
  995. add $16, %edx
  996. cmp %eax, %esi
  997. jne .L8x_Body
  998. cmp %esi, %ebx
  999. je .LNothing { len was a multiple of 8 words: nothing left to check }
  { Tail: re-position both pointers so that one last 16-byte block ends exactly at the buffer ends. }
  1000. sub %eax, %edx
  1001. lea -16(%ebx), %eax
  1002. add %eax, %edx
  1003. movdqu (%edx), %xmm0
  1004. movdqu (%eax), %xmm1
  1005. pcmpeqw %xmm1, %xmm0
  1006. pmovmskb %xmm0, %ecx
  1007. xor $65535, %ecx
  1008. jnz .L8x_Found
  1009. .LNothing:
  1010. xor %eax, %eax
  1011. .LReturnEAX:
  1012. pop %esi
  1013. pop %ebx
  1014. ret
  1015. .L8x_Found:
  1016. bsf %ecx, %ecx { byte offset of the first differing word inside the block }
  1017. movzwl (%eax,%ecx), %eax
  1018. cmp %ax, (%edx,%ecx)
  1019. jmp .L8x_DoSbb
  1020. .LCantOverReadBoth:
  1021. mov %esi, %eax { eax was used as scratch above; esi still equals the buf1 pointer on this path }
  1022. pop %esi
  1023. .LWordwise_Body:
  1024. movzwl (%eax), %ecx
  1025. cmp %cx, (%edx)
  1026. jne .LWordwise_DoSbb
  1027. .LWordwise_Next:
  1028. add $2, %eax
  1029. add $2, %edx
  1030. .LWordwise_Test:
  1031. cmp %ebx, %eax
  1032. jne .LWordwise_Body
  1033. xor %eax, %eax
  1034. pop %ebx
  1035. ret
  1036. .LUnbounded:
  1037. mov %eax, %ebx { end pointer = start pointer: the loop stops at the first difference or after a full wrap-around }
  1038. jmp .LWordwise_Body
  1039. end;
  { Lazy implementation selection for CompareWord.
    CompareWord_Impl starts out pointing at CompareWord_Dispatch, so the first call made
    through it arrives here. The routine rebinds CompareWord_Impl according to
    has_sse2_support and then forwards the request it received. }
  function CompareWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;

  var
    CompareWord_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareWord_Dispatch;

  function CompareWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    { Bind the implementation that matches the CPU. }
    if not has_sse2_support then
      CompareWord_Impl:=@CompareWord_Plain
    else
      CompareWord_Impl:=@CompareWord_SSE2;
    { Serve the current call through the routine that was just selected. }
    CompareWord_Dispatch:=CompareWord_Impl(buf1, buf2, len);
  end;
  { CompareWord: public entry point; hands the call to the routine CompareWord_Impl currently refers to
    and returns that routine's result unchanged. }
  function CompareWord(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    CompareWord:=CompareWord_Impl(buf1, buf2, len);
  end;
  1055. {$endif FPC_SYSTEM_HAS_COMPAREWORD}
  1056. {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
  1057. {$define FPC_SYSTEM_HAS_COMPAREDWORD}
  { CompareDWord_Plain: compares buf1 and buf2 in 32-bit units using integer instructions only.
    Registers as used by the code: eax = buf1, edx = buf2, ecx = len (count of dwords).
    Result in eax: 0 when no difference was found, otherwise 1 if the first differing buf1 dword is
    larger (unsigned) than the buf2 dword and -1 if it is smaller.
    A len of 536870912 or more (unsigned test) takes .LUnbounded. }
  1058. function CompareDWord_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  1059. asm
  1060. cmp $536870912, %ecx
  1061. push %ebx { push does not alter the flags tested by the jnb below }
  1062. jnb .LUnbounded
  1063. lea (%eax,%ecx,4), %ebx { ebx = buf1 end }
  1064. cmp %ebx, %eax
  1065. je .LNothing { len = 0 }
  1066. .balign 16
  1067. .LDwordwise_Body:
  1068. mov (%edx), %ecx
  1069. cmp (%eax), %ecx { flags of buf2 dword minus buf1 dword }
  1070. jne .LDoSbb
  1071. add $4, %eax
  1072. add $4, %edx
  1073. cmp %eax, %ebx
  1074. jne .LDwordwise_Body
  1075. .LNothing:
  1076. xor %eax, %eax
  1077. pop %ebx
  1078. ret
  1079. .LDoSbb:
  1080. pop %ebx
  { Carry set means the buf2 dword is below the buf1 dword: result 1 then, -1 otherwise. }
  1081. sbb %eax, %eax
  1082. and $2, %eax
  1083. sub $1, %eax
  1084. ret
  1085. .LUnbounded:
  1086. mov %eax, %ebx { end pointer = start pointer: the loop stops at the first difference or after a full wrap-around }
  1087. jmp .LDwordwise_Body
  1088. end;
  { CompareDWord_SSE2: compares buf1 and buf2 in 32-bit units, 16 bytes at a time where possible.
    Registers as used by the code: eax = buf1, edx = buf2, ecx = len (count of dwords).
    Result in eax: 0 when no difference was found, otherwise 1 if the first differing buf1 dword is
    larger (unsigned) than the buf2 dword and -1 if it is smaller.
    A len of 536870912 or more (unsigned test) takes .LUnbounded, which uses the dword loop with the
    end pointer set to the start pointer. }
  1089. function CompareDWord_SSE2(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  1090. asm
  1091. push %esi
  1092. cmp $536870912, %ecx
  1093. jnb .LUnbounded
  1094. lea (%eax,%ecx,4), %esi { esi = buf1 end }
  1095. cmp $3, %ecx
  1096. jle .LDWordwise_Test { fewer than 4 dwords: dword loop only }
  1097. push %ebx
  1098. and $-4, %ecx
  1099. lea (%eax,%ecx,4), %ecx { ecx = end of full XMMs in buf1 }
  1100. .balign 16
  1101. .L4x_Body:
  1102. movdqu (%edx), %xmm0
  1103. movdqu (%eax), %xmm1
  1104. pcmpeqd %xmm1, %xmm0
  1105. pmovmskb %xmm0, %ebx
  1106. xor $65535, %ebx { set bits now mark differing bytes }
  1107. jnz .L4x_Found
  1108. add $16, %eax
  1109. add $16, %edx
  1110. cmp %eax, %ecx
  1111. jne .L4x_Body
  1112. cmp %esi, %ecx
  1113. je .LNothing { len was a multiple of 4 dwords: nothing left to check }
  { Tail: re-position both pointers so that one last 16-byte block ends exactly at the buffer ends. }
  1114. sub %eax, %edx
  1115. lea -16(%esi), %eax
  1116. add %eax, %edx
  1117. movdqu (%edx), %xmm0
  1118. movdqu (%eax), %xmm1
  1119. pcmpeqd %xmm1, %xmm0
  1120. pmovmskb %xmm0, %ebx
  1121. xor $65535, %ebx
  1122. jnz .L4x_Found
  1123. .LNothing:
  1124. pop %ebx
  1125. pop %esi
  1126. xor %eax, %eax
  1127. ret
  1128. .balign 16
  1129. .LDWordwise_Body:
  1130. mov (%eax), %ecx
  1131. cmp %ecx, (%edx) { flags of buf2 dword minus buf1 dword }
  1132. jne .LDoSbb
  1133. add $4, %eax
  1134. add $4, %edx
  1135. .LDWordwise_Test:
  1136. cmp %esi, %eax
  1137. jne .LDWordwise_Body
  1138. xor %eax, %eax
  1139. pop %esi
  1140. ret
  1141. .L4x_Found:
  1142. bsf %ebx, %ebx { byte offset of the first differing dword inside the block }
  1143. mov (%eax,%ebx), %eax
  1144. cmp %eax, (%edx,%ebx)
  1145. pop %ebx { pop keeps the flags of the cmp above }
  1146. .LDoSbb:
  1147. pop %esi
  { Carry set means the buf2 dword is below the buf1 dword: result 1 then, -1 otherwise. }
  1148. sbb %eax, %eax
  1149. and $2, %eax
  1150. sub $1, %eax
  1151. ret
  1152. .LUnbounded:
  1153. mov %eax, %esi { end pointer = start pointer: the loop stops at the first difference or after a full wrap-around }
  1154. jmp .LDWordwise_Body
  1155. end;
  { Lazy implementation selection for CompareDWord.
    CompareDWord_Impl starts out pointing at CompareDWord_Dispatch, so the first call made
    through it arrives here. The routine rebinds CompareDWord_Impl according to
    has_sse2_support and then forwards the request it received. }
  function CompareDWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;

  var
    CompareDWord_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareDWord_Dispatch;

  function CompareDWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    { Bind the implementation that matches the CPU. }
    if not has_sse2_support then
      CompareDWord_Impl:=@CompareDWord_Plain
    else
      CompareDWord_Impl:=@CompareDWord_SSE2;
    { Serve the current call through the routine that was just selected. }
    CompareDWord_Dispatch:=CompareDWord_Impl(buf1, buf2, len);
  end;
  { CompareDWord: public entry point; hands the call to the routine CompareDWord_Impl currently refers to
    and returns that routine's result unchanged. }
  function CompareDWord(const buf1, buf2; len: SizeInt): SizeInt;
  begin
    CompareDWord:=CompareDWord_Impl(buf1, buf2, len);
  end;
  1171. {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
  1172. {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
  1173. {$define FPC_SYSTEM_HAS_INDEXCHAR0}
  { IndexChar0: searches at most len bytes starting at buf for the byte b, stopping early at a #0 byte.
    Registers on entry: eax = address of buf, edx = len, cl = b.
    Result: zero-based index of the first byte equal to b, or -1 when len = 0, when len bytes were
    examined without a match, or when a #0 byte was reached first.
    Each byte is compared with b before it is tested for #0, so searching for #0 itself reports the
    position of the terminator.
    Fix: for len = 0 the routine used to jump straight to .LFound while ecx still contained the
    search character, so an arbitrary value was returned; it now reports -1. }
  function IndexChar0(Const buf;len:SizeInt;b:AnsiChar):SizeInt; assembler;
  var
    saveesi,saveebx : longint;
  asm
          movl    %esi,saveesi       // esi and ebx are used as scratch below, keep the caller's values
          movl    %ebx,saveebx
          // Can't use scasb, or will have to do it twice, think this
          // is faster for small "len"
          movl    %eax,%esi          // Load address
          movzbl  %cl,%ebx           // Load searchpattern
          testl   %edx,%edx
          je      .LNotFound         // Nothing to scan: ecx is not an index yet, report -1
          xorl    %ecx,%ecx          // zero index in Buf
          xorl    %eax,%eax          // Clear the upper bytes so the 32-bit test of eax below only sees the loaded byte
          .balign 4
  .LLoop:
          movb    (%esi),%al         // Load byte
          cmpb    %al,%bl
          je      .LFound            // byte the same?
          incl    %ecx
          incl    %esi
          cmpl    %edx,%ecx          // Maximal distance reached?
          je      .LNotFound
          testl   %eax,%eax          // Nullchar = end of search?
          jne     .LLoop
  .LNotFound:
          movl    $-1,%ecx           // Not found return -1
  .LFound:
          movl    %ecx,%eax          // Result = index, or -1
          movl    saveesi,%esi
          movl    saveebx,%ebx
  end;
  1206. {$endif FPC_SYSTEM_HAS_INDEXCHAR0}
  1207. {****************************************************************************
  1208. String
  1209. ****************************************************************************}
  1210. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  1211. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  { fpc_shortstr_to_shortstr: copies the shortstring sstr into res, truncating it to the limit found in edx.
    NOTE(review): edx presumably holds high(res), the hidden size parameter of the open shortstring - confirm.
    The length byte is written first, then the characters: strings shorter than 7 bytes are copied
    bytewise, longer ones bytewise until the destination is 4-byte aligned, then in dwords, then the rest. }
  1212. procedure fpc_shortstr_to_shortstr(out res:shortstring; const sstr: shortstring);assembler;[public,alias:'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc;
  1213. var
  1214. saveesi,saveedi : longint;
  1215. asm
  1216. {$ifdef FPC_PROFILE}
  1217. push %eax
  1218. push %edx
  1219. push %ecx
  1220. call mcount
  1221. pop %ecx
  1222. pop %edx
  1223. pop %eax
  1224. {$endif FPC_PROFILE}
  1225. movl %edi,saveedi
  1226. movl %esi,saveesi
  1227. {$ifdef FPC_ENABLED_CLD}
  1228. cld
  1229. {$endif FPC_ENABLED_CLD}
  1230. movl res,%edi
  1231. movl sstr,%esi
  1232. movl %edx,%ecx { ecx = maximum length; must happen after sstr has been read }
  1233. xorl %eax,%eax
  1234. lodsb { eax = length byte of sstr }
  1235. cmpl %ecx,%eax
  1236. jbe .LStrCopy1
  1237. movl %ecx,%eax { source is longer than the limit: clamp }
  1238. .LStrCopy1:
  1239. stosb { store the resulting length byte }
  1240. cmpl $7,%eax
  1241. jl .LStrCopy2
  1242. movl %edi,%ecx { Align on 32bits }
  1243. negl %ecx
  1244. andl $3,%ecx
  1245. subl %ecx,%eax
  1246. rep
  1247. movsb
  1248. movl %eax,%ecx
  1249. andl $3,%eax { eax = bytes left over after the dword copy }
  1250. shrl $2,%ecx { ecx = number of whole dwords }
  1251. rep
  1252. movsl
  1253. .LStrCopy2:
  1254. movl %eax,%ecx
  1255. rep
  1256. movsb
  1257. movl saveedi,%edi
  1258. movl saveesi,%esi
  1259. end;
  { fpc_shortstr_assign: copies the shortstring at sstr to dstr, truncated to at most len characters.
    The length byte is written first, then the characters: strings shorter than 7 bytes are copied
    bytewise, longer ones bytewise until the destination is 4-byte aligned, then in dwords, then the rest.
    eax and ecx are saved and restored by the block itself; esi and edi are declared as modified. }
  1260. procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN'];
  1261. begin
  1262. asm
  1263. {$ifdef FPC_PROFILE}
  1264. push %eax
  1265. push %edx
  1266. push %ecx
  1267. call mcount
  1268. pop %ecx
  1269. pop %edx
  1270. pop %eax
  1271. {$endif FPC_PROFILE}
  1272. pushl %eax
  1273. pushl %ecx
  1274. {$ifdef FPC_ENABLED_CLD}
  1275. cld
  1276. {$endif FPC_ENABLED_CLD}
  1277. movl dstr,%edi
  1278. movl sstr,%esi
  1279. xorl %eax,%eax
  1280. movl len,%ecx
  1281. lodsb { eax = length byte of the source }
  1282. cmpl %ecx,%eax
  1283. jbe .LStrCopy1
  1284. movl %ecx,%eax { source is longer than len: clamp }
  1285. .LStrCopy1:
  1286. stosb { store the resulting length byte }
  1287. cmpl $7,%eax
  1288. jl .LStrCopy2
  1289. movl %edi,%ecx { Align on 32bits }
  1290. negl %ecx
  1291. andl $3,%ecx
  1292. subl %ecx,%eax
  1293. rep
  1294. movsb
  1295. movl %eax,%ecx
  1296. andl $3,%eax { eax = bytes left over after the dword copy }
  1297. shrl $2,%ecx { ecx = number of whole dwords }
  1298. rep
  1299. movsl
  1300. .LStrCopy2:
  1301. movl %eax,%ecx
  1302. rep
  1303. movsb
  1304. popl %ecx
  1305. popl %eax
  1306. end ['ESI','EDI'];
  1307. end;
  1308. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
  1309. {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  1310. {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  { fpc_shortstr_compare: compares two shortstrings.
    The first min(length(left), length(right)) characters are compared; at the first mismatch the result
    is (left byte - right byte). If that common part is equal, the result is length(left) - length(right).
    Strings whose common part is 7 bytes or longer are compared bytewise until the left pointer is
    4-byte aligned, then in dwords, then bytewise again. }
  1311. function fpc_shortstr_compare(const left,right:shortstring): longint;assembler; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc;
  1312. var
  1313. saveesi,saveedi,saveebx : longint;
  1314. asm
  1315. {$ifdef FPC_PROFILE}
  1316. push %eax
  1317. push %edx
  1318. push %ecx
  1319. call mcount
  1320. pop %ecx
  1321. pop %edx
  1322. pop %eax
  1323. {$endif FPC_PROFILE}
  1324. movl %edi,saveedi
  1325. movl %esi,saveesi
  1326. movl %ebx,saveebx
  1327. {$ifdef FPC_ENABLED_CLD}
  1328. cld
  1329. {$endif FPC_ENABLED_CLD}
  1330. movl right,%esi
  1331. movl left,%edi
  1332. movzbl (%esi),%eax { eax = length(right) }
  1333. movzbl (%edi),%ebx { ebx = length(left) }
  1334. movl %eax,%edx { edx keeps length(right) for the final subtraction }
  1335. incl %esi
  1336. incl %edi
  1337. cmpl %ebx,%eax
  1338. jbe .LStrCmp1
  1339. movl %ebx,%eax { eax = the shorter of the two lengths }
  1340. .LStrCmp1:
  1341. cmpl $7,%eax
  1342. jl .LStrCmp2
  1343. movl %edi,%ecx { Align on 32bits }
  1344. negl %ecx
  1345. andl $3,%ecx
  1346. subl %ecx,%eax
  1347. orl %ecx,%ecx { sets ZF when ecx = 0, so a skipped repe cmpsb still reads as equal }
  1348. repe
  1349. cmpsb
  1350. jne .LStrCmp3
  1351. movl %eax,%ecx
  1352. andl $3,%eax
  1353. shrl $2,%ecx
  1354. orl %ecx,%ecx
  1355. repe
  1356. cmpsl
  1357. je .LStrCmp2
  1358. movl $4,%eax { a dword differed: step both pointers back and redo those 4 bytes bytewise }
  1359. subl %eax,%esi
  1360. subl %eax,%edi
  1361. .LStrCmp2:
  1362. movl %eax,%ecx
  1363. orl %eax,%eax
  1364. repe
  1365. cmpsb
  1366. je .LStrCmp4
  1367. .LStrCmp3:
  1368. movzbl -1(%esi),%edx // Compare failing (or equal) position
  1369. movzbl -1(%edi),%ebx
  1370. .LStrCmp4:
  1371. movl %ebx,%eax // Compare length or position
  1372. subl %edx,%eax
  1373. movl saveedi,%edi
  1374. movl saveesi,%esi
  1375. movl saveebx,%ebx
  1376. end;
  1377. {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
  1378. {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  1379. {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  { fpc_pchar_to_shortstr: converts the zero-terminated string p into the shortstring res.
    A nil p yields an empty string. Otherwise the length is the position of the terminator as reported
    by IndexByte, limited to high(res), and the characters are copied with Move.
    Without FPC_PROFILE the routine has no stack frame and ends in a tail jump to Move. }
  1380. procedure fpc_pchar_to_shortstr(out res : shortstring;p:PAnsiChar);assembler;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc;
  1381. {$ifndef FPC_PROFILE}
  1382. nostackframe;
  1383. {$endif}
  1384. // eax = res, edx = high(res), ecx = p
  1385. asm
  1386. {$ifdef FPC_PROFILE}
  1387. push %eax
  1388. push %edx
  1389. push %ecx
  1390. call mcount
  1391. pop %ecx
  1392. pop %edx
  1393. pop %eax
  1394. {$endif FPC_PROFILE}
  1395. test %ecx, %ecx
  1396. jz .LEmpty
  1397. push %eax { save res }
  1398. push %ecx { save p }
  1399. push %edx { save high(res) }
  1400. mov %ecx, %eax { eax = IndexByte.buf }
  1401. { edx is already high(res) = IndexByte.count.
  1402. Careful: using high(res) instead of -1 limits the scan by high(res) which is a good thing,
  1403. but assumes that IndexByte is “safe” and won’t read potentially invalid memory past the searched byte even if formally (and wrongly) allowed by ‘count’.
  1404. Generic and x86 versions are “safe”. }
  1405. xor %ecx, %ecx { ecx = 0 = IndexByte.value }
  1406. { Stack is already aligned on 16 bytes if the function is nostackframe: return address + push eax + push ecx + push edx.
  1407. With a stack frame, there is an additional push ebp and need 12 more bytes to align. }
  1408. {$if defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
  1409. leal -12(%esp), %esp
  1410. {$endif defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
  1411. {$if defined(FPC_PIC) or not defined(has_i386_IndexByte_Impl)}
  1412. call IndexByte
  1413. {$else}
  1414. call IndexByte_Impl { manually inline IndexByte }
  1415. {$endif}
  1416. {$if defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
  1417. leal 12(%esp), %esp
  1418. {$endif defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
  1419. pop %ecx { ecx = high(res) = Move.len }
  1420. test %eax, %eax { If IndexByte result (eax) is non-negative (terminator is among first high(res) characters), use it, otherwise keep high(res). }
  1421. {$ifdef CPUX86_HAS_CMOV}
  1422. cmovns %eax, %ecx
  1423. {$else}
  1424. js .LEcxIsLen
  1425. mov %eax, %ecx
  1426. .LEcxIsLen:
  1427. {$endif}
  1428. pop %eax { pop p to eax = Move.src }
  1429. pop %edx { pop res to edx }
  1430. mov %cl, (%edx) { res[0] := len }
  1431. inc %edx { res[1] = Move.dst }
  1432. {$ifdef FPC_PROFILE}
  1433. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1434. leal -12(%esp), %esp
  1435. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1436. call Move
  1437. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1438. leal 12(%esp), %esp
  1439. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1440. jmp .LReturn
  1441. {$else FPC_PROFILE}
  1442. jmp Move { can perform a tail call }
  1443. {$endif FPC_PROFILE}
  1444. .LEmpty:
  1445. movb $0, (%eax) { p = nil: res becomes the empty string }
  1446. {$ifdef FPC_PROFILE}
  1447. .LReturn:
  1448. {$endif}
  1449. end;
  1450. {$endif FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
  1451. {$undef has_i386_IndexByte_Impl} { no longer required }
  1452. {$IFNDEF INTERNAL_BACKTRACE}
  1453. {$define FPC_SYSTEM_HAS_GET_FRAME}
  { get_frame: returns the current value of ebp. The routine has no stack frame of its own, so ebp is
    still the value the caller was running with. }
  1454. function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  1455. asm
  1456. movl %ebp,%eax
  1457. end;
  1458. {$ENDIF not INTERNAL_BACKTRACE}
  1459. {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  { Get_pc_addr: returns the dword at the top of the stack. As the routine has no stack frame, that is
    its own return address, i.e. the address of the instruction following the call. }
  1460. Function Get_pc_addr : Pointer;assembler;nostackframe;
  1461. asm
  1462. movl (%esp),%eax
  1463. end;
  1464. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { get_caller_addr: returns the pointer stored at offset 4 of the frame framebp points to,
    or nil when framebp is nil. The addr parameter is not used.
    win32 variant: additionally yields nil unless framebp lies between Sptr and StackTop (inclusive). }
  function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;
  {$if defined(win32)}
  { Windows has StackTop always properly set }
  begin
    Result:=nil;
    if not assigned(framebp) then
      exit;
    if framebp>StackTop then
      exit;
    if framebp<Sptr then
      exit;
    Result:=PPointer(framebp+4)^;
  end;
  {$else defined(win32)}
  nostackframe;assembler;
  asm
          testl   %eax,%eax          { nil frame pointer: return it unchanged (nil) }
          jz      .Lg_a_null
          movl    4(%eax),%eax       { value stored at offset 4 of the frame }
  .Lg_a_null:
  end;
  {$endif defined(win32)}
  1483. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { get_caller_frame: returns the pointer stored at the address framebp points to,
    or nil when framebp is nil. The addr parameter is not used.
    win32 variant: additionally yields nil unless framebp lies between Sptr and StackTop (inclusive). }
  function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;
  {$if defined(win32)}
  { Windows has StackTop always properly set }
  begin
    Result:=nil;
    if not assigned(framebp) then
      exit;
    if framebp>StackTop then
      exit;
    if framebp<Sptr then
      exit;
    Result:=PPointer(framebp)^;
  end;
  {$else defined(win32)}
  nostackframe;assembler;
  asm
          testl   %eax,%eax          { nil frame pointer: return it unchanged (nil) }
          jz      .Lgnf_null
          movl    (%eax),%eax        { value stored at the start of the frame }
  .Lgnf_null:
  end;
  {$endif defined(win32)}
  1502. {$define FPC_SYSTEM_HAS_SPTR}
  { Sptr: returns the value of esp as seen inside this frameless routine. }
  1503. Function Sptr : Pointer;assembler;nostackframe;
  1504. asm
  1505. movl %esp,%eax
  1506. end;
  1507. {****************************************************************************
  1508. Str()
  1509. ****************************************************************************}
  1510. {$if defined(disabled) and defined(regcall) }
  1511. {$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
  1512. {$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
  { int_str (longword and longint overloads): write the decimal representation of l into s.
    Both overloads share one body: the longword version sets up its registers and jumps to the label
    str_int_shortcut inside the longint version, with edx = 0 (no sign).
    This code is only compiled when the symbols "disabled" and "regcall" are both defined. }
  1513. label str_int_shortcut;
  1514. procedure int_str(l:longword;out s:shortstring);assembler;nostackframe;
  1515. asm
  1516. pushl %esi
  1517. pushl %edi
  1518. pushl %ebx
  1519. mov %edx,%edi
  1520. xor %edx,%edx
  1521. jmp str_int_shortcut
  1522. end;
  1523. procedure int_str(l:longint;out s:shortstring);assembler;nostackframe;
  1524. {Optimized for speed, but balanced with size.}
  1525. const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
  1526. 100000,1000000,10000000,
  1527. 100000000,1000000000);
  1528. asm
  1529. {$ifdef FPC_PROFILE}
  1530. push %eax
  1531. push %edx
  1532. push %ecx
  1533. call mcount
  1534. pop %ecx
  1535. pop %edx
  1536. pop %eax
  1537. {$endif FPC_PROFILE}
  1538. push %esi
  1539. push %edi
  1540. push %ebx
  1541. movl %edx,%edi
  1542. { Calculate absolute value and put sign in edx}
  1543. cltd
  1544. xorl %edx,%eax
  1545. subl %edx,%eax
  1546. negl %edx
  1547. str_int_shortcut:
  { Here: eax = magnitude, edx = 1 for a negative number and 0 otherwise, edi = address of s.
    NOTE(review): ecx presumably carries high(s) on entry - confirm against the calling convention. }
  1548. movl %ecx,%esi
  1549. {Calculate amount of digits in ecx.}
  1550. xorl %ecx,%ecx
  1551. bsrl %eax,%ecx
  1552. incl %ecx
  1553. imul $1233,%ecx { 1233/4096 approximates log10(2): turns the bit count into a digit estimate }
  1554. shr $12,%ecx
  1555. {$ifdef FPC_PIC}
  1556. call fpc_geteipasebx
  1557. {$ifdef darwin}
  1558. movl digits-.Lpic(%ebx),%ebx
  1559. {$else}
  1560. addl $_GLOBAL_OFFSET_TABLE_,%ebx
  1561. movl digits@GOT(%ebx),%ebx
  1562. {$endif}
  1563. cmpl (%ebx,%ecx,4),%eax
  1564. {$else}
  1565. cmpl digits(,%ecx,4),%eax
  1566. {$endif}
  1567. cmc
  1568. adcl $0,%ecx {Nr. digits ready in ecx.}
  1569. {Write length & sign.}
  1570. lea (%edx,%ecx),%ebx
  1571. movb $45,%bh {movb $'-,%bh Not supported by our ATT reader.}
  1572. movw %bx,(%edi)
  1573. addl %edx,%edi
  1574. subl %edx,%esi
  1575. {Skip digits beyond string length.}
  1576. movl %eax,%edx
  1577. subl %ecx,%esi
  1578. jae .Lloop_write
  1579. .balign 4
  1580. .Lloop_skip:
  1581. movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
  1582. mull %edx
  1583. shrl $3,%edx
  1584. decl %ecx
  1585. jz .Ldone {If (l<0) and (high(s)=1) this jump is taken.}
  1586. incl %esi
  1587. jnz .Lloop_skip
  1588. {Write out digits.}
  1589. .balign 4
  1590. .Lloop_write:
  1591. movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
  1592. {Pre-add '0'}
  1593. leal 48(%edx),%ebx {leal $'0(,%edx),%ebx Not supported by our ATT reader.}
  1594. mull %edx
  1595. shrl $3,%edx
  1596. leal (%edx,%edx,8),%eax {x mod 10 = x-10*(x div 10)}
  1597. subl %edx,%ebx
  1598. subl %eax,%ebx
  1599. movb %bl,(%edi,%ecx)
  1600. decl %ecx
  1601. jnz .Lloop_write
  1602. .Ldone:
  1603. popl %ebx
  1604. popl %edi
  1605. popl %esi
  1606. end;
  1607. {$endif}
  1608. {****************************************************************************
  1609. Bounds Check
  1610. ****************************************************************************}
  1611. { do a thread-safe inc/dec }
  1612. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  { cpudeclocked: decrements l with a LOCK-prefixed instruction and returns true when the new value is zero.
    eax holds the address of l on entry; the result is returned in al. }
  1613. function cpudeclocked(var l : longint) : boolean;assembler;nostackframe;
  1614. asm
  1615. lock
  1616. decl (%eax)
  1617. setzb %al
  1618. end;
  1619. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { cpuinclocked: increments l with a LOCK-prefixed instruction. eax holds the address of l on entry. }
  1620. procedure cpuinclocked(var l : longint);assembler;nostackframe;
  1621. asm
  1622. lock
  1623. incl (%eax)
  1624. end;
  1625. // inline SMP check and normal lock.
  1626. // the locked one is so slow, inlining doesn't matter.
  { declocked: decrements l and reports whether it reached zero.
    When ismultithread is set, the LOCK-prefixed cpudeclocked is used; otherwise a plain dec suffices. }
  function declocked(var l : longint) : boolean; inline;
  begin
    if ismultithread then
      declocked:=cpudeclocked(l)
    else
      begin
        dec(l);
        declocked:=(l=0);
      end;
  end;
  { inclocked: increments l.
    When ismultithread is set, the LOCK-prefixed cpuinclocked is used; otherwise a plain inc suffices. }
  procedure inclocked(var l : longint); inline;
  begin
    if ismultithread then
      cpuinclocked(l)
    else
      inc(l);
  end;
  { InterLockedDecrement: subtracts 1 from Target with a locked xadd and returns the new value.
    eax holds the address of Target on entry. }
  1644. function InterLockedDecrement (var Target: longint) : longint; assembler;
  1645. asm
  1646. movl $-1,%edx
  1647. xchgl %edx,%eax { eax = -1 (addend), edx = address of Target }
  1648. lock
  1649. xaddl %eax, (%edx)
  1650. decl %eax { xadd left the previous value in eax; previous - 1 is the new value }
  1651. end;
  { InterLockedIncrement: adds 1 to Target with a locked xadd and returns the new value.
    eax holds the address of Target on entry. }
  1652. function InterLockedIncrement (var Target: longint) : longint; assembler;
  1653. asm
  1654. movl $1,%edx
  1655. xchgl %edx,%eax { eax = 1 (addend), edx = address of Target }
  1656. lock
  1657. xaddl %eax, (%edx)
  1658. incl %eax { xadd left the previous value in eax; previous + 1 is the new value }
  1659. end;
  { InterLockedExchange: stores Source in Target and returns the value Target held before.
    eax = address of Target, edx = Source. No LOCK prefix is written; xchg with a memory operand is
    locked implicitly on x86. }
  1660. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  1661. asm
  1662. xchgl (%eax),%edx
  1663. movl %edx,%eax
  1664. end;
  { InterLockedExchangeAdd: adds Source to Target with a locked xadd and returns the value Target held before.
    eax = address of Target, edx = Source on entry. }
  1665. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  1666. asm
  1667. xchgl %eax,%edx { eax = Source (addend), edx = address of Target }
  1668. lock
  1669. xaddl %eax, (%edx)
  1670. end;
  { InterlockedCompareExchange: if Target equals Comperand, stores NewValue in Target (locked cmpxchg).
    Returns the value Target held before the operation in either case.
    eax = address of Target, edx = NewValue, ecx = Comperand on entry. }
  1671. function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler;
  1672. asm
  1673. xchgl %eax,%ecx { cmpxchg compares against eax: eax = Comperand, ecx = address of Target }
  1674. lock
  1675. cmpxchgl %edx, (%ecx)
  1676. end;
  { InterlockedCompareExchange64: 64-bit compare-and-exchange using lock cmpxchg8b.
    If Target equals Comperand, NewValue is stored in Target. The result is left in edx:eax by
    cmpxchg8b, i.e. the value Target held before the operation.
    eax holds the address of Target on entry; ebx and edi are saved and restored. }
  1677. function InterlockedCompareExchange64(var Target: int64; NewValue: int64; Comperand: int64): int64; assembler;
  1678. asm
  1679. pushl %ebx
  1680. pushl %edi
  1681. movl %eax,%edi { free eax: cmpxchg8b needs edx:eax for the comparand }
  1682. movl Comperand+4,%edx
  1683. movl Comperand+0,%eax
  1684. movl NewValue+4,%ecx
  1685. movl NewValue+0,%ebx
  1686. lock cmpxchg8b (%edi)
  1687. pop %edi
  1688. pop %ebx
  1689. end;
  1690. {****************************************************************************
  1691. FPU
  1692. ****************************************************************************}
  { Bit masks for floating point exception handling.
    FPU_* are single-bit flags in the low byte (FPU_ExceptionMask = $ff covers all of them).
    MM_Invalid..MM_Precicion are the six low flag bits (MM_ExceptionMask = $3f); MM_Mask* are the
    bits 7..12, one per exception kind in the same order.
    NOTE(review): the FPU_* values presumably refer to the x87 status word and the MM_* values to
    MXCSR - confirm against the users of these constants. }
  1693. const
  1694. { Internal constants for use in system unit }
  1695. FPU_Invalid = 1;
  1696. FPU_Denormal = 2;
  1697. FPU_DivisionByZero = 4;
  1698. FPU_Overflow = 8;
  1699. FPU_Underflow = $10;
  1700. FPU_StackUnderflow = $20;
  1701. FPU_StackOverflow = $40;
  1702. FPU_ExceptionMask = $ff;
  1703. MM_Invalid = 1;
  1704. MM_Denormal = 2;
  1705. MM_DivisionByZero = 4;
  1706. MM_Overflow = 8;
  1707. MM_Underflow = $10;
  1708. MM_Precicion = $20; { spelling is part of the identifier; do not correct without updating all users }
  1709. MM_ExceptionMask = $3f;
  1710. MM_MaskInvalidOp = %0000000010000000;
  1711. MM_MaskDenorm = %0000000100000000;
  1712. MM_MaskDivZero = %0000001000000000;
  1713. MM_MaskOverflow = %0000010000000000;
  1714. MM_MaskUnderflow = %0000100000000000;
  1715. MM_MaskPrecision = %0001000000000000;
  1716. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { SysInitFPU: intentionally empty on this target; the FPU setup visible in this file is done by SysResetFPU. }
  1717. Procedure SysInitFPU;
  1718. begin
  1719. end;
  1720. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { SysResetFPU: reinitialises the x87 FPU (fninit) and loads Default8087CW as its control word.
    When has_sse_support is set, DefaultMXCSR is loaded into MXCSR as well. }
  1721. Procedure SysResetFPU;
  1722. var
  1723. { these locals are so we don't have to hack pic code in the assembler }
  1724. localmxcsr: dword;
  1725. localfpucw: word;
  1726. begin
  1727. localfpucw:=Default8087CW;
  1728. asm
  1729. fninit
  1730. fwait
  1731. fldcw localfpucw
  1732. end;
  1733. if has_sse_support then
  1734. begin
  1735. localmxcsr:=DefaultMXCSR;
  1736. asm
  1737. { setup sse exceptions }
  1738. {$ifndef OLD_ASSEMBLER}
  1739. ldmxcsr localmxcsr
  1740. {$else OLD_ASSEMBLER}
  { Assemblers without the mnemonic: place the value on the stack and emit ldmxcsr (%esp) as raw bytes. }
  1741. mov localmxcsr,%eax
  1742. subl $4,%esp
  1743. mov %eax,(%esp)
  1744. //ldmxcsr (%esp)
  1745. .byte 0x0f,0xae,0x14,0x24
  1746. addl $4,%esp
  1747. {$endif OLD_ASSEMBLER}
  1748. end;
  1749. end;
  1750. end;
  1751. { because of the brain-dead SSE detection on x86, this test is postponed }
  { fpc_cpucodeinit: detects CPU features and puts the FPU into its default state.
    - When cpuid_support is set, CPUID leaf 1 is queried and has_mmx_support, has_sse_support,
      has_sse2_support and has_sse3_support are derived from the returned edx/ecx bits.
    - SSE is only reported when executing an SSE instruction does not clear os_supports_sse
      (see the comment inside the probe block).
    - In a library (IsLibrary) the current control words become the defaults, so the host's settings are kept.
    - Finally SysResetFPU is called, and setup_fastmove when USE_FASTMOVE is defined. }
  1752. procedure fpc_cpucodeinit;
  1753. var
  1754. _ecx,_edx : longint;
  1755. begin
  1756. if cpuid_support then
  1757. begin
  1758. asm
  1759. movl $1,%eax
  1760. cpuid
  1761. movl %edx,_edx
  1762. movl %ecx,_ecx
  1763. end ['ebx'];
  { edx bit 23 }
  1764. has_mmx_support:=(_edx and $800000)<>0;
  { edx bit 25 }
  1765. if ((_edx and $2000000)<>0) then
  1766. begin
  1767. os_supports_sse:=true;
  1768. sse_check:=true;
  1769. asm
  1770. { force an sse exception if no sse is supported, the exception handler sets
  1771. os_supports_sse to false then }
  1772. { don't change this instruction, the code above depends on its size }
  1773. {$ifdef OLD_ASSEMBLER}
  1774. .byte 0x0f,0x28,0xf7
  1775. {$else}
  1776. movaps %xmm7, %xmm6
  { the text after endif below is stale: the matching ifdef tests OLD_ASSEMBLER }
  1777. {$endif not EMX}
  1778. end;
  1779. sse_check:=false;
  1780. has_sse_support:=os_supports_sse;
  1781. end;
  1782. if has_sse_support then
  1783. begin
  { edx bit 26 }
  1784. has_sse2_support:=((_edx and $4000000)<>0);
  { NOTE(review): the mask below is ecx bit 9. The Intel SDM lists SSE3 as ecx bit 0 and SSSE3 as
    ecx bit 9, so this flag is only set on CPUs that report SSSE3 - confirm whether that is intended. }
  1785. has_sse3_support:=((_ecx and $200)<>0);
  1786. end;
  1787. end;
  1788. { don't let libraries influence the FPU cw set by the host program }
  1789. if IsLibrary then
  1790. begin
  1791. Default8087CW:=Get8087CW;
  1792. if has_sse_support then
  1793. DefaultMXCSR:=GetMXCSR;
  1794. end;
  1795. SysResetFPU;
  1796. {$ifdef USE_FASTMOVE}
  1797. setup_fastmove;
  1798. {$endif}
  1799. end;
  1800. {$if not defined(darwin) and defined(regcall) }
  1801. { darwin requires that the stack is aligned to 16 bytes when calling another function }
  1802. {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
  1803. {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
  { fpc_AnsiStr_Decr_Ref: releases one reference to the ansistring S and sets S to nil.
    eax holds the address of S on entry. Nothing happens for a nil string; a string whose reference
    count (at offset -8 from the data pointer) is negative is only set to nil.
    Otherwise the count is decremented - with a LOCK prefix when ismultithread is non-zero - and when it
    reaches zero the allocation starting 12 bytes before the data pointer is passed to FPC_FREEMEM. }
  1804. Procedure fpc_AnsiStr_Decr_Ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF']; compilerproc; nostackframe; assembler;
  1805. asm
  1806. movl (%eax),%edx
  1807. testl %edx,%edx
  1808. jz .Lquit
  1809. movl $0,(%eax) // s:=nil
  1810. cmpl $0,-8(%edx) // exit if refcount<0
  1811. jl .Lquit
  1812. {$ifdef FPC_PIC}
  1813. call fpc_geteipasecx
  1814. addl $_GLOBAL_OFFSET_TABLE_,%ecx
  1815. movl ismultithread@GOT(%ecx),%ecx
  1816. cmpl $0,(%ecx)
  1817. {$else FPC_PIC}
  1818. cmpl $0,ismultithread
  1819. {$endif FPC_PIC}
  1820. je .Lskiplock
  1821. .byte 0xF0 // LOCK prefix, jumped over if IsMultiThread = false. FPC assembler does not accept disjoint LOCK mnemonic.
  1822. .Lskiplock:
  1823. decl -8(%edx)
  1824. jz .Lfree
  1825. .Lquit:
  1826. ret
  1827. .Lfree:
  1828. leal -12(%edx),%eax // points to start of allocation
  1829. { freemem is not an assembler leaf function like fpc_geteipasecx, so it
  1830. needs to be called with proper stack alignment }
  1831. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1832. leal -12(%esp),%esp
  1833. call FPC_FREEMEM
  1834. leal 12(%esp),%esp
  1835. {$else FPC_SYSTEM_STACKALIGNMENT16}
  1836. jmp FPC_FREEMEM // can perform a tail call
  1837. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1838. end;
  { fpc_ansistr_Unique: fast path of the "make unique" operation for the ansistring S.
    Returns the string pointer unchanged when S is nil or its reference count (at offset -8) is 1;
    in every other case the work is delegated to fpc_truely_ansistr_unique and its result is returned. }
  1839. function fpc_truely_ansistr_unique(Var S : Pointer): Pointer; forward;
  1840. {$define FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
  1841. Function fpc_ansistr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_ANSISTR_UNIQUE']; compilerproc; nostackframe;assembler;
  1842. asm
  1843. // Var S located in register
  1844. // Var $result located in register
  1845. movl %eax,%edx
  1846. // [437] pointer(result) := pointer(s);
  1847. movl (%eax),%eax
  1848. // [438] If Pointer(S)=Nil then
  1849. testl %eax,%eax
  1850. je .Lj4031
  1851. .Lj4036:
  1852. // [440] if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
  1853. movl -8(%eax),%ecx
  1854. cmpl $1,%ecx
  1855. je .Lj4038
  1856. // [441] result:=fpc_truely_ansistr_unique(s);
  1857. movl %edx,%eax
  1858. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1859. leal -12(%esp),%esp
  1860. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1861. call fpc_truely_ansistr_unique
  1862. {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
  1863. leal 12(%esp),%esp
  1864. {$endif FPC_SYSTEM_STACKALIGNMENT16}
  1865. .Lj4038:
  1866. .Lj4031:
  1867. // [442] end;
  1868. end;
  1869. {$endif FPC_HAS_FEATURE_ANSISTRINGS}
  1870. {$endif ndef darwin and defined(regcall) }
  1871. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  1872. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  { ReadBarrier: executes lfence when CPUX86_HAS_SSE2 is defined; otherwise a LOCK-prefixed add of 0
    to the top of the stack, which changes no data. }
  1873. procedure ReadBarrier;assembler;nostackframe;
  1874. asm
  1875. {$ifdef CPUX86_HAS_SSE2}
  1876. lfence
  1877. {$else CPUX86_HAS_SSE2}
  1878. lock
  1879. addl $0,0(%esp)
  1880. {$endif CPUX86_HAS_SSE2}
  1881. end;
  { ReadDependencyBarrier: intentionally empty on this target, see the comment in the body. }
  1882. procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif}
  1883. begin
  1884. { reads imply barrier on earlier reads depended on }
  1885. end;
  { ReadWriteBarrier: executes mfence when CPUX86_HAS_SSE2 is defined; otherwise a LOCK-prefixed add of 0
    to the top of the stack, which changes no data. }
  1886. procedure ReadWriteBarrier;assembler;nostackframe;
  1887. asm
  1888. {$ifdef CPUX86_HAS_SSE2}
  1889. mfence
  1890. {$else CPUX86_HAS_SSE2}
  1891. lock
  1892. addl $0,0(%esp)
  1893. {$endif CPUX86_HAS_SSE2}
  1894. end;
  { WriteBarrier: executes sfence when CPUX86_HAS_SSEUNIT is defined; otherwise the body is empty. }
  1895. procedure WriteBarrier;assembler;nostackframe;
  1896. asm
  1897. {$ifdef CPUX86_HAS_SSEUNIT}
  1898. sfence
  1899. {$endif CPUX86_HAS_SSEUNIT}
  1900. end;
  1901. {$endif}
  1902. {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
  1903. {$define FPC_SYSTEM_HAS_BSF_QWORD}
  { BsfQWord: index (0..63) of the lowest set bit of AValue, or 255 when AValue is 0.
    AValue is read from the stack: low dword at 4(%esp), high dword at 8(%esp). }
  1904. function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1905. asm
  1906. bsfl 4(%esp),%eax
  1907. jnz .L2 { a bit is set in the low dword: its index is the result }
  1908. .L1:
  1909. bsfl 8(%esp),%eax
  1910. jnz .L3
  1911. movl $223,%eax { both dwords are zero: 223 + 32 = 255 }
  1912. .L3:
  1913. addl $32,%eax
  1914. .L2:
  1915. end;
  1916. {$endif FPC_SYSTEM_HAS_BSF_QWORD}
  1917. {$ifndef FPC_SYSTEM_HAS_BSR_QWORD}
  1918. {$define FPC_SYSTEM_HAS_BSR_QWORD}
  { BsrQWord: index (0..63) of the highest set bit of AValue, or 255 when AValue is 0.
    AValue is read from the stack: low dword at 4(%esp), high dword at 8(%esp). }
  1919. function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
  1920. asm
  1921. bsrl 8(%esp),%eax
  1922. jz .L1 { high dword is zero: look at the low dword }
  1923. add $32,%eax
  1924. jmp .L2
  1925. .L1:
  1926. bsrl 4(%esp),%eax
  1927. jnz .L2
  1928. movl $255,%eax
  1929. .L2:
  1930. end;
  1931. {$endif FPC_SYSTEM_HAS_BSR_QWORD}
  1932. {$ifndef FPC_SYSTEM_HAS_SAR_QWORD}
  1933. {$define FPC_SYSTEM_HAS_SAR_QWORD}
  { fpc_SarInt64: arithmetic shift right of the 64-bit value AValue by (Shift and 63) bits.
    Shift arrives in al; AValue is read from the stack (low dword at 4(%esp), high dword at 8(%esp)).
    The result is produced in edx:eax. }
  1934. function fpc_SarInt64(Const AValue : Int64;const Shift : Byte): Int64; [Public,Alias:'FPC_SARINT64']; compilerproc; assembler; nostackframe;
  1935. asm
  1936. movb %al,%cl
  1937. movl 8(%esp),%edx
  1938. movl 4(%esp),%eax
  1939. andb $63,%cl
  1940. cmpb $32,%cl
  1941. jnb .L1
  1942. shrdl %cl,%edx,%eax { count below 32: shift bits from the high dword into the low one }
  1943. sarl %cl,%edx
  1944. jmp .Lexit
  1945. .L1:
  1946. movl %edx,%eax { count 32..63: the low dword comes entirely from the high dword }
  1947. sarl $31,%edx { high dword becomes the sign extension }
  1948. andb $31,%cl
  1949. sarl %cl,%eax
  1950. .Lexit:
  1951. end;
  1952. {$endif FPC_SYSTEM_HAS_SAR_QWORD}