12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 1999-2000 by the Free Pascal development team.
- Processor dependent implementation for the system unit for
- intel i386+
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$if not(defined(VER3_0)) and defined(linux)}
- {$define FPC_SYSTEM_STACKALIGNMENT16}
- {$endif not(defined(VER3_0)) and defined(linux)}
- {****************************************************************************
- Primitives
- ****************************************************************************}
- var
- os_supports_sse : boolean;
- { this variable is set to true while an SSE check is being executed, so that no SIGILL should be generated }
- sse_check : boolean;
- fast_large_repmovstosb : boolean; { Enhanced REP MOVSB and STOSB (ERMSB) feature @ CPUID(7).ebx[9]. }
- fpc_cpucodeinit_performed : boolean; { Code before fpc_cpucodeinit can call certain dispatched functions, such as Move. }
- {$asmmode ATT}
- function cpuid_support : boolean;assembler;nostackframe;
- {
- Check if the ID-flag can be changed, if changed then CpuID is supported.
- Tested under go32v1 and Linux on c6x86 with CpuID enabled and disabled (PFV)
- Result (in al): true when the 0x200000 bit of EFLAGS could be toggled.
- The EFLAGS value found on entry is reloaded before the final test.
- }
- asm
- pushfl { keep a copy of the original EFLAGS on the stack }
- movl (%esp),%eax { eax = original EFLAGS }
- xorl $0x200000,%eax { flip the ID bit }
- pushl %eax
- popfl { try to load EFLAGS with the flipped bit }
- pushfl
- popl %eax { eax = EFLAGS as the CPU actually kept them }
- xorl (%esp),%eax { eax = bits that differ from the original }
- popfl { reload the original EFLAGS }
- testl $0x200000,%eax
- setnz %al { true if the ID bit really changed }
- end;
- {$define FPC_SYSTEM_HAS_FPC_CPUINIT}
- procedure fpc_cpuinit;
- begin
- { Intentionally empty: because of the brain-dead SSE detection on x86, this test is postponed
- to fpc_cpucodeinit, which must be implemented in an OS-dependent way (FK)
- has_sse_support:=sse_support;
- has_mmx_support:=mmx_support;
- }
- end;
- {$ifndef darwin}
- procedure fpc_geteipasebx; [public, alias: 'fpc_geteipasebx'];assembler; nostackframe; {
- Loads the dword at the top of the stack into ebx. The routine has no stack
- frame and pushes nothing, so on entry that dword is the caller's return address.
- }
- asm
- movl (%esp),%ebx
- end;
- procedure fpc_geteipasecx; [public, alias: 'fpc_geteipasecx'];assembler; nostackframe; {
- Loads the dword at the top of the stack into ecx. The routine has no stack
- frame and pushes nothing, so on entry that dword is the caller's return address.
- }
- asm
- movl (%esp),%ecx
- end;
- {$endif}
- {$if not defined(FPC_SYSTEM_HAS_MOVE)
- and not defined(OLD_ASSEMBLER)
- and not defined(darwin)}
- {$i fastmove.inc}
- {$endif}
- {$ifndef FPC_SYSTEM_HAS_MOVE}
- {$define FPC_SYSTEM_HAS_MOVE}
- procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler; {
- Copies count bytes from source to dest. The copy runs backward when dest lies
- inside the source range, so overlapping ranges are copied correctly.
- Nothing is done when count <= 0 or when source and dest are the same address.
- On entry: eax = address of source, edx = address of dest, ecx = count.
- esi and edi are kept in the locals saveesi/saveedi and reloaded at .LMoveEnd.
- }
- var
- saveesi,saveedi : longint;
- asm
- movl %edi,saveedi
- movl %esi,saveesi
- movl %eax,%esi { esi = source }
- movl %edx,%edi { edi = dest }
- movl %ecx,%edx { edx = count }
- movl %edi,%eax
- { check for zero or negative count }
- cmpl $0,%edx
- jle .LMoveEnd
- { Check for back or forward }
- sub %esi,%eax { eax = dest - source }
- jz .LMoveEnd { Do nothing when source=dest }
- jc .LFMove { Do forward, dest<source }
- cmp %edx,%eax
- jb .LBMove { Dest is in range of move, do backward }
- { Forward Copy }
- .LFMove:
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- cmpl $15,%edx
- jl .LFMove1 { fewer than 15 bytes: plain byte copy }
- movl %edi,%ecx { Align on 32bits }
- negl %ecx
- andl $3,%ecx { ecx = bytes needed to bring edi to a 4-byte boundary }
- subl %ecx,%edx
- rep
- movsb
- movl %edx,%ecx
- andl $3,%edx { edx = bytes left over after the dword copy }
- shrl $2,%ecx { ecx = number of whole dwords }
- rep
- movsl
- .LFMove1:
- movl %edx,%ecx { copy the remaining bytes one by one }
- rep
- movsb
- jmp .LMoveEnd
- { Backward Copy }
- .LBMove:
- std { copy downwards; the direction flag is cleared again by the cld before .LMoveEnd }
- addl %edx,%esi
- addl %edx,%edi
- movl %edi,%ecx { ecx = dest + count, used for the alignment count below }
- decl %esi { esi/edi = last byte of each range }
- decl %edi
- cmpl $15,%edx
- jl .LBMove1 { fewer than 15 bytes: plain byte copy }
- negl %ecx { Align on 32bits }
- andl $3,%ecx
- subl %ecx,%edx
- rep
- movsb
- movl %edx,%ecx
- andl $3,%edx { edx = bytes left over after the dword copy }
- shrl $2,%ecx { ecx = number of whole dwords }
- subl $3,%esi { point at the lowest byte of the next dword to copy }
- subl $3,%edi
- rep
- movsl
- addl $3,%esi { back to byte granularity }
- addl $3,%edi
- .LBMove1:
- movl %edx,%ecx { copy the remaining bytes one by one }
- rep
- movsb
- cld
- .LMoveEnd:
- movl saveedi,%edi
- movl saveesi,%esi
- end;
- {$endif FPC_SYSTEM_HAS_MOVE}
- {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
- {$define FPC_SYSTEM_HAS_FILLCHAR}
- Procedure FillChar(var x;count:SizeInt;value:byte);assembler; nostackframe; {
- Stores the byte value into count consecutive bytes starting at x.
- Nothing is written when count <= 0.
- On entry: eax = address of x, edx = count, cl = value.
- Counts up to 22 use a simple byte loop; larger counts use rep stosl
- followed by rep stosb for the last 0..3 bytes. edi is saved with push/pop
- on the large path only.
- }
- asm
- cmpl $22,%edx { empirically determined value on a Core 2 Duo Conroe }
- jg .LFillFull
- orl %edx,%edx
- jle .LFillZero { count <= 0: nothing to do }
- .LFillLoop:
- movb %cl,(%eax)
- incl %eax
- decl %edx
- jne .LFillLoop
- .LFillZero:
- ret
- .LFillFull:
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- push %edi
- movl %eax,%edi { edi = destination }
- movzbl %cl,%eax
- movl %edx,%ecx
- imul $0x01010101,%eax { Expand al into a 4 subbytes of eax}
- shrl $2,%ecx { ecx = number of whole dwords }
- andl $3,%edx { edx = trailing bytes }
- rep
- stosl
- movl %edx,%ecx
- .LFill1:
- rep
- stosb
- .LFillEnd:
- pop %edi
- end;
- {$endif FPC_SYSTEM_HAS_FILLCHAR}
- {$ifndef FPC_SYSTEM_HAS_FILLWORD}
- {$define FPC_SYSTEM_HAS_FILLWORD}
- procedure fillword(var x;count : SizeInt;value : word);assembler; {
- Stores the 16-bit value into count consecutive words starting at x.
- Nothing is written when count <= 0.
- On entry: eax = address of x, edx = count, cx = value.
- Words are written two at a time with rep stosl; an odd count is finished
- with a single stosw. edi is kept in the local saveedi and reloaded on exit.
- }
- var
- saveedi : longint;
- asm
- movl %edi,saveedi
- movl %eax,%edi { edi = destination }
- movzwl %cx,%eax { eax = value, zero-extended }
- movl %edx,%ecx { ecx = count }
- { check for zero or negative count }
- cmpl $0,%ecx
- jle .LFillWordEnd
- movl %eax,%edx
- shll $16,%eax
- orl %edx,%eax { eax = value replicated into both halves }
- movl %ecx,%edx { edx = count, kept for the odd-word check }
- shrl $1,%ecx { ecx = number of word pairs }
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- rep
- stosl
- movl %edx,%ecx
- andl $1,%ecx { ecx = 1 if one word is still missing, else 0 }
- rep
- stosw
- .LFillWordEnd:
- movl saveedi,%edi
- end;
- {$endif FPC_SYSTEM_HAS_FILLWORD}
- {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
- {$define FPC_SYSTEM_HAS_FILLDWORD}
- procedure filldword(var x;count : SizeInt;value : dword);assembler; {
- Stores the 32-bit value into count consecutive dwords starting at x.
- Nothing is written when count <= 0.
- On entry: eax = address of x, edx = count, ecx = value.
- edi is kept in the local saveedi and reloaded on exit.
- }
- var
- saveedi : longint;
- asm
- movl %edi,saveedi
- movl %eax,%edi { edi = destination }
- movl %ecx,%eax { eax = value to store }
- movl %edx,%ecx { ecx = count }
- { check for zero or negative count }
- cmpl $0,%ecx
- jle .LFillDWordEnd
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- rep
- stosl
- .LFillDWordEnd:
- movl saveedi,%edi
- end;
- {$endif FPC_SYSTEM_HAS_FILLDWORD}
- {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
- {$define FPC_SYSTEM_HAS_INDEXBYTE}
- function IndexByte_Plain(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe; {
- Searches len bytes starting at buf for the byte b, without SSE.
- Returns the zero-based index of the first match, or -1 when there is none.
- On entry: eax = address of buf, edx = len, cl = b.
- Strategy: byte steps until eax is 4-byte aligned, then dword steps that test
- all four bytes at once (main loop unrolled four times), then a byte tail.
- esi and edi are saved with push/pop; the initial buf is kept on the stack so
- that the final address can be turned into an index.
- }
- asm
- push %esi
- push %edi
- push %eax { save initial value of 'buf' }
- cmp $4,%edx { less than 4 bytes, just test byte by byte. }
- jb .Ltail
- mov %cl,%ch { prepare pattern }
- movzwl %cx,%esi
- shl $16,%ecx
- or %esi,%ecx { ecx = b replicated into all four bytes }
- .Lalignloop:
- test $3,%al { align to 4 bytes if necessary }
- je .Laligned
- cmp %cl,(%eax)
- je .Lexit
- inc %eax
- dec %edx
- jmp .Lalignloop
- .balign 16 { Main loop, unrolled 4 times for speed }
- .Lloop:
- mov (%eax),%esi { load dword }
- xor %ecx,%esi { XOR with pattern, bytes equal to target are now 0 }
- lea -0x01010101(%esi),%edi
- xor %esi,%edi { (x-0x01010101) xor x }
- not %esi
- and $0x80808080,%esi
- and %edi,%esi { ((x-0x01010101) xor x) and (not x) and 0x80808080 }
- jnz .Lfound { one of the bytes matches }
- mov 4(%eax),%esi { same test for the second dword }
- xor %ecx,%esi
- lea -0x01010101(%esi),%edi
- xor %esi,%edi
- not %esi
- and $0x80808080,%esi
- and %edi,%esi
- jnz .Lfound4
- mov 8(%eax),%esi { same test for the third dword }
- xor %ecx,%esi
- lea -0x01010101(%esi),%edi
- xor %esi,%edi
- not %esi
- and $0x80808080,%esi
- and %edi,%esi
- jnz .Lfound8
- mov 12(%eax),%esi { same test for the fourth dword }
- xor %ecx,%esi
- lea -0x01010101(%esi),%edi
- xor %esi,%edi
- not %esi
- and $0x80808080,%esi
- and %edi,%esi
- jnz .Lfound12
- add $16,%eax
- .Laligned:
- sub $16,%edx
- jae .Lloop { Still more than 16 bytes remaining }
- { Process remaining bytes (<16 left at this point) }
- { length is offset by -16 at this point }
- .Lloop2:
- cmp $4-16,%edx { < 4 bytes left? }
- jb .Ltail
- mov (%eax),%esi
- xor %ecx,%esi
- lea -0x01010101(%esi),%edi
- xor %esi,%edi
- not %esi
- and $0x80808080,%esi
- and %edi,%esi
- jne .Lfound
- add $4,%eax
- sub $4,%edx
- jmp .Lloop2
- .Ltail: { Less than 4 bytes remaining, check one by one }
- and $3, %edx
- jz .Lnotfound
- .Lloop3:
- cmp %cl,(%eax)
- je .Lexit
- inc %eax
- dec %edx
- jnz .Lloop3
- .Lnotfound:
- or $-1,%eax
- jmp .Lexit1
- { add missing source pointer increments }
- .Lfound12:
- add $4,%eax
- .Lfound8:
- add $4,%eax
- .Lfound4:
- add $4,%eax
- .Lfound:
- test $0xff,%esi { esi has the top bit set in each matching byte lane; find the lowest one }
- jnz .Lexit
- inc %eax
- test $0xff00,%esi
- jnz .Lexit
- inc %eax
- test $0xff0000,%esi
- jnz .Lexit
- inc %eax
- .Lexit:
- sub (%esp),%eax { address of the match -> index relative to buf }
- .Lexit1:
- pop %ecx { removes initial 'buf' value }
- pop %edi
- pop %esi
- end;
- function IndexByte_SSE2(const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe; {
- Searches len bytes starting at buf for the byte b using SSE2.
- Returns the zero-based index of the first match, or -1 when there is none
- or when len = 0.
- On entry: eax = address of buf, edx = len, cl = b.
- Only 16-byte aligned loads (movdqa) are used: the first load starts at or
- before buf, and the mask bits of bytes in front of buf are shifted out.
- A load may also cover bytes beyond buf+len; a match found there is rejected
- by the length check at .Lmatch. ebx is saved with push/pop.
- }
- asm
- test %edx, %edx
- jz .Lnotfound { exit if len=0 }
- push %ebx
- movd %ecx, %xmm1
- lea 16(%eax), %ecx { eax = original ptr, ecx = buf + 16 for aligning & shifts. }
- punpcklbw %xmm1, %xmm1
- and $-0x10, %ecx { first aligned address after buf }
- punpcklbw %xmm1, %xmm1
- pshufd $0, %xmm1, %xmm1 { xmm1 = b replicated into all 16 bytes }
- movdqa -16(%ecx), %xmm0 { Fetch first 16 bytes (up to 15 bytes before target) }
- sub %eax, %ecx { ecx=number of valid bytes, eax=original ptr }
- pcmpeqb %xmm1, %xmm0 { compare with pattern and get bitmask }
- pmovmskb %xmm0, %ebx
- shl %cl, %ebx { shift valid bits into high word }
- and $0xffff0000, %ebx { clear low word containing invalid bits }
- shr %cl, %ebx { shift back }
- jz .Lcontinue
- .Lmatch:
- bsf %ebx, %ebx { ebx = position of the first match inside the 16-byte block }
- lea -16(%ecx,%ebx), %eax { eax = index of that byte relative to buf }
- pop %ebx
- cmp %eax, %edx { check against the buffer length }
- jbe .Lnotfound
- ret
- .balign 16
- .Lloop:
- movdqa (%eax,%ecx), %xmm0 { eax and ecx may have any values, }
- add $16, %ecx { but their sum is evenly divisible by 16. }
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- test %ebx, %ebx
- jnz .Lmatch
- .Lcontinue:
- cmp %ecx, %edx { continue while len is above the number of bytes already covered }
- ja .Lloop
- pop %ebx
- .Lnotfound:
- or $-1, %eax
- end;
- function IndexByte_Dispatch(const buf;len:SizeInt;b:byte):SizeInt; forward;
- var
- { Implementation currently used by IndexByte; starts out pointing at the dispatcher. }
- IndexByte_Impl: function(const buf;len:SizeInt;b:byte):SizeInt = @IndexByte_Dispatch;
- {$define has_i386_IndexByte_Impl} { used in assembler to manually inline IndexByte }
- { First-call resolver for IndexByte: selects the SSE2 or the plain routine,
- stores the choice in IndexByte_Impl and forwards the current call to it.
- While fpc_cpucodeinit_performed is still false the call is answered by the
- plain routine WITHOUT caching it; otherwise a call made before CPU detection
- would see has_sse2_support = false and pin the plain routine permanently.
- NOTE(review): assumes fpc_cpucodeinit sets both fpc_cpucodeinit_performed and
- has_sse2_support; neither assignment is visible in this part of the file. }
- function IndexByte_Dispatch(const buf;len:SizeInt;b:byte):SizeInt;
- begin
- if not fpc_cpucodeinit_performed then
- exit(IndexByte_Plain(buf,len,b));
- if has_sse2_support then
- IndexByte_Impl:=@IndexByte_SSE2
- else
- IndexByte_Impl:=@IndexByte_Plain;
- result:=IndexByte_Impl(buf,len,b);
- end;
- { Public entry point: forwards to whichever routine IndexByte_Impl currently points at. }
- function IndexByte(const buf;len:SizeInt;b:byte):SizeInt;
- begin
- IndexByte:=IndexByte_Impl(buf,len,b);
- end;
- {$endif FPC_SYSTEM_HAS_INDEXBYTE}
- {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
- {$define FPC_SYSTEM_HAS_INDEXWORD}
- function IndexWord_Plain(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe; {
- Searches len 16-bit words starting at buf for the value b, one word at a time.
- Returns the zero-based word index of the first match, or -1 when there is
- none or when len = 0.
- On entry: eax = address of buf, edx = len, cx = b.
- }
- asm
- test %edx, %edx
- jz .LNotFound
- push %eax { remember buf to compute the index later }
- .LWordwise_Body: { Loop does not cross cache line if the function entry is aligned on 16 bytes. }
- cmp %cx, (%eax)
- je .LFound
- add $2, %eax
- dec %edx
- jnz .LWordwise_Body
- pop %edx { drop the saved buf }
- .LNotFound:
- or $-1, %eax
- ret
- .LFound:
- pop %edx { edx = buf }
- sub %edx, %eax { byte offset of the match }
- shr $1, %eax { bytes -> words }
- end;
- function IndexWord_SSE2(const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe; {
- Searches len 16-bit words starting at buf for the value b using SSE2.
- Returns the zero-based word index of the first match, or -1 when there is
- none or when len = 0.
- On entry: eax = address of buf, edx = len, cx = b.
- Only 16-byte aligned loads (movdqa) are used. When buf is word-aligned the
- words are compared directly with pcmpeqw. When buf is odd, each word straddles
- two byte lanes of the aligned data, so a byte-swapped pattern is compared
- bytewise and adjacent mask bits are ANDed; the top bit of each block is
- carried into the next round through esi.
- Matches beyond len are rejected by the length checks at .Lmatch/.Lmatch_u.
- }
- asm
- test %edx, %edx { exit if len=0 }
- je .Lnotfound
- push %ebx
- movd %ecx, %xmm1
- punpcklwd %xmm1, %xmm1
- pshufd $0, %xmm1, %xmm1 { xmm1 = b replicated into all 8 words }
- lea 16(%eax), %ecx
- and $-16, %ecx { first aligned address after buf }
- movdqa -16(%ecx), %xmm0 { Fetch first 16 bytes (up to 14 bytes before target) }
- sub %eax, %ecx { ecx = number of valid bytes in the first block }
- test $1, %eax { if buffer isn't aligned to word boundary, }
- jnz .Lunaligned { use a different algorithm }
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- shl %cl, %ebx { drop the mask bits of bytes in front of buf }
- and $0xffff0000, %ebx
- shr %cl, %ebx
- shr $1, %ecx { ecx = number of valid words (was bytes) }
- test %ebx, %ebx
- jz .Lcontinue
- .Lmatch:
- bsf %ebx, %ebx
- shr $1, %ebx { in words }
- lea -8(%ecx,%ebx), %eax { eax = word index relative to buf }
- pop %ebx
- cmp %eax, %edx
- jbe .Lnotfound { if match is after the specified length, ignore it }
- ret
- .balign 16
- .Lloop:
- movdqa (%eax,%ecx,2), %xmm0
- add $8, %ecx
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- test %ebx, %ebx
- jnz .Lmatch
- .Lcontinue:
- cmp %ecx, %edx { continue while len is above the number of words already covered }
- ja .Lloop
- pop %ebx
- .Lnotfound:
- or $-1, %eax
- ret
- .Lunaligned:
- push %esi
- movdqa %xmm1, %xmm2 { (mis)align the pattern (in this particular case: }
- psllw $8, %xmm1 { swap bytes of each word of pattern) }
- psrlw $8, %xmm2
- por %xmm2, %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- shl %cl, %ebx { drop the mask bits of bytes in front of buf }
- and $0xffff0000, %ebx
- shr %cl, %ebx
- xor %esi, %esi { nothing to merge yet }
- add %edx, %edx { length words -> bytes }
- jmp .Lcontinue_u
- .balign 16
- .Lloop_u:
- movdqa (%eax,%ecx), %xmm0
- add $16, %ecx
- pcmpeqb %xmm1, %xmm0 { compare by bytes }
- shr $16, %esi { bit 16 shifts into 0 }
- pmovmskb %xmm0, %ebx
- .Lcontinue_u:
- shl $1, %ebx { 15:0 -> 16:1 }
- or %esi, %ebx { merge bit 0 from previous round }
- mov %ebx, %esi
- shr $1, %ebx { now AND together adjacent pairs of bits }
- and %esi, %ebx
- and $0x5555, %ebx { also reset odd bits }
- jnz .Lmatch_u
- cmp %ecx, %edx { edx is in bytes here }
- ja .Lloop_u
- .Lnotfound_u:
- pop %esi
- pop %ebx
- or $-1, %eax
- ret
- .Lmatch_u:
- bsf %ebx, %ebx
- lea -16(%ecx,%ebx), %eax
- cmp %eax, %edx
- jbe .Lnotfound_u { if match is after the specified length, ignore it }
- sar $1, %eax { in words }
- pop %esi
- pop %ebx
- end;
- function IndexWord_Dispatch(const buf;len:SizeInt;b:word):SizeInt; forward;
- var
- { Implementation currently used by IndexWord; starts out pointing at the dispatcher. }
- IndexWord_Impl: function(const buf;len:SizeInt;b:word):SizeInt = @IndexWord_Dispatch;
- { First-call resolver for IndexWord: selects the SSE2 or the plain routine,
- stores the choice in IndexWord_Impl and forwards the current call to it.
- While fpc_cpucodeinit_performed is still false the call is answered by the
- plain routine WITHOUT caching it; otherwise a call made before CPU detection
- would see has_sse2_support = false and pin the plain routine permanently.
- NOTE(review): assumes fpc_cpucodeinit sets both fpc_cpucodeinit_performed and
- has_sse2_support; neither assignment is visible in this part of the file. }
- function IndexWord_Dispatch(const buf;len:SizeInt;b:word):SizeInt;
- begin
- if not fpc_cpucodeinit_performed then
- exit(IndexWord_Plain(buf,len,b));
- if has_sse2_support then
- IndexWord_Impl:=@IndexWord_SSE2
- else
- IndexWord_Impl:=@IndexWord_Plain;
- result:=IndexWord_Impl(buf,len,b);
- end;
- { Public entry point: forwards to whichever routine IndexWord_Impl currently points at. }
- function IndexWord(const buf;len:SizeInt;b:word):SizeInt; inline;
- begin
- IndexWord:=IndexWord_Impl(buf,len,b);
- end;
- {$endif FPC_SYSTEM_HAS_INDEXWORD}
- {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
- {$define FPC_SYSTEM_HAS_INDEXDWORD}
- function IndexDWord_Plain(Const buf;len:SizeInt;b:DWord):SizeInt; assembler; nostackframe; {
- Searches len 32-bit dwords starting at buf for the value b, one dword at a time.
- Returns the zero-based dword index of the first match, or -1 when there is
- none or when len = 0.
- On entry: eax = address of buf, edx = len, ecx = b.
- }
- asm
- push %eax { remember buf to compute the index later }
- sub $4, %eax { compensate for the add at the top of the loop }
- .LDWordwise_Next: { Loop does not cross cache line if the function entry is aligned on 16 bytes. }
- add $4, %eax
- sub $1, %edx
- jb .LNotFound { the count went below zero: all elements checked }
- cmp %ecx, (%eax)
- jne .LDWordwise_Next
- pop %edx { edx = buf }
- sub %edx, %eax { byte offset of the match }
- shr $2, %eax { bytes -> dwords }
- ret
- .LNotFound:
- pop %edx
- mov $-1, %eax
- end;
- function IndexDWord_SSE2(const buf;len:SizeInt;b:DWord):SizeInt; assembler; nostackframe; {
- Searches len 32-bit dwords starting at buf for the value b using SSE2.
- Returns the zero-based dword index of the first match, or -1 when there is
- none or when len = 0.
- On entry: eax = address of buf, edx = len, ecx = b.
- For len > 4 the buffer is scanned four dwords per step with unaligned loads;
- the final step is moved back so that it ends exactly at the end of the
- buffer and may re-check dwords that were already compared.
- For len <= 4 the dwords are compared one by one.
- }
- asm
- push %eax { remember buf to compute the index later }
- sub $4, %edx { edx = len - 4 }
- jle .LDwordwise_Prepare
- movd %ecx, %xmm1
- pshufd $0, %xmm1, %xmm1 { xmm1 = b replicated into all 4 dwords }
- .balign 16 { 1-byte NOP. }
- .L4x_Body:
- movdqu (%eax), %xmm0
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jnz .LFoundAtMask
- add $16, %eax
- sub $4, %edx
- jg .L4x_Body
- lea (%eax,%edx,4), %eax { edx <= 0 here: step back so that the last vector ends at the buffer end }
- movdqu (%eax), %xmm0
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jz .LNothing
- .LFoundAtMask:
- bsf %ecx, %ecx { byte offset of the first matching dword inside the vector }
- add %ecx, %eax
- .LFoundAtEax:
- pop %edx { edx = buf }
- sub %edx, %eax { byte offset of the match }
- shr $2, %eax { bytes -> dwords }
- ret
- nop { Turns .balign 16 before .LDwordwise_Body into a no-op. }
- .LDwordwise_Prepare:
- add $3, %edx { edx = len - 1 }
- cmp $-1, %edx
- je .LNothing { len = 0 }
- .balign 16 { no-op }
- .LDwordwise_Body:
- cmp (%eax), %ecx
- je .LFoundAtEax
- add $4, %eax
- sub $1, %edx
- jae .LDwordwise_Body
- .LNothing:
- pop %edx
- or $-1, %eax
- end;
- function IndexDWord_Dispatch(const buf;len:SizeInt;b:DWord):SizeInt; forward;
- var
- { Implementation currently used by IndexDWord; starts out pointing at the dispatcher. }
- IndexDWord_Impl: function(const buf;len:SizeInt;b:DWord):SizeInt = @IndexDWord_Dispatch;
- { First-call resolver for IndexDWord: selects the SSE2 or the plain routine,
- stores the choice in IndexDWord_Impl and forwards the current call to it.
- While fpc_cpucodeinit_performed is still false the call is answered by the
- plain routine WITHOUT caching it; otherwise a call made before CPU detection
- would see has_sse2_support = false and pin the plain routine permanently.
- NOTE(review): assumes fpc_cpucodeinit sets both fpc_cpucodeinit_performed and
- has_sse2_support; neither assignment is visible in this part of the file. }
- function IndexDWord_Dispatch(const buf;len:SizeInt;b:DWord):SizeInt;
- begin
- if not fpc_cpucodeinit_performed then
- exit(IndexDWord_Plain(buf,len,b));
- if has_sse2_support then
- IndexDWord_Impl:=@IndexDWord_SSE2
- else
- IndexDWord_Impl:=@IndexDWord_Plain;
- result:=IndexDWord_Impl(buf,len,b);
- end;
- { Public entry point: forwards to whichever routine IndexDWord_Impl currently points at. }
- function IndexDWord(const buf;len:SizeInt;b:DWord):SizeInt;
- begin
- IndexDWord:=IndexDWord_Impl(buf,len,b);
- end;
- {$endif FPC_SYSTEM_HAS_INDEXDWORD}
- {$ifndef FPC_SYSTEM_HAS_INDEXQWORD}
- {$define FPC_SYSTEM_HAS_INDEXQWORD}
- function IndexQWord(const buf;len:SizeInt;b:QWord):SizeInt; assembler; nostackframe;
- { eax = buf, edx = len, [esp+4] = b
- Searches len 64-bit qwords starting at buf for the value b, comparing the
- low and the high dword of each element separately.
- Returns the zero-based qword index of the first match, or -1 when there is
- none or when len = 0.
- The stack slot that held the low dword of b is reused to remember buf.
- ebx is saved with push/pop.
- }
- asm
- push %ebx
- mov 8(%esp), %ecx { ecx = b[0:31] }
- mov 12(%esp), %ebx { ebx = b[32:63] }
- mov %eax, 8(%esp) { remember original buf }
- sub $8, %eax { compensate for the add at the top of the loop }
- .balign 16 { no-op }
- .LQWordwise_Next:
- add $8, %eax
- sub $1, %edx
- jb .LNotFound { the count went below zero: all elements checked }
- cmp %ecx, (%eax)
- jne .LQWordwise_Next
- cmp %ebx, 4(%eax)
- jne .LQWordwise_Next
- sub 8(%esp), %eax { byte offset of the match }
- pop %ebx
- shr $3, %eax { bytes -> qwords }
- ret $8 { return and release the 8 bytes of stack used by b }
- .LNotFound:
- pop %ebx
- mov $-1, %eax
- end;
- {$endif FPC_SYSTEM_HAS_INDEXQWORD}
- {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
- {$define FPC_SYSTEM_HAS_COMPAREBYTE}
- function CompareByte_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; {
- Compares len bytes of buf1 and buf2 without SSE.
- Returns 0 when all compared bytes are equal, otherwise -1 when the first
- differing byte of buf1 is below (unsigned) the one of buf2 and +1 when it
- is above.
- For len > 3 the data is compared dword by dword with buf1 aligned to 4 bytes;
- the last dword is re-read so that it ends exactly at the end of the buffers.
- For len <= 3 bytes are compared one by one; len <= 0 returns 0.
- ebx is saved with push/pop.
- }
- asm
- { eax = buf1, edx = buf2, ecx = len }
- push %ebx
- sub %eax, %edx { edx = buf2 - buf1 }
- cmp $3, %ecx
- jle .LBytewise_Prepare
- { Align buf1 on 4 bytes. }
- mov (%edx,%eax), %ebx
- cmp (%eax), %ebx
- jne .L4xDiffer
- lea -4(%eax,%ecx), %ecx { ecx = buf1 end - (4 + buf1 and -4) = count remaining }
- and $-4, %eax
- sub %eax, %ecx
- .balign 16
- .L4x_Next:
- add $4, %eax
- sub $4, %ecx { at .LLast4, ecx is 4 less than remaining bytes }
- jle .LLast4
- mov (%edx,%eax), %ebx
- cmp (%eax), %ebx
- je .L4x_Next
- .L4xDiffer:
- mov (%eax), %edx { edx = dword of buf1, ebx = dword of buf2 }
- {$ifdef CPUX86_HAS_BSWAP}
- bswap %ebx { reverse byte order so that the first byte in memory is the most significant }
- bswap %edx
- {$else}
- rol $8, %bx { byte reversal without bswap }
- rol $16, %ebx
- rol $8, %bx
- rol $8, %dx
- rol $16, %edx
- rol $8, %dx
- {$endif}
- cmp %ebx, %edx
- .LDoSbb:
- sbb %eax, %eax { eax = -1 if the compare borrowed, else 0 }
- or $1, %eax { -> -1 or +1 }
- pop %ebx
- ret
- .LLast4:
- add %ecx, %eax { ecx <= 0: step back to the last 4 bytes }
- mov (%edx,%eax), %ebx
- cmp (%eax), %ebx
- jne .L4xDiffer
- xor %eax, %eax
- pop %ebx
- ret
- .LBytewise_Prepare:
- sub $1, %ecx
- jb .LNothing { len = 0 }
- .balign 16 { no-op }
- .LBytewise_Body:
- movzbl (%edx,%eax), %ebx
- cmp %bl, (%eax)
- jne .LDoSbb
- add $1, %eax
- sub $1, %ecx
- jae .LBytewise_Body
- .LNothing:
- xor %eax, %eax
- pop %ebx
- end;
- function CompareByte_SSE2(const buf1, buf2; len: SizeInt): SizeInt; assembler; nostackframe; {
- Compares len bytes of buf1 and buf2 using SSE2.
- Returns 0 when all compared bytes are equal; otherwise a negative value when
- the first differing byte of buf1 is below (unsigned) the one of buf2 and a
- positive value when it is above. Depending on the path taken the non-zero
- result is either the byte difference or -1/+1.
- Paths by length:
- len = 0 returns 0, len = 1 compares one byte;
- len < 0 compares byte by byte without a limit until a difference is found;
- len 2..15 loads 16 bytes from both buffers when neither load can cross a
- 4096-byte boundary, otherwise uses dword/word/byte loads that stay inside len;
- len >= 16 compares first and last vectors with unaligned loads and, for
- long buffers, runs a 32-bytes-per-step loop with buf1 aligned to 16 bytes.
- ebx (and esi on one path) are saved with push/pop.
- }
- asm
- { eax = buf1, edx = buf2, ecx = len }
- cmp $1, %ecx
- jle .L1OrLess
- push %ebx
- cmp $16, %ecx
- jae .LVecOrMore
- { 2 to 15 bytes: check for page cross. Pessimistic variant that has false positives, but uses 1 less register and 2 less instructions. }
- mov %eax, %ebx
- or %edx, %ebx
- and $4095, %ebx
- cmp $4080, %ebx
- ja .LCantOverReadBoth
- { Over-read both as XMMs. }
- movdqu (%eax), %xmm0
- movdqu (%edx), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx { Here and below, 2-byte 'inc r16' after 'pmovmskb' can be replaced with 5-byte 'add $1, r16' or 6-byte 'xor $65535, r32'. }
- jz .LNothing
- bsf %ebx, %ebx
- cmp %ecx, %ebx { Ignore garbage beyond 'len'. }
- jae .LNothing
- movzbl (%eax,%ebx), %eax
- movzbl (%edx,%ebx), %edx
- sub %edx, %eax
- pop %ebx
- ret
- .LNothing:
- pop %ebx
- xor %eax, %eax
- ret
- .LVecOrMore:
- { Compare first vectors. }
- movdqu (%eax), %xmm0
- movdqu (%edx), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVec0Differs
- sub $32, %ecx { now ecx is len - 32... mostly just to save bytes on offsets improving .LAligned32xLoop_Body alignment :) }
- jbe .LLastVec
- { Compare second vectors. }
- movdqu 16(%eax), %xmm0
- movdqu 16(%edx), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVec1Differs
- { More than four vectors: aligned loop. }
- cmp $32, %ecx
- ja .LAligned32xLoop_Prepare
- { Compare last two vectors. }
- movdqu (%eax,%ecx), %xmm0
- movdqu (%edx,%ecx), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVecEm2Differs
- .LLastVec:
- movdqu 16(%eax,%ecx), %xmm0
- movdqu 16(%edx,%ecx), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVecEm1Differs
- pop %ebx
- xor %eax, %eax
- ret
- .LVecEm2Differs:
- sub $16, %ecx { the difference is in the vector before the last one }
- .LVecEm1Differs:
- bsf %ebx, %ebx
- add %ecx, %ebx
- movzbl 16(%eax,%ebx), %eax
- movzbl 16(%edx,%ebx), %edx
- sub %edx, %eax
- pop %ebx
- ret
- nop { Turn .balign 16 before .LAligned32xLoop_Body into a no-op. }
- .LAligned32xLoop_Prepare:
- lea -32(%eax,%ecx), %ecx { buffer end - last two vectors handled separately - first two vectors already analyzed (by the fact ecx was still len - 32) }
- sub %eax, %edx { edx = buf2 - buf1 }
- and $-16, %eax { Align buf1. First two vectors already analyzed are skipped by +32 on the first loop iteration. }
- sub %eax, %ecx { ecx = count to be handled with loop }
- .balign 16 { No-op. }
- .LAligned32xLoop_Body:
- add $32, %eax
- { Compare two XMMs, reduce the result with 'and'. }
- movdqu (%edx,%eax), %xmm0
- pcmpeqb (%eax), %xmm0 { xmm0 = pcmpeqb(buf1, buf2) }
- movdqu 16(%edx,%eax), %xmm1
- pcmpeqb 16(%eax), %xmm1
- pand %xmm0, %xmm1 { xmm1 = xmm0 and pcmpeqb(buf1 + 16, buf2 + 16) }
- pmovmskb %xmm1, %ebx
- inc %bx
- jnz .LAligned32xLoop_TwoVectorsDiffer
- sub $32, %ecx
- ja .LAligned32xLoop_Body
- { Compare last two vectors after the loop by doing one more loop iteration, modified. }
- lea 32(%eax,%ecx), %eax
- movdqu (%edx,%eax), %xmm0
- movdqu (%eax), %xmm2
- pcmpeqb %xmm2, %xmm0
- movdqu 16(%edx,%eax), %xmm1
- movdqu 16(%eax), %xmm2
- pcmpeqb %xmm2, %xmm1
- pand %xmm0, %xmm1
- pmovmskb %xmm1, %ebx
- inc %bx
- jnz .LAligned32xLoop_TwoVectorsDiffer
- pop %ebx
- xor %eax, %eax
- ret
- .LAligned32xLoop_TwoVectorsDiffer:
- add %eax, %edx { restore edx = buf2 }
- pmovmskb %xmm0, %ecx { Is there a difference in the first vector? }
- inc %cx
- jz .LVec1Differs { No difference in the first vector, xmm0 is all ones, ebx = pmovmskb(pcmpeqb(buf1 + 16, buf2 + 16)) from the loop body. }
- bsf %ecx, %ebx
- movzbl (%eax,%ebx), %eax
- movzbl (%edx,%ebx), %edx
- sub %edx, %eax
- pop %ebx
- ret
- .LVec1Differs:
- add $16, %eax
- add $16, %edx
- .LVec0Differs:
- bsf %ebx, %ebx
- movzbl (%eax,%ebx), %eax
- movzbl (%edx,%ebx), %edx
- sub %edx, %eax
- pop %ebx
- ret
- .LCantOverReadBoth:
- cmp $3, %ecx
- jle .L2to3
- push %esi
- mov (%eax), %ebx { 4 to 15 bytes: compare dwords that stay inside len }
- mov (%edx), %esi
- cmp %esi, %ebx
- jne .L4xDiffer
- cmp $8, %ecx
- jbe .LLast4x { up to 8 bytes: first and last dword cover everything }
- mov 4(%eax), %ebx
- mov 4(%edx), %esi
- cmp %esi, %ebx
- jne .L4xDiffer
- mov -8(%eax,%ecx), %ebx
- mov -8(%edx,%ecx), %esi
- cmp %esi, %ebx
- jne .L4xDiffer
- .LLast4x:
- mov -4(%eax,%ecx), %ebx
- mov -4(%edx,%ecx), %esi
- cmp %esi, %ebx
- jne .L4xDiffer
- pop %esi
- pop %ebx
- xor %eax, %eax
- ret
- .L4xDiffer:
- bswap %ebx { reverse byte order so that the first byte in memory is the most significant }
- bswap %esi
- cmp %esi, %ebx
- pop %esi { pop does not change the flags used by sbb below }
- sbb %eax, %eax
- or $1, %eax
- pop %ebx
- ret
- .L2to3:
- movzwl (%edx), %ebx { first two bytes of buf2 }
- bswap %ebx { move them to the top of ebx, first byte highest }
- shr $1, %ebx { keep bit 31 clear so that the final sub cannot overflow }
- mov -1(%edx,%ecx), %bl { last byte of buf2 into the low byte }
- movzwl (%eax), %edx { same packing for buf1 }
- bswap %edx
- shr $1, %edx
- mov -1(%eax,%ecx), %dl
- mov %edx, %eax
- sub %ebx, %eax
- pop %ebx
- ret
- .L1OrLess:
- jl .LUnbounded_Prepare { flags still come from the cmp $1 at entry: len < 1 }
- movzbl (%eax), %eax
- movzbl (%edx), %edx
- sub %edx, %eax
- ret
- .LUnbounded_Prepare:
- sub %eax, %edx { edx = buf2 - buf1 }
- test %ecx, %ecx
- jnz .LUnbounded_Body { negative len: compare without a limit }
- xor %eax, %eax
- ret
- .balign 16
- .LUnbounded_Next:
- add $1, %eax
- .LUnbounded_Body:
- movzbl (%edx,%eax), %ecx
- cmp %cl, (%eax)
- je .LUnbounded_Next
- sbb %eax, %eax
- or $1, %eax
- end;
- function CompareByte_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;
- var
- { Implementation currently used by CompareByte; starts out pointing at the dispatcher. }
- CompareByte_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareByte_Dispatch;
- { First-call resolver for CompareByte: selects the SSE2 or the plain routine,
- stores the choice in CompareByte_Impl and forwards the current call to it.
- While fpc_cpucodeinit_performed is still false the call is answered by the
- plain routine WITHOUT caching it; otherwise a call made before CPU detection
- would see has_sse2_support = false and pin the plain routine permanently.
- NOTE(review): assumes fpc_cpucodeinit sets both fpc_cpucodeinit_performed and
- has_sse2_support; neither assignment is visible in this part of the file. }
- function CompareByte_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
- begin
- if not fpc_cpucodeinit_performed then
- exit(CompareByte_Plain(buf1, buf2, len));
- if has_sse2_support then
- CompareByte_Impl:=@CompareByte_SSE2
- else
- CompareByte_Impl:=@CompareByte_Plain;
- result:=CompareByte_Impl(buf1, buf2, len);
- end;
- { Public entry point: forwards to whichever routine CompareByte_Impl currently points at. }
- function CompareByte(const buf1, buf2; len: SizeInt): SizeInt;
- begin
- CompareByte:=CompareByte_Impl(buf1, buf2, len);
- end;
- {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
- {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
- {$define FPC_SYSTEM_HAS_COMPAREWORD}
- function CompareWord_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; {
- Compares len 16-bit words of buf1 and buf2 without SSE.
- Returns 0 when all compared words are equal, otherwise -1 when the first
- differing word of buf1 is below (unsigned) the one of buf2 and +1 when it
- is above.
- On entry: eax = address of buf1, edx = address of buf2, ecx = len.
- For 4 <= len <= 1073741823 two words are compared per step as one dword and
- the last dword is re-read so that it ends exactly at the end of the buffers;
- every other len is handled word by word (len = 0 returns 0).
- ebx is saved with push/pop.
- }
- asm
- push %ebx
- sub %eax, %edx { edx = buf2 - buf1 }
- lea -4(%ecx), %ebx { Go wordwise if ecx <= 3 or ecx > 1073741823 (High(int32) div 2) ==> uint32(ecx - 4) > 1073741819. }
- cmp $1073741819, %ebx
- ja .LWordwise_Prepare
- test $2, %al
- je .LAlignedToPtrUintOrNaturallyMisaligned
- movzwl (%edx,%eax), %ebx { compare one word first so that bit 1 of the buf1 address becomes clear }
- cmp %bx, (%eax)
- jne .LDoSbb
- add $2, %eax
- sub $1, %ecx
- .LAlignedToPtrUintOrNaturallyMisaligned:
- sub $2, %ecx { ecx = words remaining minus 2 }
- .balign 16
- .LPtrUintWise_Next:
- mov (%edx,%eax), %ebx
- cmp %ebx, (%eax)
- jne .LPtrUintsDiffer
- add $4, %eax
- sub $2, %ecx
- jg .LPtrUintWise_Next
- lea (%eax,%ecx,2), %eax { ecx <= 0: step back to the last two words }
- mov (%edx,%eax), %ebx
- cmp %ebx, (%eax)
- jne .LPtrUintsDiffer
- pop %ebx
- xor %eax, %eax
- ret
- .LPtrUintsDiffer:
- cmp %bx, (%eax) { does the low word differ? }
- jne .LDoSbb
- shr $16, %ebx { otherwise the high word decides }
- cmp %bx, 2(%eax)
- .LDoSbb:
- sbb %eax, %eax { eax = -1 if the compare borrowed, else 0 }
- or $1, %eax { -> -1 or +1 }
- pop %ebx
- ret
- .balign 16
- .LWordwise_Body:
- movzwl (%edx,%eax), %ebx
- cmp %bx, (%eax)
- jne .LDoSbb
- add $2, %eax
- .LWordwise_Prepare:
- sub $1, %ecx
- jnb .LWordwise_Body
- pop %ebx
- xor %eax, %eax
- end;
- function CompareWord_SSE2(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; // Compares len 16-bit words of buf1 and buf2 using 16-byte SSE2 compares. Returns 0 if no difference is found; otherwise the vector paths return (buf1 word - buf2 word) of the first differing pair and the wordwise path returns -1 or 1.
- asm // On entry: eax = @buf1, edx = @buf2, ecx = len.
- push %ebx // ebx is scratch below and is restored on every exit path
- sub %eax, %edx { edx = buf2 - buf1 }
- lea -2(%ecx), %ebx { Go wordwise if ecx <= 1 or uint32(ecx) > 1073741823 (High(int32) div 2) ==> uint32(ecx - 2) > 1073741821. }
- cmp $1073741821, %ebx
- ja .LWordwise_Prepare
- cmp $8, %ecx // 8 words = one full 16-byte vector
- jge .LVecOrMore
- lea (%edx,%eax), %ebx // ebx = @buf2
- or %eax, %ebx // combine the low address bits of both buffers
- and $4095, %ebx
- cmp $4080, %ebx // NOTE(review): presumably rejects cases where a 16-byte load could run past a 4096-byte page boundary - confirm
- ja .LWordwise_Prepare
- movdqu (%edx,%eax), %xmm0
- movdqu (%eax), %xmm1
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ebx // one mask bit per byte; $FFFF means all 16 bytes matched
- inc %bx // $FFFF wraps to 0; otherwise the lowest set bit now marks the first mismatching byte
- jz .LNothing
- shl $1, %ecx { convert to bytes }
- bsf %ebx, %ebx // ebx = byte offset of the first mismatch
- cmp %ecx, %ebx
- jb .LSubtractWords // the mismatch only counts if it lies within the first len words
- .LNothing:
- pop %ebx
- xor %eax, %eax
- ret
- .balign 16
- .LWordwise_Body: // fallback loop: one word per iteration
- movzwl (%edx,%eax), %ebx
- cmp %bx, (%eax)
- jne .LDoSbb
- add $2, %eax
- .LWordwise_Prepare:
- sub $1, %ecx
- jae .LWordwise_Body // continue until the count borrows
- xor %eax, %eax
- pop %ebx
- ret
- .LDoSbb: // expects CF from the preceding cmp (set when the buf1 word is smaller)
- sbb %eax, %eax
- or $1, %eax // result is -1 or 1
- pop %ebx
- ret
- .LVecOrMore:
- movdqu (%edx,%eax), %xmm0 { Compare first vectors. }
- movdqu (%eax), %xmm1
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVec0Differs
- shl $1, %ecx { convert to bytes }
- sub $32, %ecx { first 16 bytes already analyzed + last 16 bytes analyzed separately }
- jle .LLastVec
- push %eax { save original buf1 to recover word position if byte mismatch found (aligned loop works in bytes to support misaligned buf1). }
- add %eax, %ecx
- and $-16, %eax { align buf1; +16 is performed by the loop. }
- sub %eax, %ecx // ecx = remaining byte count measured from the aligned address
- .balign 16
- .LAligned8xLoop_Body:
- add $16, %eax
- movdqu (%edx,%eax), %xmm0
- pcmpeqb (%eax), %xmm0 // bytewise compare against the 16-byte aligned buf1 operand
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LAligned8xLoop_VecDiffers
- sub $16, %ecx
- ja .LAligned8xLoop_Body
- pop %ebx { drop original buf1 }
- .LLastVec:
- lea 16(%eax,%ecx), %eax { point to the last 16 bytes }
- movdqu (%edx,%eax), %xmm0
- movdqu (%eax), %xmm1
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVec0Differs
- pop %ebx
- xor %eax, %eax
- ret
- .LVec0Differs:
- bsf %ebx, %ebx // ebx = byte offset of the first mismatch inside the vector at eax
- .LSubtractWords:
- add %eax, %edx // edx = buf2 address matching eax
- movzwl (%eax,%ebx), %eax
- movzwl (%edx,%ebx), %edx
- sub %edx, %eax // result = buf1 word - buf2 word
- pop %ebx
- ret
- .LAligned8xLoop_VecDiffers:
- bsf %ebx, %ebx
- add %ebx, %eax // eax = address of the first mismatching byte in buf1
- pop %ecx // ecx = original buf1 saved before the loop
- sub %ecx, %eax
- and $-2, %eax // round the byte offset down to the start of its word
- add %ecx, %eax
- movzwl (%edx,%eax), %edx
- movzwl (%eax), %eax
- sub %edx, %eax // result = buf1 word - buf2 word
- pop %ebx
- end;
- function CompareWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;
- { Currently selected CompareWord implementation; initially the resolver below. }
- var
- CompareWord_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareWord_Dispatch;
- { Resolver: stores the SSE2 or the plain routine in CompareWord_Impl, depending on
-   has_sse2_support, and then performs the requested comparison through it. }
- function CompareWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
- begin
- if not has_sse2_support then
- CompareWord_Impl:=@CompareWord_Plain
- else
- CompareWord_Impl:=@CompareWord_SSE2;
- CompareWord_Dispatch:=CompareWord_Impl(buf1, buf2, len);
- end;
- { Public entry point: forwards to whatever routine CompareWord_Impl points at. }
- function CompareWord(const buf1, buf2; len: SizeInt): SizeInt;
- begin
- CompareWord:=CompareWord_Impl(buf1, buf2, len);
- end;
- {$endif FPC_SYSTEM_HAS_COMPAREWORD}
- {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
- {$define FPC_SYSTEM_HAS_COMPAREDWORD}
- function CompareDWord_Plain(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; // Compares len 32-bit values of buf1 and buf2 one at a time. Returns 0 if no difference is found, otherwise -1 or 1 according to the unsigned comparison of the first differing pair (buf1 value below buf2 value gives -1).
- asm // On entry: eax = @buf1, edx = @buf2, ecx = len.
- sub $1, %ecx
- jb .LNothing // len = 0: nothing to compare (ebx has not been pushed yet)
- push %ebx
- sub %eax, %edx // edx = buf2 - buf1, so (%edx,%eax) addresses buf2
- .balign 16
- .LDwordwise_Body:
- mov (%edx,%eax), %ebx
- cmp %ebx, (%eax) // CF is set when the buf1 value is smaller
- jne .LDoSbb
- add $4, %eax
- sub $1, %ecx
- jnb .LDwordwise_Body // continue until the count borrows
- pop %ebx
- .LNothing:
- xor %eax, %eax
- ret
- .LDoSbb:
- pop %ebx // pop leaves the flags of the cmp intact
- sbb %eax, %eax // eax = -1 if CF was set, else 0
- or $1, %eax // result is -1 or 1
- end;
- function CompareDWord_SSE2(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; // Compares len 32-bit values of buf1 and buf2 using 16-byte SSE2 compares. Returns 0 if no difference is found, otherwise -1 or 1 according to the unsigned comparison of the first differing pair.
- asm // On entry: eax = @buf1, edx = @buf2, ecx = len.
- push %ebx // ebx is scratch below and is restored on every exit path
- sub %eax, %edx { edx = buf2 - buf1 }
- lea -5(%ecx), %ebx { Go dwordwise if ecx <= 4 or ecx > 536870911 (High(int32) div 4) ==> uint32(ecx - 5) > 536870906. }
- cmp $536870906, %ebx
- ja .LDwordwise_Prepare
- shl $2, %ecx { convert to bytes }
- movdqu (%edx,%eax), %xmm1 { Compare first vectors. }
- movdqu (%eax), %xmm0
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm0, %ebx // one mask bit per byte; $FFFF means all 16 bytes matched
- inc %bx // $FFFF wraps to 0; otherwise the lowest set bit marks the first mismatching byte
- jnz .LVec0Differs
- sub $32, %ecx { first 16 bytes already analyzed + last 16 bytes analyzed separately }
- jle .LLastVec
- push %eax { save original buf1 to recover uint32 position if byte mismatch found (aligned loop works in bytes to support misaligned buf1). }
- add %eax, %ecx
- and $-16, %eax { align buf1; +16 is performed by the loop. }
- sub %eax, %ecx // ecx = remaining byte count measured from the aligned address
- .balign 16
- .LAligned4xLoop_Body:
- add $16, %eax
- movdqu (%eax,%edx), %xmm0
- pcmpeqb (%eax), %xmm0 // bytewise compare against the 16-byte aligned buf1 operand
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LAligned4xLoop_VecDiffers
- sub $16, %ecx
- ja .LAligned4xLoop_Body
- pop %ebx { drop original buf1 }
- .LLastVec:
- lea 16(%eax,%ecx), %eax { point to the last 16 bytes }
- movdqu (%edx,%eax), %xmm1
- movdqu (%eax), %xmm0
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm0, %ebx
- inc %bx
- jnz .LVec0Differs
- pop %ebx
- xor %eax, %eax
- ret
- .LVec0Differs:
- bsf %ebx, %ebx // ebx = byte offset of the first mismatching value inside the vector at eax
- add %eax, %edx { recover edx = buf2 }
- mov (%edx,%ebx), %edx
- cmp %edx, (%eax,%ebx) // CF is set when the buf1 value is smaller
- sbb %eax, %eax
- or $1, %eax // result is -1 or 1
- pop %ebx
- ret
- .LAligned4xLoop_VecDiffers:
- bsf %ebx, %ebx
- add %ebx, %eax // eax = address of the first mismatching byte in buf1
- pop %ecx // ecx = original buf1 saved before the loop
- sub %ecx, %eax
- and $-4, %eax // round the byte offset down to the start of its 32-bit value
- add %ecx, %eax
- mov (%edx,%eax), %edx
- cmp %edx, (%eax)
- .LDoSbb: // expects CF from the preceding cmp
- sbb %eax, %eax
- or $1, %eax
- pop %ebx
- ret
- .balign 16
- .LDwordwise_Body: // fallback loop: one 32-bit value per iteration
- mov (%edx,%eax), %ebx
- cmp %ebx, (%eax)
- jne .LDoSbb
- add $4, %eax
- .LDwordwise_Prepare:
- sub $1, %ecx
- jnb .LDwordwise_Body // continue until the count borrows
- pop %ebx
- xor %eax, %eax // no difference found
- end;
- function CompareDWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt; forward;
- { Currently selected CompareDWord implementation; initially the resolver below. }
- var
- CompareDWord_Impl: function(const buf1, buf2; len: SizeInt): SizeInt = @CompareDWord_Dispatch;
- { Resolver: stores the SSE2 or the plain routine in CompareDWord_Impl, depending on
-   has_sse2_support, and then performs the requested comparison through it. }
- function CompareDWord_Dispatch(const buf1, buf2; len: SizeInt): SizeInt;
- begin
- if not has_sse2_support then
- CompareDWord_Impl:=@CompareDWord_Plain
- else
- CompareDWord_Impl:=@CompareDWord_SSE2;
- CompareDWord_Dispatch:=CompareDWord_Impl(buf1, buf2, len);
- end;
- { Public entry point: forwards to whatever routine CompareDWord_Impl points at. }
- function CompareDWord(const buf1, buf2; len: SizeInt): SizeInt;
- begin
- CompareDWord:=CompareDWord_Impl(buf1, buf2, len);
- end;
- {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
- {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
- {$define FPC_SYSTEM_HAS_INDEXCHAR0}
- { IndexChar0: searches buf for the character b, giving up after len bytes or after the
-   first #0 byte, whichever comes first.
-   In:  eax = @buf, edx = len, cl = b.
-   Out: eax = zero-based index of the first match, or -1 if none was found.
-   Each byte is compared with b before it is tested for #0.
-   A len of 0 yields -1: nothing is scanned, so nothing can match. }
- function IndexChar0(Const buf;len:SizeInt;b:AnsiChar):SizeInt; assembler;
- var
- saveesi,saveebx : longint;
- asm
- movl %esi,saveesi
- movl %ebx,saveebx
- // Can't use scasb, or will have to do it twice, think this
- // is faster for small "len"
- movl %eax,%esi // Load address
- movzbl %cl,%ebx // Load searchpattern
- testl %edx,%edx
- je .LNotFound // len = 0: report -1; ecx still holds b here and must not be returned as an index
- xorl %ecx,%ecx // zero index in Buf
- xorl %eax,%eax // To make DWord compares possible
- .balign 4
- .LLoop:
- movb (%esi),%al // Load byte
- cmpb %al,%bl
- je .LFound // byte the same?
- incl %ecx
- incl %esi
- cmpl %edx,%ecx // Maximal distance reached?
- je .LNotFound
- testl %eax,%eax // Nullchar = end of search?
- jne .LLoop
- .LNotFound:
- movl $-1,%ecx // Not found return -1
- .LFound:
- movl %ecx,%eax // result = index, or -1
- movl saveesi,%esi
- movl saveebx,%ebx
- end;
- {$endif FPC_SYSTEM_HAS_INDEXCHAR0}
- {****************************************************************************
- String
- ****************************************************************************}
- {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
- {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
- procedure fpc_shortstr_to_shortstr(out res:shortstring; const sstr: shortstring);assembler;[public,alias:'FPC_SHORTSTR_TO_SHORTSTR']; compilerproc; // Copies sstr into res: a length byte (clamped to the limit taken from edx) followed by that many characters.
- var
- saveesi,saveedi : longint;
- asm
- {$ifdef FPC_PROFILE}
- push %eax
- push %edx
- push %ecx
- call mcount
- pop %ecx
- pop %edx
- pop %eax
- {$endif FPC_PROFILE}
- movl %edi,saveedi // esi/edi are used by the string instructions and restored at the end
- movl %esi,saveesi
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- movl res,%edi // edi = destination
- movl sstr,%esi // esi = source
- movl %edx,%ecx // ecx = upper limit for the length; NOTE(review): presumably high(res) passed in edx - confirm
- xorl %eax,%eax
- lodsb // eax = length byte of sstr
- cmpl %ecx,%eax
- jbe .LStrCopy1
- movl %ecx,%eax // clamp the length to the limit
- .LStrCopy1:
- stosb // store the (possibly clamped) length byte
- cmpl $7,%eax
- jl .LStrCopy2 // fewer than 7 characters: plain byte copy
- movl %edi,%ecx { Align on 32bits }
- negl %ecx
- andl $3,%ecx // ecx = bytes needed to bring edi to a 4-byte boundary
- subl %ecx,%eax
- rep
- movsb
- movl %eax,%ecx
- andl $3,%eax // eax = bytes left over after the dword copy
- shrl $2,%ecx // ecx = number of whole dwords
- rep
- movsl
- .LStrCopy2:
- movl %eax,%ecx
- rep
- movsb // copy the remaining bytes
- movl saveedi,%edi
- movl saveesi,%esi
- end;
- procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);[public,alias:'FPC_SHORTSTR_ASSIGN']; // Copies the short string at sstr to dstr, clamping its length byte to len.
- begin
- asm
- {$ifdef FPC_PROFILE}
- push %eax
- push %edx
- push %ecx
- call mcount
- pop %ecx
- pop %edx
- pop %eax
- {$endif FPC_PROFILE}
- pushl %eax // eax and ecx are used as scratch and restored before the block ends
- pushl %ecx
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- movl dstr,%edi // edi = destination
- movl sstr,%esi // esi = source
- xorl %eax,%eax
- movl len,%ecx // ecx = upper limit for the length
- lodsb // eax = length byte of the source string
- cmpl %ecx,%eax
- jbe .LStrCopy1
- movl %ecx,%eax // clamp the length to len
- .LStrCopy1:
- stosb // store the (possibly clamped) length byte
- cmpl $7,%eax
- jl .LStrCopy2 // fewer than 7 characters: plain byte copy
- movl %edi,%ecx { Align on 32bits }
- negl %ecx
- andl $3,%ecx // ecx = bytes needed to bring edi to a 4-byte boundary
- subl %ecx,%eax
- rep
- movsb
- movl %eax,%ecx
- andl $3,%eax // eax = bytes left over after the dword copy
- shrl $2,%ecx // ecx = number of whole dwords
- rep
- movsl
- .LStrCopy2:
- movl %eax,%ecx
- rep
- movsb // copy the remaining bytes
- popl %ecx
- popl %eax
- end ['ESI','EDI']; // esi and edi are declared as modified by this block
- end;
- {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
- {$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
- {$define FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
- function fpc_shortstr_compare(const left,right:shortstring): longint;assembler; [public,alias:'FPC_SHORTSTR_COMPARE']; compilerproc; // Compares two short strings. Returns (left byte - right byte) at the first differing position within the common length, or (length(left) - length(right)) if that common part is equal.
- var
- saveesi,saveedi,saveebx : longint;
- asm
- {$ifdef FPC_PROFILE}
- push %eax
- push %edx
- push %ecx
- call mcount
- pop %ecx
- pop %edx
- pop %eax
- {$endif FPC_PROFILE}
- movl %edi,saveedi
- movl %esi,saveesi
- movl %ebx,saveebx
- {$ifdef FPC_ENABLED_CLD}
- cld
- {$endif FPC_ENABLED_CLD}
- movl right,%esi
- movl left,%edi
- movzbl (%esi),%eax // eax = length(right)
- movzbl (%edi),%ebx // ebx = length(left)
- movl %eax,%edx // edx = length(right), kept for the final subtraction
- incl %esi // skip the length bytes
- incl %edi
- cmpl %ebx,%eax
- jbe .LStrCmp1
- movl %ebx,%eax // eax = the smaller of the two lengths
- .LStrCmp1:
- cmpl $7,%eax
- jl .LStrCmp2 // fewer than 7 characters: plain byte compare
- movl %edi,%ecx { Align on 32bits }
- negl %ecx
- andl $3,%ecx // ecx = bytes needed to bring edi to a 4-byte boundary
- subl %ecx,%eax
- orl %ecx,%ecx // sets ZF when ecx = 0, so the jne below is not taken if repe runs zero times
- repe
- cmpsb
- jne .LStrCmp3
- movl %eax,%ecx
- andl $3,%eax // eax = bytes left over after the dword compare
- shrl $2,%ecx // ecx = number of whole dwords
- orl %ecx,%ecx
- repe
- cmpsl
- je .LStrCmp2
- movl $4,%eax // a dword differed: step back 4 bytes and locate the byte with the byte compare below
- subl %eax,%esi
- subl %eax,%edi
- .LStrCmp2:
- movl %eax,%ecx
- orl %eax,%eax // sets ZF when nothing is left to compare
- repe
- cmpsb
- je .LStrCmp4 // common part equal: ebx/edx still hold the two lengths
- .LStrCmp3:
- movzbl -1(%esi),%edx // Compare failing (or equal) position
- movzbl -1(%edi),%ebx
- .LStrCmp4:
- movl %ebx,%eax // Compare length or position
- subl %edx,%eax // result = left value - right value
- movl saveedi,%edi
- movl saveesi,%esi
- movl saveebx,%ebx
- end;
- {$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_COMPARE}
- {$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
- {$define FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
- procedure fpc_pchar_to_shortstr(out res : shortstring;p:PAnsiChar);assembler;[public,alias:'FPC_PCHAR_TO_SHORTSTR']; compilerproc; // Converts the zero-terminated string p to the short string res: the length is the IndexByte position of the terminator, limited to high(res), and the characters are copied with Move. A nil p gives an empty string.
- {$ifndef FPC_PROFILE}
- nostackframe;
- {$endif}
- // eax = res, edx = high(res), ecx = p
- asm
- {$ifdef FPC_PROFILE}
- push %eax
- push %edx
- push %ecx
- call mcount
- pop %ecx
- pop %edx
- pop %eax
- {$endif FPC_PROFILE}
- test %ecx, %ecx // p = nil?
- jz .LEmpty
- push %eax { save res }
- push %ecx { save p }
- push %edx { save high(res) }
- mov %ecx, %eax { eax = IndexByte.buf }
- { edx is already high(res) = IndexByte.count.
- Careful: using high(res) instead of -1 limits the scan by high(res) which is a good thing,
- but assumes that IndexByte is “safe” and won’t read potentially invalid memory past the searched byte even if formally (and wrongly) allowed by ‘count’.
- Generic and x86 versions are “safe”. }
- xor %ecx, %ecx { ecx = 0 = IndexByte.value }
- { Stack is already aligned on 16 bytes if the function is nostackframe: return address + push eax + push ecx + push edx.
- With a stack frame, there is an additional push ebp and need 12 more bytes to align. }
- {$if defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
- leal -12(%esp), %esp
- {$endif defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
- {$if defined(FPC_PIC) or not defined(has_i386_IndexByte_Impl)}
- call IndexByte
- {$else}
- call IndexByte_Impl { manually inline IndexByte }
- {$endif}
- {$if defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
- leal 12(%esp), %esp
- {$endif defined(FPC_SYSTEM_STACKALIGNMENT16) and defined(FPC_PROFILE)}
- pop %ecx { ecx = high(res) = Move.len }
- test %eax, %eax { If IndexByte result (eax) is non-negative (terminator is among first high(res) characters), use it, otherwise keep high(res). }
- {$ifdef CPUX86_HAS_CMOV}
- cmovns %eax, %ecx
- {$else}
- js .LEcxIsLen
- mov %eax, %ecx
- .LEcxIsLen:
- {$endif}
- pop %eax { pop p to eax = Move.src }
- pop %edx { pop res to edx }
- mov %cl, (%edx) { res[0] := len }
- inc %edx { res[1] = Move.dst }
- {$ifdef FPC_PROFILE}
- {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
- leal -12(%esp), %esp
- {$endif FPC_SYSTEM_STACKALIGNMENT16}
- call Move
- {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
- leal 12(%esp), %esp
- {$endif FPC_SYSTEM_STACKALIGNMENT16}
- jmp .LReturn
- {$else FPC_PROFILE}
- jmp Move { can perform a tail call }
- {$endif FPC_PROFILE}
- .LEmpty:
- movb $0, (%eax) // res[0] := 0, i.e. the empty string
- {$ifdef FPC_PROFILE}
- .LReturn:
- {$endif}
- end;
- {$endif FPC_SYSTEM_HAS_FPC_PCHAR_TO_SHORTSTR}
- {$undef has_i386_IndexByte_Impl} { no longer required }
- {$IFNDEF INTERNAL_BACKTRACE}
- {$define FPC_SYSTEM_HAS_GET_FRAME}
- { Returns the current contents of the EBP register. }
- function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- mov %ebp, %eax // result := ebp
- end;
- {$ENDIF not INTERNAL_BACKTRACE}
- {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
- { Returns the dword found at the top of the stack on entry. The routine has no stack
-   frame, so that is the address this call returns to. }
- Function Get_pc_addr : Pointer;assembler;nostackframe;
- asm
- mov (%esp), %eax // result := [esp]
- end;
- {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
- function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer; // Returns the pointer stored at framebp+4, or nil if framebp is nil (win32: also nil when framebp lies outside [Sptr, StackTop]). The addr parameter is not used by either variant.
- {$if defined(win32)}
- { Windows has StackTop always properly set }
- begin
- if assigned(framebp) and (framebp<=StackTop) and (framebp>=Sptr) then
- Result:=PPointer(framebp+4)^ // NOTE(review): presumably the return address saved just above the frame pointer - confirm
- else
- Result:=nil;
- end;
- {$else defined(win32)}
- nostackframe;assembler;
- asm
- orl %eax,%eax // framebp = nil?
- jz .Lg_a_null // yes: eax is already 0, return it as nil
- movl 4(%eax),%eax // result := pointer stored at framebp+4
- .Lg_a_null:
- end;
- {$endif defined(win32)}
- {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
- function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer; // Returns the pointer stored at framebp, or nil if framebp is nil (win32: also nil when framebp lies outside [Sptr, StackTop]). The addr parameter is not used by either variant.
- {$if defined(win32)}
- { Windows has StackTop always properly set }
- begin
- if assigned(framebp) and (framebp<=StackTop) and (framebp>=Sptr) then
- Result:=PPointer(framebp)^ // NOTE(review): presumably the caller's saved frame pointer - confirm
- else
- Result:=nil;
- end;
- {$else defined(win32)}
- nostackframe;assembler;
- asm
- orl %eax,%eax // framebp = nil?
- jz .Lgnf_null // yes: eax is already 0, return it as nil
- movl (%eax),%eax // result := pointer stored at framebp
- .Lgnf_null:
- end;
- {$endif defined(win32)}
- {$define FPC_SYSTEM_HAS_SPTR}
- { Returns the value of ESP as seen inside this frameless routine. }
- Function Sptr : Pointer;assembler;nostackframe;
- asm
- mov %esp, %eax // result := esp
- end;
- {****************************************************************************
- Str()
- ****************************************************************************}
- {$if defined(disabled) and defined(regcall) }
- {$define FPC_SYSTEM_HAS_INT_STR_LONGWORD}
- {$define FPC_SYSTEM_HAS_INT_STR_LONGINT}
- label str_int_shortcut; // jump target defined inside the longint overload of int_str
- procedure int_str(l:longword;out s:shortstring);assembler;nostackframe; // Unsigned variant: sets up the registers and continues in the longint overload at str_int_shortcut. This code is only compiled when both 'disabled' and 'regcall' are defined.
- asm
- pushl %esi // same three registers the longint overload saves; they are popped at its .Ldone
- pushl %edi
- pushl %ebx
- mov %edx,%edi // edi = second register argument; NOTE(review): presumably @s - confirm
- xor %edx,%edx // edx = 0, the value the signed path leaves there for a non-negative number
- jmp str_int_shortcut
- end;
- procedure int_str(l:longint;out s:shortstring);assembler;nostackframe; // Writes the decimal representation of l into s. This code is only compiled when both 'disabled' and 'regcall' are defined.
- {Optimized for speed, but balanced with size.}
- const digits:array[0..9] of cardinal=(0,10,100,1000,10000,
- 100000,1000000,10000000,
- 100000000,1000000000);
- asm
- {$ifdef FPC_PROFILE}
- push %eax
- push %edx
- push %ecx
- call mcount
- pop %ecx
- pop %edx
- pop %eax
- {$endif FPC_PROFILE}
- push %esi
- push %edi
- push %ebx
- movl %edx,%edi // edi = second register argument; NOTE(review): presumably @s - confirm
- { Calculate absolute value and put sign in edx}
- cltd
- xorl %edx,%eax
- subl %edx,%eax
- negl %edx // edx = 1 for a negative l, 0 otherwise
- str_int_shortcut:
- movl %ecx,%esi // esi = third register argument; NOTE(review): presumably high(s) - confirm
- {Calculate amount of digits in ecx.}
- xorl %ecx,%ecx
- bsrl %eax,%ecx
- incl %ecx
- imul $1233,%ecx // 1233/4096 approximates log10(2), giving a digit-count estimate from the bit count
- shr $12,%ecx
- {$ifdef FPC_PIC}
- call fpc_geteipasebx
- {$ifdef darwin}
- movl digits-.Lpic(%ebx),%ebx
- {$else}
- addl $_GLOBAL_OFFSET_TABLE_,%ebx
- movl digits@GOT(%ebx),%ebx
- {$endif}
- cmpl (%ebx,%ecx,4),%eax
- {$else}
- cmpl digits(,%ecx,4),%eax
- {$endif}
- cmc // CF = 1 when eax >= digits[ecx], i.e. the estimate was one too low
- adcl $0,%ecx {Nr. digits ready in ecx.}
- {Write length & sign.}
- lea (%edx,%ecx),%ebx // bl = digit count plus 1 if negative
- movb $45,%bh {movb $'-,%bh Not supported by our ATT reader.}
- movw %bx,(%edi) // stores bl and bh ('-') as the first two bytes at edi
- addl %edx,%edi
- subl %edx,%esi
- {Skip digits beyond string length.}
- movl %eax,%edx
- subl %ecx,%esi
- jae .Lloop_write
- .balign 4
- .Lloop_skip:
- movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
- mull %edx
- shrl $3,%edx
- decl %ecx
- jz .Ldone {If (l<0) and (high(s)=1) this jump is taken.}
- incl %esi
- jnz .Lloop_skip
- {Write out digits.}
- .balign 4
- .Lloop_write:
- movl $0xcccccccd,%eax {Divide by 10 using mul+shr}
- {Pre-add '0'}
- leal 48(%edx),%ebx {leal $'0(,%edx),%ebx Not supported by our ATT reader.}
- mull %edx
- shrl $3,%edx
- leal (%edx,%edx,8),%eax {x mod 10 = x-10*(x div 10)}
- subl %edx,%ebx
- subl %eax,%ebx
- movb %bl,(%edi,%ecx) // store the digit character at position ecx
- decl %ecx
- jnz .Lloop_write
- .Ldone:
- popl %ebx
- popl %edi
- popl %esi
- end;
- {$endif}
- {****************************************************************************
- Bounds Check
- ****************************************************************************}
- { do a thread-safe inc/dec }
- {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
- { Decrements l with a LOCK-prefixed instruction; returns true when the new value is 0.
-   In: eax = @l. }
- function cpudeclocked(var l : longint) : boolean;assembler;nostackframe;
- asm
- lock decl (%eax) // locked read-modify-write; ZF is set when the result is zero
- sete %al // result := ZF
- end;
- {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
- { Increments l with a LOCK-prefixed instruction. In: eax = @l. }
- procedure cpuinclocked(var l : longint);assembler;nostackframe;
- asm
- lock incl (%eax) // locked read-modify-write
- end;
- // inline SMP check and normal lock.
- // the locked one is so slow, inlining doesn't matter.
- { Decrements l and reports whether it reached 0. Uses the LOCK-prefixed
-   cpudeclocked only when ismultithread is set; otherwise a plain dec. }
- function declocked(var l : longint) : boolean; inline;
- begin
- if ismultithread then
- declocked:=cpudeclocked(l)
- else
- begin
- dec(l);
- declocked:=(l=0);
- end;
- end;
- { Increments l. Uses the LOCK-prefixed cpuinclocked only when ismultithread
-   is set; otherwise a plain inc. }
- procedure inclocked(var l : longint); inline;
- begin
- if ismultithread then
- cpuinclocked(l)
- else
- inc(l);
- end;
- { Atomically subtracts 1 from Target and returns the new value. In: eax = @Target. }
- function InterLockedDecrement (var Target: longint) : longint; assembler;
- asm
- movl %eax,%edx // edx = @Target
- movl $-1,%eax // eax = addend
- lock xaddl %eax,(%edx) // eax := old Target, Target := old - 1
- decl %eax // result := old - 1
- end;
- { Atomically adds 1 to Target and returns the new value. In: eax = @Target. }
- function InterLockedIncrement (var Target: longint) : longint; assembler;
- asm
- movl %eax,%edx // edx = @Target
- movl $1,%eax // eax = addend
- lock xaddl %eax,(%edx) // eax := old Target, Target := old + 1
- incl %eax // result := old + 1
- end;
- { Stores Source in Target and returns the previous value of Target.
-   In: eax = @Target, edx = Source. XCHG with a memory operand is always locked. }
- function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
- asm
- movl %eax,%ecx // ecx = @Target
- movl %edx,%eax // eax = Source
- xchgl %eax,(%ecx) // eax := old Target, Target := Source
- end;
- { Atomically adds Source to Target and returns the previous value of Target.
-   In: eax = @Target, edx = Source. }
- function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
- asm
- lock xaddl %edx,(%eax) // edx := old Target, Target := old + Source
- movl %edx,%eax // result := old Target
- end;
- { Atomically replaces Target with NewValue if Target equals Comperand; returns the
-   value Target held before the operation.
-   In: eax = @Target, edx = NewValue, ecx = Comperand. }
- function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler;
- asm
- xchgl %ecx,%eax // eax = Comperand (as CMPXCHG requires), ecx = @Target
- lock cmpxchgl %edx,(%ecx) // equal: Target := NewValue; otherwise eax := Target
- end;
- function InterlockedCompareExchange64(var Target: int64; NewValue: int64; Comperand: int64): int64; assembler; // 64-bit compare-and-swap on Target via LOCK CMPXCHG8B; the result is left in edx:eax by that instruction.
- asm
- pushl %ebx // ebx and edi are used below and restored before returning
- pushl %edi
- movl %eax,%edi // edi = @Target
- movl Comperand+4,%edx // edx:eax = Comperand
- movl Comperand+0,%eax
- movl NewValue+4,%ecx // ecx:ebx = NewValue
- movl NewValue+0,%ebx
- lock cmpxchg8b (%edi) // if Target = edx:eax then Target := ecx:ebx, else edx:eax := Target
- pop %edi
- pop %ebx
- end;
- {****************************************************************************
- FPU
- ****************************************************************************}
- const
- { Internal constants for use in system unit }
- FPU_Invalid = 1; // the FPU_* values below are single-bit flags
- FPU_Denormal = 2;
- FPU_DivisionByZero = 4;
- FPU_Overflow = 8;
- FPU_Underflow = $10;
- FPU_StackUnderflow = $20;
- FPU_StackOverflow = $40;
- FPU_ExceptionMask = $ff; // all eight low bits
- MM_Invalid = 1; // the MM_* values below are single-bit flags; NOTE(review): presumably the MXCSR exception flag bits - confirm
- MM_Denormal = 2;
- MM_DivisionByZero = 4;
- MM_Overflow = 8;
- MM_Underflow = $10;
- MM_Precicion = $20; // identifier spelling (sic) is part of the name other code may use
- MM_ExceptionMask = $3f; // OR of the six MM_* flags above
- MM_MaskInvalidOp = %0000000010000000; // bit 7; each MM_Mask* value equals the matching MM_* flag shifted left by 7
- MM_MaskDenorm = %0000000100000000; // bit 8
- MM_MaskDivZero = %0000001000000000; // bit 9
- MM_MaskOverflow = %0000010000000000; // bit 10
- MM_MaskUnderflow = %0000100000000000; // bit 11
- MM_MaskPrecision = %0001000000000000; // bit 12
- {$define FPC_SYSTEM_HAS_SYSINITFPU}
- Procedure SysInitFPU; // Intentionally empty on this target: the body performs no work.
- begin
- end;
- {$define FPC_SYSTEM_HAS_SYSRESETFPU}
- Procedure SysResetFPU; // Reinitialises the x87 FPU and loads Default8087CW; if has_sse_support is set, also loads DefaultMXCSR into MXCSR.
- var
- { these locals are so we don't have to hack pic code in the assembler }
- localmxcsr: dword;
- localfpucw: word;
- begin
- localfpucw:=Default8087CW;
- asm
- fninit // reset the FPU state
- fwait
- fldcw localfpucw // load the control word copied from Default8087CW
- end;
- if has_sse_support then
- begin
- localmxcsr:=DefaultMXCSR;
- asm
- { setup sse exceptions }
- {$ifndef OLD_ASSEMBLER}
- ldmxcsr localmxcsr
- {$else OLD_ASSEMBLER}
- mov localmxcsr,%eax // old assemblers: place the value on the stack and emit ldmxcsr (%esp) as raw bytes
- subl $4,%esp
- mov %eax,(%esp)
- //ldmxcsr (%esp)
- .byte 0x0f,0xae,0x14,0x24
- addl $4,%esp
- {$endif OLD_ASSEMBLER}
- end;
- end;
- end;
- { because of the brain dead sse detection on x86, this test is postponed }
- procedure fpc_cpucodeinit; // Queries CPUID (when cpuid_support is set) to fill the has_*_support flags and fast_large_repmovstosb, captures the host's FPU/MXCSR control words for libraries, resets the FPU and finally sets fpc_cpucodeinit_performed.
- var
- _eax,_ecx_cpuid1,_edx_cpuid1,_ebx_cpuid7 : longint;
- begin
- if cpuid_support then
- begin
- asm
- movl $1,%eax // CPUID leaf 1
- xorl %ecx,%ecx
- cpuid
- movl %edx,_edx_cpuid1
- movl %ecx,_ecx_cpuid1
- end ['ebx'];
- has_mmx_support:=(_edx_cpuid1 and $800000)<>0; // leaf 1 EDX bit 23
- if ((_edx_cpuid1 and $2000000)<>0) then // leaf 1 EDX bit 25
- begin
- os_supports_sse:=true;
- sse_check:=true;
- asm
- { force an sse exception if no sse is supported, the exception handler sets
- os_supports_sse to false then }
- { don't change this instruction, the code above depends on its size }
- {$ifdef OLD_ASSEMBLER}
- .byte 0x0f,0x28,0xf7
- {$else}
- movaps %xmm7, %xmm6
- {$endif not EMX}
- end;
- sse_check:=false;
- has_sse_support:=os_supports_sse;
- end;
- if has_sse_support then
- begin
- has_sse2_support:=((_edx_cpuid1 and $4000000)<>0); // leaf 1 EDX bit 26
- has_sse3_support:=((_ecx_cpuid1 and $200)<>0); // leaf 1 ECX bit 9; NOTE(review): Intel documents ECX bit 0 as SSE3 and bit 9 as SSSE3 - confirm which is intended
- { now avx }
- asm
- xorl %eax,%eax // CPUID leaf 0: eax returns the highest supported basic leaf
- cpuid
- movl %eax,_eax
- end;
- if _eax>=7 then
- begin
- asm
- movl $7,%eax // CPUID leaf 7, subleaf 0
- xorl %ecx,%ecx
- cpuid
- movl %ebx,_ebx_cpuid7
- end;
- fast_large_repmovstosb:=_ebx_cpuid7 and (1 shl 9)<>0; // leaf 7 EBX bit 9
- if (_ecx_cpuid1 and $08000000<>0 {XGETBV support?}) then
- begin
- asm
- xorl %ecx,%ecx // register index 0 for xgetbv
- .byte 0x0f,0x01,0xd0 { xgetbv }
- movl %eax,_eax
- end;
- if (_eax and 6)=6 then // bits 1 and 2 of the xgetbv result must both be set; NOTE(review): presumably the OS-enabled SSE and AVX state bits - confirm
- begin
- has_avx_support:=(_ecx_cpuid1 and $10000000)<>0; // leaf 1 ECX bit 28
- has_avx2_support:=(_ebx_cpuid7 and $20)<>0; // leaf 7 EBX bit 5
- end;
- end;
- end;
- end;
- end;
- { don't let libraries influence the FPU cw set by the host program }
- if IsLibrary then
- begin
- Default8087CW:=Get8087CW;
- if has_sse_support then
- DefaultMXCSR:=GetMXCSR;
- end;
- SysResetFPU;
- fpc_cpucodeinit_performed:=true;
- end;
- {$if not defined(darwin) and defined(regcall) }
- { darwin requires that the stack is aligned to 16 bytes when calling another function }
- {$ifdef FPC_HAS_FEATURE_ANSISTRINGS}
- {$define FPC_SYSTEM_HAS_ANSISTR_DECR_REF}
- Procedure fpc_AnsiStr_Decr_Ref (Var S : Pointer); [Public,Alias:'FPC_ANSISTR_DECR_REF']; compilerproc; nostackframe; assembler; // Sets S to nil and, unless the string's reference count is negative, decrements that count (locked when ismultithread is set); when it reaches zero the allocation is released through FPC_FREEMEM.
- asm
- movl (%eax),%edx // edx = S (pointer to the string data)
- testl %edx,%edx
- jz .Lquit // S is already nil
- movl $0,(%eax) // s:=nil
- cmpl $0,-8(%edx) // exit if refcount<0
- jl .Lquit
- {$ifdef FPC_PIC}
- call fpc_geteipasecx
- addl $_GLOBAL_OFFSET_TABLE_,%ecx
- movl ismultithread@GOT(%ecx),%ecx
- cmpl $0,(%ecx)
- {$else FPC_PIC}
- cmpl $0,ismultithread
- {$endif FPC_PIC}
- je .Lskiplock
- .byte 0xF0 // LOCK prefix, jumped over if IsMultiThread = false. FPC assembler does not accept disjoint LOCK mnemonic.
- .Lskiplock:
- decl -8(%edx) // decrement the reference count stored 8 bytes before the data
- jz .Lfree
- .Lquit:
- ret
- .Lfree:
- leal -12(%edx),%eax // points to start of allocation
- { freemem is not an assembler leaf function like fpc_geteipasecx, so it
- needs to be called with proper stack alignment }
- {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
- leal -12(%esp),%esp
- call FPC_FREEMEM
- leal 12(%esp),%esp
- {$else FPC_SYSTEM_STACKALIGNMENT16}
- jmp FPC_FREEMEM // can perform a tail call
- {$endif FPC_SYSTEM_STACKALIGNMENT16}
- end;
- function fpc_truely_ansistr_unique(Var S : Pointer): Pointer; forward;
- {$define FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
- Function fpc_ansistr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_ANSISTR_UNIQUE']; compilerproc; nostackframe;assembler; // Returns S unchanged when it is nil or its reference count is exactly 1; otherwise returns the result of fpc_truely_ansistr_unique(S).
- asm
- // Var S located in register
- // Var $result located in register
- movl %eax,%edx // edx = @S, kept for the slow path
- // [437] pointer(result) := pointer(s);
- movl (%eax),%eax
- // [438] If Pointer(S)=Nil then
- testl %eax,%eax
- je .Lj4031
- .Lj4036:
- // [440] if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
- movl -8(%eax),%ecx // ecx = reference count stored 8 bytes before the data
- cmpl $1,%ecx
- je .Lj4038
- // [441] result:=fpc_truely_ansistr_unique(s);
- movl %edx,%eax
- {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
- leal -12(%esp),%esp
- {$endif FPC_SYSTEM_STACKALIGNMENT16}
- call fpc_truely_ansistr_unique
- {$ifdef FPC_SYSTEM_STACKALIGNMENT16}
- leal 12(%esp),%esp
- {$endif FPC_SYSTEM_STACKALIGNMENT16}
- .Lj4038:
- .Lj4031:
- // [442] end;
- end;
- {$endif FPC_HAS_FEATURE_ANSISTRINGS}
- {$endif ndef darwin and defined(regcall) }
- {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
- {$define FPC_SYSTEM_HAS_MEM_BARRIER}
- procedure ReadBarrier;assembler;nostackframe; // Executes LFENCE when CPUX86_HAS_SSE2 is defined, otherwise a LOCK-prefixed add of 0 to the top of the stack.
- asm
- {$ifdef CPUX86_HAS_SSE2}
- lfence
- {$else CPUX86_HAS_SSE2}
- lock
- addl $0,0(%esp) // adding 0 leaves the stack contents unchanged
- {$endif CPUX86_HAS_SSE2}
- end;
- procedure ReadDependencyBarrier;{$ifdef SYSTEMINLINE}inline;{$endif} // Intentionally empty: no instruction is emitted for this barrier on this target.
- begin
- { reads imply barrier on earlier reads depended on }
- end;
- procedure ReadWriteBarrier;assembler;nostackframe; // Executes MFENCE when CPUX86_HAS_SSE2 is defined, otherwise a LOCK-prefixed add of 0 to the top of the stack.
- asm
- {$ifdef CPUX86_HAS_SSE2}
- mfence
- {$else CPUX86_HAS_SSE2}
- lock
- addl $0,0(%esp) // adding 0 leaves the stack contents unchanged
- {$endif CPUX86_HAS_SSE2}
- end;
- procedure WriteBarrier;assembler;nostackframe; // Executes SFENCE when CPUX86_HAS_SSEUNIT is defined; otherwise the body is empty.
- asm
- {$ifdef CPUX86_HAS_SSEUNIT}
- sfence
- {$endif CPUX86_HAS_SSEUNIT}
- end;
- {$endif}
- {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
- {$define FPC_SYSTEM_HAS_BSF_QWORD}
- function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe; // Returns the index (0..63) of the lowest set bit of AValue, or 255 when AValue is 0. AValue is read from the stack: low dword at 4(%esp), high dword at 8(%esp).
- asm
- bsfl 4(%esp),%eax // scan the low dword; ZF is set when it is zero
- jnz .L2 // found in the low dword: index 0..31
- .L1:
- bsfl 8(%esp),%eax // scan the high dword
- jnz .L3
- movl $223,%eax // both halves zero: 223 + 32 = 255
- .L3:
- addl $32,%eax // bits of the high dword are numbered 32..63
- .L2:
- end;
- {$endif FPC_SYSTEM_HAS_BSF_QWORD}
- {$ifndef FPC_SYSTEM_HAS_BSR_QWORD}
- {$define FPC_SYSTEM_HAS_BSR_QWORD}
- function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe; // Returns the index (0..63) of the highest set bit of AValue, or 255 when AValue is 0. AValue is read from the stack: low dword at 4(%esp), high dword at 8(%esp).
- asm
- bsrl 8(%esp),%eax // scan the high dword first; ZF is set when it is zero
- jz .L1
- add $32,%eax // bits of the high dword are numbered 32..63
- jmp .L2
- .L1:
- bsrl 4(%esp),%eax // high dword is zero: scan the low dword
- jnz .L2
- movl $255,%eax // both halves zero
- .L2:
- end;
- {$endif FPC_SYSTEM_HAS_BSR_QWORD}
- {$ifndef FPC_SYSTEM_HAS_SAR_QWORD}
- {$define FPC_SYSTEM_HAS_SAR_QWORD}
- function fpc_SarInt64(Const AValue : Int64;const Shift : Byte): Int64; [Public,Alias:'FPC_SARINT64']; compilerproc; assembler; nostackframe; // Arithmetic right shift of the 64-bit AValue by (Shift and 63) bits; the result is produced in edx:eax.
- asm
- movb %al,%cl // Shift arrives in al; the shift instructions need the count in cl
- movl 8(%esp),%edx // edx = high dword of AValue
- movl 4(%esp),%eax // eax = low dword of AValue
- andb $63,%cl
- cmpb $32,%cl
- jnb .L1
- shrdl %cl,%edx,%eax // count < 32: shift the low dword right, filling from the high dword
- sarl %cl,%edx // high dword shifts with sign fill
- jmp .Lexit
- .L1:
- movl %edx,%eax // count >= 32: the low result comes from the high dword
- sarl $31,%edx // high result is all sign bits
- andb $31,%cl // remaining count = count - 32
- sarl %cl,%eax
- .Lexit:
- end;
- {$endif FPC_SYSTEM_HAS_SAR_QWORD}
|