{*
===============================================================================
The original notice of the softfloat package is shown below. The conversion
to pascal was done by Carl Eric Codere in 2002 ([email protected]).
===============================================================================

This C source file is part of the SoftFloat IEC/IEEE Floating-Point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page
`http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  
USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*}

unit softfpu;

{ Overflow checking must be disabled,
  since some operations expect overflow!
}
{$Q-}

{ `hascompilerproc' is defined for every FPC version except 1.0.x; the
  declarations below use it to decide whether to add the `compilerproc'
  directive. }
{$ifndef ver1_0}
{$ifdef fpc}
{$define hascompilerproc}
{$endif}
{$endif}

{$ifdef fpc}
{$goto on}
{$endif}

interface

{
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point types.
-------------------------------------------------------------------------------
}
TYPE
  float32 = longword;

  flag = byte;
  uint8 = byte;
  int8 = shortint;
  uint16 = word;
  { The 16-bit signed aliases are tied to `smallint' rather than `integer':
    `integer' is 16 bits wide only in the default FPC/TP compiler modes and
    becomes 32 bits in the objfpc and delphi modes, which would silently
    widen int16 and sbits16. }
  int16 = smallint;
  uint32 = longword;
  int32 = longint;

  bits8 = byte;
  sbits8 = shortint;
  bits16 = word;
  sbits16 = smallint;
  sbits32 = longint;
  bits32 = longword;
{$ifndef fpc}
  { Compilers other than FPC get `qword' aliased to int64.  Note that this
    makes uint64 and bits64 signed 64-bit types on those compilers. }
  qword = int64;
{$endif}
  uint64 = qword;
  bits64 = qword;
  sbits64 = int64;

  { float64 and int64rec are stored as two 32-bit halves.  The halves are
    declared low-first when ENDIAN_LITTLE is defined and high-first
    otherwise. }
{$ifdef ENDIAN_LITTLE}
  float64 = packed record
    low: bits32;
    high: bits32;
  end;

  int64rec = packed record
    low: bits32;
    high: bits32;
  end;
{$else}
  float64 = packed record
    high,low : bits32;
  end;

  int64rec = packed record
    high,low : bits32;
  end;
{$endif}

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  
The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_lt(a: float64;b: float64): flag; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is less than
or equal to the corresponding value `b', and 0 otherwise.  The comparison
is performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_le(a: float64;b: float64): flag; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.  The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_eq(a: float64;b: float64): flag; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the square root of the double-precision floating-point value `a'.
The operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_sqrt( a: float64; var out: float64 ); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the remainder of the double-precision floating-point value `a'
with respect to the corresponding value `b'.  
The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_rem(a: float64; b : float64; var out: float64); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of dividing the double-precision floating-point value `a'
by the corresponding value `b'.  The operation is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_div(a: float64; b : float64 ; var out: float64 ); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of multiplying the double-precision floating-point values
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_mul( a: float64; b:float64; Var out: float64); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of subtracting the double-precision floating-point values
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_sub(a: float64; b : float64; var out: float64); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of adding the double-precision floating-point values `a'
and `b'.  
The operation is performed according to the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_add( a: float64; b : float64; Var out : float64); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Rounds the double-precision floating-point value `a' to an integer,
and returns the result as a double-precision floating-point value.  The
operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float64_round_to_int(a: float64; var out: float64 ); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the single-precision floating-point format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_to_float32(a: float64 ): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic, except that the conversion is always rounded toward zero.
If `a' is a NaN, the largest positive integer is returned.  
Otherwise, if
the conversion overflows, the largest integer with the same sign as `a' is
returned.
-------------------------------------------------------------------------------
*}
Function float64_to_int32_round_to_zero(a: float64 ): int32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic---which means in particular that the conversion is rounded
according to the current rounding mode.  If `a' is a NaN, the largest
positive integer is returned.  Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*}
Function float64_to_int32(a: float64): int32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_lt( a:float32 ; b : float32): flag; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is less than
or equal to the corresponding value `b', and 0 otherwise.  
The comparison
is performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_le( a: float32; b : float32 ):flag; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.  The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_eq( a:float32; b:float32): flag; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the square root of the single-precision floating-point value `a'.
The operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_sqrt(a: float32 ): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the remainder of the single-precision floating-point value `a'
with respect to the corresponding value `b'.  The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_rem(a: float32; b: float32 ):float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of dividing the single-precision floating-point value `a'
by the corresponding value `b'.  
The operation is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_div(a: float32;b: float32 ): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of multiplying the single-precision floating-point values
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_mul(a: float32; b: float32 ) : float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of subtracting the single-precision floating-point values
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_sub( a: float32 ; b:float32 ): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of adding the single-precision floating-point values `a'
and `b'.  The operation is performed according to the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_add( a: float32; b:float32 ): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Rounds the single-precision floating-point value `a' to an integer,
and returns the result as a single-precision floating-point value.  
The
operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_round_to_int( a: float32): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the double-precision floating-point format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
{ NOTE(review): declared as a procedure; the result is presumably delivered
  through the var parameter `out' -- confirm against the implementation. }
Procedure float32_to_float64( a : float32; var out: Float64); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic, except that the conversion is always rounded toward zero.
If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
the conversion overflows, the largest integer with the same sign as `a' is
returned.
-------------------------------------------------------------------------------
*}
Function float32_to_int32_round_to_zero( a: Float32 ): int32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic---which means in particular that the conversion is rounded
according to the current rounding mode.  If `a' is a NaN, the largest
positive integer is returned.  
Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*}
Function float32_to_int32( a : float32) : int32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a' to
the double-precision floating-point format.  The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
This is a procedure: the result is delivered through the `c' variable
parameter.
-------------------------------------------------------------------------------
*}
Procedure int32_to_float64( a: int32; var c: float64 ); {$ifdef hascompilerproc} compilerproc; {$endif}

{*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a' to
the single-precision floating-point format.  The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function int32_to_float32( a: int32): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

{*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit two's complement integer `a'
| to the double-precision floating-point format.  The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*}
function int64_to_float64( a: int64 ): float64; {$ifdef hascompilerproc} compilerproc; {$endif}

{*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit two's complement integer `a'
| to the single-precision floating-point format.  
The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*}
function int64_to_float32( a: int64 ): float32; {$ifdef hascompilerproc} compilerproc; {$endif}

CONST
{
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point underflow tininess-detection mode.
These are the two values that `float_detect_tininess' may hold.
-------------------------------------------------------------------------------
*}
    float_tininess_after_rounding  = 0;
    float_tininess_before_rounding = 1;

{*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point rounding mode.
-------------------------------------------------------------------------------
*}
{
Round to nearest.
This is the default mode. It should be used unless there is a specific
need for one of the others. In this mode results are rounded to the
nearest representable value. If the result is midway between two
representable values, the even representable is chosen. Even here
means the lowest-order bit is zero. This rounding mode prevents
statistical bias and guarantees numeric stability: round-off errors
in a lengthy calculation will remain smaller than half of FLT_EPSILON.

Round toward plus Infinity.
All results are rounded to the smallest representable value which is
greater than the result.

Round toward minus Infinity.
All results are rounded to the largest representable value which is
less than the result.

Round toward zero.
All results are rounded to the largest representable value whose
magnitude is less than that of the result. 
In other words, if the
result is negative it is rounded up; if it is positive, it is
rounded down.
}
    float_round_nearest_even = 0;
    float_round_down         = 1;
    float_round_up           = 2;
    float_round_to_zero      = 3;

{*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point exception flags.
Each flag is a distinct single bit (1, 4, 8, 16, 32), so several flags can be
accumulated in one byte with `or' and tested with `and'.  The value 2 is not
assigned to any flag here.
-------------------------------------------------------------------------------
*}
    float_flag_invalid   =  1;
    float_flag_divbyzero =  4;
    float_flag_overflow  =  8;
    float_flag_underflow = 16;
    float_flag_inexact   = 32;

{*
-------------------------------------------------------------------------------
Floating-point rounding mode and exception flags.
Both are declared as typed constants with an initial value; float_raise
assigns to float_exception_flags, so they are used as initialized variables.
-------------------------------------------------------------------------------
*}
const
 float_rounding_mode : Byte = float_round_nearest_even;
 float_exception_flags : Byte = 0;

{*
-------------------------------------------------------------------------------
Underflow tininess-detection mode, statically initialized to default value.
(The declaration in `softfloat.h' must match the `int8' type here.)
-------------------------------------------------------------------------------
*}
const
 float_detect_tininess: int8 = float_tininess_after_rounding;

implementation

{*
-------------------------------------------------------------------------------
Raises the exceptions specified by `flags'.  Floating-point traps can be
defined here if desired.  It is currently not possible for such a trap
to substitute a result value.  
If traps are not implemented, this routine
should be simply `float_exception_flags |= flags;'.
-------------------------------------------------------------------------------
*}
{ Implementation note: this port does trap.  The bits of `i' are OR-ed into
  the global float_exception_flags, then the ACCUMULATED flags (not just `i')
  are tested in the order invalid, divide-by-zero, overflow, underflow, and
  RunError is called with 207, 200, 205 or 206 for the first one that is set.
  float_flag_inexact is recorded but never triggers RunError. }
procedure float_raise( i: shortint );
var
  raised: Byte;
Begin
  float_exception_flags := float_exception_flags or i;
  raised := float_exception_flags;
  if (raised and float_flag_invalid) <> 0 then
    RunError(207)
  else if (raised and float_flag_divbyzero) <> 0 then
    RunError(200)
  else if (raised and float_flag_overflow) <> 0 then
    RunError(205)
  else if (raised and float_flag_underflow) <> 0 then
    RunError(206);
end;

(*****************************************************************************)
(*----------------------------------------------------------------------------*)
(* Primitive arithmetic functions, including multi-word arithmetic, and       *)
(* division and square root approximations.  (Can be specialized to target if *)
(* desired.)                                                                  *)
(* ---------------------------------------------------------------------------*)
(*****************************************************************************)

{*
-------------------------------------------------------------------------------
Shifts `a' right by the number of bits given in `count'.  If any nonzero
bits are shifted off, they are ``jammed'' into the least significant bit of
the result by setting the least significant bit to 1.  
The value of `count' can be arbitrarily large; in particular, if `count' is
greater than 32, the result will be either 0 or 1, depending on whether `a'
is zero or nonzero.  The result is stored in the location pointed to by
`zPtr'.
-------------------------------------------------------------------------------
*}
Procedure shift32RightJamming( a: bits32 ; count: int16 ; VAR zPtr :bits32);
Begin
    { Nothing to shift: the value passes through unchanged. }
    if ( count = 0 ) then
    Begin
        zPtr := a;
        exit;
    End;
    { Everything is shifted out: only the sticky bit survives. }
    if ( count >= 32 ) then
    Begin
        zPtr := bits32( a <> 0 );
        exit;
    End;
    { `a shl ((-count) and 31)' isolates exactly the bits that fall off the
      right-hand end; if any of them is set the result's bit 0 is forced to 1. }
    zPtr := ( a shr count ) or bits32( ( a shl ( ( - count ) AND 31 ) ) <> 0 );
End;

{*
-------------------------------------------------------------------------------
Shifts the 64-bit value a0:a1 (`a0' is the high word) right by `count' bits.
Bits shifted off are discarded.  `count' may be arbitrarily large; for
`count' of 64 or more the result is 0.  The high word of the result is stored
in `z0Ptr' and the low word in `z1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure shift64Right(
     a0 :bits32; a1: bits32; count:int16; VAR z0Ptr:bits32; VAR z1Ptr:bits32);
Var
  hi, lo: bits32;
  negCount : int8;
Begin
    negCount := ( - count ) AND 31;
    hi := 0;
    if ( count = 0 ) then
    Begin
        lo := a1;
        hi := a0;
    End
    else if ( count < 32 ) then
    Begin
        { The low word receives the bits that leave the high word. }
        lo := ( a0 shl negCount ) OR ( a1 shr count );
        hi := a0 shr count;
    End
    else if ( count < 64 ) then
        lo := a0 shr ( count AND 31 )
    else
        lo := 0;
    z1Ptr := lo;
    z0Ptr := hi;
End;

{*
-------------------------------------------------------------------------------
Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
number of bits given in `count'.  
If any nonzero bits are shifted off, they
are ``jammed'' into the least significant bit of the result by setting the
least significant bit to 1.  The value of `count' can be arbitrarily large;
in particular, if `count' is greater than 64, the result will be either 0
or 1, depending on whether the concatenation of `a0' and `a1' is zero or
nonzero.  The result is broken into two 32-bit pieces which are stored at
the locations pointed to by `z0Ptr' and `z1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure shift64RightJamming(
     a0:bits32; a1: bits32; count:int16; VAR Z0Ptr :bits32;VAR z1Ptr: bits32 );
VAR
    hi, lo : bits32;
    negCount : int8;
Begin
    negCount := ( - count ) AND 31;
    { The high word is zero in every case where count is 32 or more. }
    hi := 0;
    if ( count = 0 ) then
    Begin
        lo := a1;
        hi := a0;
    End
    else if ( count < 32 ) then
    Begin
        { Sticky bit comes from the part of a1 that is shifted out. }
        lo := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
        hi := a0 shr count;
    End
    else if ( count = 32 ) then
        { All of a1 is shifted out. }
        lo := a0 OR bits32( a1 <> 0 )
    else if ( count < 64 ) then
        { Part of a0 and all of a1 are shifted out. }
        lo := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 )
    else
        { Everything is shifted out; only the sticky bit remains. }
        lo := bits32( ( a0 OR a1 ) <> 0 );
    z1Ptr := lo;
    z0Ptr := hi;
End;

{*
-------------------------------------------------------------------------------
Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
by 32 _plus_ the number of bits given in `count'.  The shifted result is
at most 64 nonzero bits; these are broken into two 32-bit pieces which are
stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
off form a third 32-bit result as follows:  The _last_ bit shifted off is
the most-significant bit of the extra result, and the other 31 bits of the
extra result are all zero if and only if _all_but_the_last_ bits shifted off
were all zero. 
 This extra result is stored in the location pointed to by
`z2Ptr'.  The value of `count' can be arbitrarily large.
    (This routine makes more sense if `a0', `a1', and `a2' are considered
to form a fixed-point value with binary point between `a1' and `a2'.  This
fixed-point value is shifted right by the number of bits given in `count',
and the integer part of the result is returned at the locations pointed to
by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
corrupted as described above, and is returned at the location pointed to by
`z2Ptr'.)
-------------------------------------------------------------------------------
}
Procedure shift64ExtraRightJamming(
     a0: bits32;
     a1: bits32;
     a2: bits32;
     count: int16;
     VAR z0Ptr: bits32;
     VAR z1Ptr: bits32;
     VAR z2Ptr: bits32 );
Var
    hi, mid, extra: bits32;
    negCount : int8;
Begin
    negCount := ( - count ) AND 31;
    { A zero shift returns the three words untouched, with no jamming. }
    if ( count = 0 ) then
    Begin
        z2Ptr := a2;
        z1Ptr := a1;
        z0Ptr := a0;
        exit;
    End;
    hi := 0;
    if ( count < 32 ) then
    Begin
        extra := a1 shl negCount;
        mid := ( a0 shl negCount ) OR ( a1 shr count );
        hi := a0 shr count;
    End
    else if ( count = 32 ) then
    Begin
        extra := a1;
        mid := a0;
    End
    else
    Begin
        { From here on all of a1 lies below the extra word's top bit, so it
          only contributes to the sticky information kept in a2. }
        a2 := a2 or a1;
        if ( count < 64 ) then
        Begin
            extra := a0 shl negCount;
            mid := a0 shr ( count AND 31 );
        End
        else
        Begin
            if count = 64 then
               extra := a0
            else
               extra := bits32( a0 <> 0 );
            mid := 0;
        End;
    End;
    { Jam the sticky information into bit 0 of the extra word. }
    extra := extra or bits32( a2 <> 0 );
    z2Ptr := extra;
    z1Ptr := mid;
    z0Ptr := hi;
End;

{*
-------------------------------------------------------------------------------
Shifts the 64-bit value a0:a1 (`a0' is the high word) left by `count' bits.
Bits shifted off the top are discarded.  `count' must be less than 32.  The
high word of the result is stored in `z0Ptr' and the low word in `z1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure shortShift64Left(
     a0:bits32; a1:bits32; count:int16; VAR z0Ptr:bits32; VAR z1Ptr:bits32 );
Var
    hi: bits32;
Begin
    hi := a0;
    { For a non-zero shift the top `count' bits of a1 move into the high word. }
    if count <> 0 then
      hi := ( a0 shl count ) OR ( a1 shr ( ( - count ) AND 31 ) );
    z1Ptr := a1 shl count;
    z0Ptr := hi;
End;

{*
-------------------------------------------------------------------------------
Shifts the 96-bit value a0:a1:a2 (`a0' is the most significant word) left by
`count' bits.  Bits shifted off the top are discarded.  `count' must be less
than 32.  The three result words are stored, most significant first, in
`z0Ptr', `z1Ptr', and `z2Ptr'.
-------------------------------------------------------------------------------
*}
Procedure shortShift96Left(
     a0: bits32;
     a1: bits32;
     a2: bits32;
     count: int16;
     VAR z0Ptr: bits32;
     VAR z1Ptr: bits32;
     VAR z2Ptr: bits32 );
Var
    hi, mid, lo: bits32;
    negCount: int8;
Begin
    lo := a2 shl count;
    mid := a1 shl count;
    hi := a0 shl count;
    if ( count > 0 ) then
    Begin
        { Carry the bits that leave each word into the next higher word. }
        negCount := ( ( - count ) AND 31 );
        mid := mid or ( a2 shr negCount );
        hi := hi or ( a1 shr negCount );
    End;
    z2Ptr := lo;
    z1Ptr := mid;
    z0Ptr := hi;
End;

{*
-------------------------------------------------------------------------------
Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
any carry out is lost.  
The result is broken into two 32-bit pieces which
are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure add64(
     a0:bits32; a1:bits32; b0:bits32; b1:bits32; VAR z0Ptr:bits32; VAR z1Ptr:bits32 );
Var
    lowSum: bits32;
Begin
    lowSum := a1 + b1;
    { The low sum wrapped around exactly when it is smaller than an operand. }
    z0Ptr := a0 + b0 + bits32( lowSum < a1 );
    z1Ptr := lowSum;
End;

{*
-------------------------------------------------------------------------------
Adds the 96-bit value a0:a1:a2 to the 96-bit value b0:b1:b2 (index 0 is the
most significant word).  Addition is modulo 2^96, so any carry out of the top
word is lost.  The three result words are stored, most significant first, in
`z0Ptr', `z1Ptr', and `z2Ptr'.
-------------------------------------------------------------------------------
*}
Procedure add96(
     a0: bits32;
     a1: bits32;
     a2: bits32;
     b0: bits32;
     b1: bits32;
     b2: bits32;
     VAR z0Ptr: bits32;
     VAR z1Ptr: bits32;
     VAR z2Ptr: bits32 );
var
    s0, s1, s2: bits32;
    carryMid, carryLow: int8;
Begin
    s2 := a2 + b2;
    carryLow := int8( s2 < a2 );      { carry out of the low word }
    s1 := a1 + b1;
    carryMid := int8( s1 < a1 );      { carry out of the middle word }
    s0 := a0 + b0;
    s1 := s1 + carryLow;
    { Adding the low carry may itself wrap the middle word. }
    s0 := s0 + bits32( s1 < carryLow );
    s0 := s0 + carryMid;
    z2Ptr := s2;
    z1Ptr := s1;
    z0Ptr := s0;
End;

{*
-------------------------------------------------------------------------------
Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
2^64, so any borrow out (carry out) is lost.  
The result is broken into two
32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
`z1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure sub64(
     a0: bits32; a1 : bits32; b0 :bits32; b1: bits32; VAR z0Ptr:bits32; VAR z1Ptr: bits32 );
Var
    borrow: bits32;
Begin
    { The low word borrows from the high word when a1 is smaller than b1. }
    borrow := bits32( a1 < b1 );
    z0Ptr := a0 - b0 - borrow;
    z1Ptr := a1 - b1;
End;

{*
-------------------------------------------------------------------------------
Subtracts the 96-bit value b0:b1:b2 from the 96-bit value a0:a1:a2 (index 0
is the most significant word).  Subtraction is modulo 2^96, so any borrow out
of the top word is lost.  The three result words are stored, most significant
first, in `z0Ptr', `z1Ptr', and `z2Ptr'.
-------------------------------------------------------------------------------
*}
Procedure sub96(
     a0:bits32;
     a1:bits32;
     a2:bits32;
     b0:bits32;
     b1:bits32;
     b2:bits32;
     VAR z0Ptr:bits32;
     VAR z1Ptr:bits32;
     VAR z2Ptr:bits32 );
Var
    d0, d1, d2: bits32;
    borrowMid, borrowLow: int8;
Begin
    d2 := a2 - b2;
    borrowLow := int8( a2 < b2 );     { borrow needed by the low word }
    d1 := a1 - b1;
    borrowMid := int8( a1 < b1 );     { borrow needed by the middle word }
    d0 := a0 - b0;
    { This test must see the middle word BEFORE the low borrow is removed:
      taking the borrow from a zero middle word borrows from the top word. }
    d0 := d0 - bits32( d1 < borrowLow );
    d1 := d1 - borrowLow;
    d0 := d0 - borrowMid;
    z2Ptr := d2;
    z1Ptr := d1;
    z0Ptr := d0;
End;

{*
-------------------------------------------------------------------------------
Multiplies `a' by `b' to obtain a 64-bit product.  
The product is broken
into two 32-bit pieces which are stored at the locations pointed to by
`z0Ptr' and `z1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure mul32To64( a:bits32; b:bits32; VAR z0Ptr: bits32; VAR z1Ptr:bits32 );
Var
    aHi, aLo, bHi, bLo: bits16;
    hiWord, crossA, crossB, loWord: bits32;
Begin
    { Split both operands into 16-bit halves and form the four partial
      products of the schoolbook multiplication. }
    aLo := a and $ffff;
    aHi := a shr 16;
    bLo := b and $ffff;
    bHi := b shr 16;
    loWord := ( bits32( aLo ) ) * bLo;
    crossA := ( bits32( aLo ) ) * bHi;
    crossB := ( bits32( aHi ) ) * bLo;
    hiWord := ( bits32( aHi ) ) * bHi;
    { Sum of the two cross products; a wrap-around is worth 2^32 at this
      position, i.e. 1 shl 16 in the high word. }
    crossA := crossA + crossB;
    hiWord := hiWord + ( ( bits32( crossA < crossB ) ) shl 16 ) + ( crossA shr 16 );
    crossA := crossA shl 16;
    loWord := loWord + crossA;
    { Carry from the low word into the high word. }
    hiWord := hiWord + bits32( loWord < crossA );
    z1Ptr := loWord;
    z0Ptr := hiWord;
End;

{*
-------------------------------------------------------------------------------
Multiplies the 64-bit value a0:a1 (`a0' is the high word) by the 32-bit value
`b' to obtain a 96-bit product.  The three product words are stored, most
significant first, in `z0Ptr', `z1Ptr', and `z2Ptr'.
-------------------------------------------------------------------------------
*}
Procedure mul64By32To96(
     a0:bits32;
     a1:bits32;
     b:bits32;
     VAR z0Ptr:bits32;
     VAR z1Ptr:bits32;
     VAR z2Ptr:bits32 );
Var
    p0, p1, p2, partial: bits32;
Begin
    mul32To64( a1, b, p1, p2 );          { low partial product  -> p1:p2 }
    mul32To64( a0, b, p0, partial );     { high partial product -> p0:partial }
    { Add the overlapping middle words, carrying into the top word. }
    add64( p0, partial, 0, p1, p0, p1 );
    z2Ptr := p2;
    z1Ptr := p1;
    z0Ptr := p0;
End;

{*
-------------------------------------------------------------------------------
Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
product.  
The product is broken into four 32-bit pieces which are stored at
the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
-------------------------------------------------------------------------------
*}
Procedure mul64To128(
     a0:bits32;
     a1:bits32;
     b0:bits32;
     b1:bits32;
     VAR z0Ptr:bits32;
     VAR z1Ptr:bits32;
     VAR z2Ptr:bits32;
     VAR z3Ptr:bits32 );
Var
    p0, p1, p2, p3: bits32;
    crossHi, crossLo: bits32;
Begin
    { Four 32x32 partial products, accumulated from the least significant
      end; each add64 call propagates the carry into the next higher word. }
    mul32To64( a1, b1, p2, p3 );
    mul32To64( a1, b0, p1, crossLo );
    add64( p1, crossLo, 0, p2, p1, p2 );
    mul32To64( a0, b0, p0, crossHi );
    add64( p0, crossHi, 0, p1, p0, p1 );
    mul32To64( a0, b1, crossHi, crossLo );
    add64( crossHi, crossLo, 0, p2, crossHi, p2 );
    add64( p0, p1, 0, crossHi, p0, p1 );
    z3Ptr := p3;
    z2Ptr := p2;
    z1Ptr := p1;
    z0Ptr := p0;
End;

{*
-------------------------------------------------------------------------------
Returns an approximation to the 32-bit integer quotient obtained by dividing
`b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
divisor `b' must be at least 2^31.  
If q is the exact quotient truncated
toward zero, the approximation returned lies between q and q + 2 inclusive.
If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
unsigned integer is returned.
-------------------------------------------------------------------------------
*}
Function estimateDiv64To32( a0:bits32; a1: bits32; b:bits32): bits32;
Var
    bHigh, bLowShifted: bits32;
    rem0, rem1, term0, term1: bits32;
    quot: bits32;
Begin
    { The quotient does not fit in 32 bits: saturate. }
    if ( b <= a0 ) then
    Begin
       estimateDiv64To32 := $FFFFFFFF;
       exit;
    End;
    { First estimate the upper 16 quotient bits using only the top half of b. }
    bHigh := b shr 16;
    if ( bHigh shl 16 <= a0 ) then
       quot := $FFFF0000
    else
       quot := ( a0 div bHigh ) shl 16;
    mul32To64( b, quot, term0, term1 );
    sub64( a0, a1, term0, term1, rem0, rem1 );
    { The estimate may be too large; step it down while the remainder is
      negative (top bit set), adding b shifted left by 16 back each time. }
    while ( ( rem0 and $80000000 ) <> 0 ) do
    Begin
        quot := quot - $10000;
        bLowShifted := b shl 16;
        add64( rem0, rem1, bHigh, bLowShifted, rem0, rem1 );
    End;
    { Now estimate the lower 16 quotient bits from the remainder. }
    rem0 := ( rem0 shl 16 ) OR ( rem1 shr 16 );
    if ( bHigh shl 16 <= rem0 ) then
      quot := quot or $FFFF
    else
      quot := quot or ( rem0 div bHigh );
    estimateDiv64To32 := quot;
End;

{*
-------------------------------------------------------------------------------
Returns an approximation to the square root of the 32-bit significand given
by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
`aExp' (the least significant bit) is 1, the integer returned approximates
2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  
In either
case, the approximation returned lies strictly within +/-2 of the exact
value.
-------------------------------------------------------------------------------
*}
Function estimateSqrt32( aExp: int16; a: bits32 ): bits32;
    { Per-interval corrections for the initial linear estimate, indexed by
      bits 27..30 of `a'; one table for each parity of the exponent. }
    const sqrtOddAdjustments: array[0..15] of bits16 = (
        $0004, $0022, $005D, $00B1, $011D, $019F, $0236, $02E0,
        $039C, $0468, $0545, $0631, $072B, $0832, $0946, $0A67
    );
    const sqrtEvenAdjustments: array[0..15] of bits16 = (
        $0A2D, $08AF, $075A, $0629, $051A, $0429, $0356, $029E,
        $0200, $0179, $0109, $00AF, $0068, $0034, $0012, $0002
    );
Var
    index: int8;
    z: bits32;
Begin
    index := ( a shr 27 ) AND 15;
    if ( aExp AND 1 ) <> 0  then
    Begin
        { Odd exponent: table-corrected first guess, then one refinement step. }
        z := $4000 + ( a shr 17 ) - sqrtOddAdjustments[ index ];
        z := ( ( a div z ) shl 14 ) + ( z shl 15 );
        a := a shr 1;
    End
    else
    Begin
        { Even exponent: same scheme with the other table; the estimate is
          clamped so that the shift by 15 cannot overflow 32 bits. }
        z := $8000 + ( a shr 17 ) - sqrtEvenAdjustments[ index ];
        z := a div z + z;
        if ( $20000 <= z ) then
          z := $FFFF8000
        else
          z := ( z shl 15 );
        if ( z <= a ) then
        Begin
           { The reference C code returns (bits32)( (sbits32) a >> 1 ), an
             ARITHMETIC shift that keeps the top bit of `a'.  Pascal's `shr'
             is a logical shift even on a signed operand, so the sign bit is
             re-inserted explicitly here. }
           estimateSqrt32 := ( a shr 1 ) or ( a and $80000000 );
           exit;
        End;
    End;
    { Final refinement: average of z and a/z, using the 64-by-32 estimate. }
    estimateSqrt32 := ( ( estimateDiv64To32( a, 0, z ) ) shr 1 ) + ( z shr 1 );
End;

{*
-------------------------------------------------------------------------------
Returns the number of leading 0 bits before the most-significant 1 bit of
`a'.  
If `a' is zero, 32 is returned.
-------------------------------------------------------------------------------
*}
Function countLeadingZeros32( a:bits32 ): int8;
    { Number of leading zero bits of every possible byte value. }
    const countLeadingZerosHigh:array[0..255] of int8 = (
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    );
Var
    zeros: int8;
Begin
    zeros := 0;
    { Upper 16 bits are all zero: count them and move the rest up. }
    if ( a < $10000 ) then
    Begin
        zeros := 16;
        a := a shl 16;
    End;
    { Top byte is zero: count it and move the rest up. }
    if ( a < $1000000 ) then
    Begin
        zeros := zeros + 8;
        a := a shl 8;
    end;
    { The table finishes the count within the top byte. }
    countLeadingZeros32 := zeros + countLeadingZerosHigh[ a shr 24 ];
End;

{*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'.  
If `a' is zero, 64 is returned.
*----------------------------------------------------------------------------*}
function countLeadingZeros64( a : bits64): int8;
var
    zeros : int8;
Begin
    if ( a < ( bits64(1) shl 32 ) ) then
        { High word is empty: 32 zeros plus whatever the low word has. }
        zeros := 32
    else
    Begin
        { Otherwise only the high word matters. }
        zeros := 0;
        a := a shr 32;
    End;
    countLeadingZeros64 := zeros + countLeadingZeros32( a );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the 64-bit value a0:a1 equals the 64-bit value b0:b1 (`a0' and
`b0' are the high words); otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function eq64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
Begin
    eq64 := flag( ( a0 = b0 ) and ( a1 = b1 ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the 64-bit value a0:a1 is less than or equal to the 64-bit
value b0:b1 (`a0' and `b0' are the high words); otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function le64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
Begin
    { The high words decide unless they are equal. }
    le64 := flag( ( a0 < b0 ) or ( ( a0 = b0 ) and ( a1 <= b1 ) ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
than the 64-bit value formed by concatenating `b0' and `b1'.  
Otherwise,
returns 0.
-------------------------------------------------------------------------------
*}
Function lt64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
Begin
    { The high words decide unless they are equal. }
    lt64 := flag( ( a0 < b0 ) or ( ( a0 = b0 ) and ( a1 < b1 ) ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the 64-bit value a0:a1 differs from the 64-bit value b0:b1
(`a0' and `b0' are the high words); otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function ne64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
Begin
    ne64 := flag( ( a0 <> b0 ) or ( a1 <> b1 ) );
End;

(*****************************************************************************)
(*                      End Low-Level arithmetic                             *)
(*****************************************************************************)

{*
-------------------------------------------------------------------------------
Functions and definitions to determine:  (1) whether tininess for underflow
is detected before or after rounding by default, (2) what (if anything)
happens when exceptions are raised, (3) how signaling NaNs are distinguished
from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
are propagated from function inputs to output.  
These details are ENDIAN
specific
-------------------------------------------------------------------------------
*}
{$IFDEF ENDIAN_LITTLE}
{*
-------------------------------------------------------------------------------
Internal canonical NaN format.
`sign' holds the sign bit; `high' and `low' hold the NaN fraction,
left-aligned, with `high' the more significant word.
-------------------------------------------------------------------------------
*}
TYPE
 commonNaNT = packed record
   sign: flag;
   high, low : bits32;
 end;

{*
-------------------------------------------------------------------------------
The pattern for a default generated single-precision NaN.
-------------------------------------------------------------------------------
*}
const
 float32_default_nan = $FFC00000;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is a NaN;
otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function float32_is_nan( a : float32 ): flag;
Begin
    { Shifting left by one discards the sign.  What remains is above
      $FF000000 exactly when the exponent is all ones and the fraction is
      nonzero. }
    float32_is_nan:= flag( $FF000000 < bits32 ( a shl 1 ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is a signaling
NaN; otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function float32_is_signaling_nan( a : float32  ): flag;
Begin
    { Signaling NaN: exponent all ones, quiet bit (bit 22) clear, and at
      least one of the lower 22 fraction bits set.  The nonzero test has to
      be an explicit comparison: a bitwise `and' between the 0/1 flag and the
      raw fraction would only ever look at fraction bit 0. }
    float32_is_signaling_nan :=
        flag(     ( ( ( a shr 22 ) and $1FF ) = $1FE )
              and ( ( a and $003FFFFF ) <> 0 ) );
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point NaN
`a' to the canonical NaN format.  
If `a' is a signaling NaN, the invalid
exception is raised.
-------------------------------------------------------------------------------
*}
Procedure float32ToCommonNaN( a: float32; VAR c:commonNaNT  );
Begin
    if ( float32_is_signaling_nan( a ) <> 0) then
       float_raise( float_flag_invalid );
    c.sign := a shr 31;
    { Shifting left by 9 drops the sign and exponent and left-aligns the
      fraction in the high word. }
    c.high := a shl 9;
    c.low := 0;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the canonical NaN `a' to the single-
precision floating-point format.  The result always has the exponent field
all ones and the quiet bit set ($7FC00000).
-------------------------------------------------------------------------------
*}
Function commonNaNToFloat32( a : commonNaNT ): float32;
Var
    signBit, payload: bits32;
Begin
    signBit := ( bits32( a.sign ) ) shl 31;
    payload := a.high shr 9;
    commonNaNToFloat32 := signBit or $7FC00000 or payload;
End;

{*
-------------------------------------------------------------------------------
Takes two single-precision floating-point values `a' and `b', one of which
is a NaN, and returns the appropriate NaN result.  
If either `a' or `b' is a
signaling NaN, the invalid exception is raised.
-------------------------------------------------------------------------------
*}
{ Selection rules (both operands are quieted first by setting bit 22):
    - exactly one operand is a signaling NaN: return the other operand if it
      is a NaN, otherwise the quieted signaling one;
    - `a' is a quiet NaN and `b' is signaling or not a NaN: return `a';
    - `a' is not a NaN: return `b';
    - both signaling, or both quiet: return the operand with the larger
      significand (compared with the sign shifted out); on a tie return the
      smaller of the two bit patterns. }
Function propagateFloat32NaN( a : float32 ; b: float32 ): float32;
Var
    aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
    aMagnitude, bMagnitude: bits32;
Begin
    aIsNaN := float32_is_nan( a );
    aIsSignalingNaN := float32_is_signaling_nan( a );
    bIsNaN := float32_is_nan( b );
    bIsSignalingNaN := float32_is_signaling_nan( b );
    { Quiet both operands; whichever is returned is then a quiet NaN. }
    a := a or $00400000;
    b := b or $00400000;
    if ( aIsSignalingNaN or bIsSignalingNaN ) <> 0 then
        float_raise( float_flag_invalid );

    if ( aIsSignalingNaN <> 0 ) then
    Begin
        if ( bIsSignalingNaN = 0 ) then
        Begin
            if ( bIsNaN <> 0 ) then
              propagateFloat32NaN := b
            else
              propagateFloat32NaN := a;
            exit;
        End;
        { Both signaling: decided by the significand comparison below. }
    End
    else if ( aIsNaN <> 0 ) then
    Begin
        { The flags hold 0 or 1, so they are tested explicitly; a bitwise
          `not' of such a byte is never zero. }
        if ( bIsSignalingNaN <> 0 ) or ( bIsNaN = 0 ) then
        Begin
            propagateFloat32NaN := a;
            exit;
        End;
        { Both quiet: decided by the significand comparison below. }
    End
    else
    Begin
        propagateFloat32NaN := b;
        exit;
    End;

    { Return the operand with the larger significand. }
    aMagnitude := bits32( a shl 1 );
    bMagnitude := bits32( b shl 1 );
    if ( aMagnitude < bMagnitude ) then
        propagateFloat32NaN := b
    else if ( bMagnitude < aMagnitude ) then
        propagateFloat32NaN := a
    else if ( a < b ) then
        propagateFloat32NaN := a
    else
        propagateFloat32NaN := b;
End;

{*
-------------------------------------------------------------------------------
The pattern for a default generated double-precision NaN.  
The `high' and
`low' values hold the most- and least-significant bits, respectively.
-------------------------------------------------------------------------------
*}
const
    float64_default_nan_high = $FFF80000;
    float64_default_nan_low  = $00000000;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is a NaN;
otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function float64_is_nan( a : float64 ) : flag;
Begin
    { Both conditions are evaluated as Booleans and converted once.  A
      bitwise `and' between a 0/1 flag and the raw fraction word would only
      look at fraction bit 0. }
    float64_is_nan :=
        flag(     ( $FFE00000 <= bits32( a.high shl 1 ) )
              and ( ( a.low <> 0 ) or ( ( a.high and $000FFFFF ) <> 0 ) ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is a signaling
NaN; otherwise returns 0.
-------------------------------------------------------------------------------
*}
Function float64_is_signaling_nan( a : float64 ): flag;
Begin
    { First test: bits 30..19 of `high' equal $FFE, i.e. bits 30..20 all set
      and bit 19 clear.  Second test: some remaining fraction bit is set. }
    float64_is_signaling_nan :=
        flag(     ( ( ( a.high shr 19 ) and $FFF ) = $FFE )
              and ( ( a.low <> 0 ) or ( ( a.high and $0007FFFF ) <> 0 ) ) );
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point NaN
`a' to the canonical NaN format.  
If `a' is a signaling NaN, the invalid
exception is raised.
-------------------------------------------------------------------------------
*}
Procedure float64ToCommonNaN( a : float64; VAR c:commonNaNT );
Var
    canonical : commonNaNT;
Begin
    { Report a signaling NaN before converting it. }
    if float64_is_signaling_nan( a ) <> 0 then
        float_raise( float_flag_invalid );
    { Shift the high/low pair left by 12 into the canonical record. }
    shortShift64Left( a.high, a.low, 12, canonical.high, canonical.low );
    canonical.sign := a.high shr 31;
    c := canonical;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the canonical NaN `a' to the double-
precision floating-point format.
-------------------------------------------------------------------------------
*}
Procedure commonNaNToFloat64( a : commonNaNT; VAR c: float64  );
Var
    dbl : float64;
    signBit : bits32;
Begin
    signBit := bits32( a.sign ) shl 31;
    shift64Right( a.high, a.low, 12, dbl.high, dbl.low );
    { $7FF80000 sets bits 30..19 of the high word. }
    dbl.high := dbl.high or signBit or $7FF80000;
    c := dbl;
End;

{*
-------------------------------------------------------------------------------
Takes two double-precision floating-point values `a' and `b', one of which
is a NaN, and returns the appropriate NaN result.  
If either `a' or `b' is a
signaling NaN, the invalid exception is raised.
-------------------------------------------------------------------------------
*}
Procedure propagateFloat64NaN( a: float64; b: float64 ; VAR c: float64 );
Var
    aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
Begin
    aIsNaN := float64_is_nan( a );
    aIsSignalingNaN := float64_is_signaling_nan( a );
    bIsNaN := float64_is_nan( b );
    bIsSignalingNaN := float64_is_signaling_nan( b );
    { Set bit 19 of the high word in both candidates before one is returned. }
    a.high := a.high or $00080000;
    b.high := b.high or $00080000;
    if ( aIsSignalingNaN or bIsSignalingNaN ) <> 0 then
        float_raise( float_flag_invalid );
    if aIsSignalingNaN <> 0 then
    Begin
        if bIsSignalingNaN = 0 then
        Begin
            { Only `a' is signaling: prefer `b' when it is a NaN as well. }
            if bIsNaN <> 0 then
                c := b
            else
                c := a;
            exit;
        End;
        { Both are signaling: fall through to the significand comparison. }
    End
    else if aIsNaN <> 0 then
    Begin
        { `flag' is an integer type, so the test must be an explicit
          comparison with zero; a bitwise `not' of 0 or 1 is never zero. }
        if ( bIsSignalingNaN <> 0 ) or ( bIsNaN = 0 ) then
        Begin
            c := a;
            exit;
        End;
        { Both are quiet NaNs: fall through to the significand comparison. }
    End
    else
    Begin
        { `a' is not a NaN, so `b' is the result. }
        c := b;
        exit;
    End;
    { Both operands are NaNs of the same kind: return the one with the
      larger significand (sign bit shifted out of the high word), and on a
      tie the one with the smaller high word. }
    if lt64( bits32( a.high shl 1 ), a.low, bits32( b.high shl 1 ), b.low ) <> 0 then
        c := b
    else if lt64( bits32( b.high shl 1 ), b.low, bits32( a.high shl 1 ), a.low ) <> 0 then
        c := a
    else if a.high < b.high then
        c := a
    else
        c := b;
End;

{$ELSE}
{ Big endian code }

(*----------------------------------------------------------------------------
| Internal canonical NaN format.
*----------------------------------------------------------------------------*)
type
 commonNANT = packed record
  sign : flag;
  high, low : bits32;
 end;

(*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*)
const
float32_default_nan = $7FFFFFFF;

(*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*)
function float32_is_nan(a:  float32): flag;
begin
    { With the sign bit shifted out, a NaN is any pattern strictly above
      $FF000000. }
    float32_is_nan := flag( bits32( a shl 1 ) > $FF000000 );
end;

(*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*)
function float32_is_signaling_nan(a: float32):flag;
var
    expSetBit22Clear, lowFractionSet : boolean;
begin
    { Bits 30..22 equal $1FE: bits 30..23 all set and bit 22 clear. }
    expSetBit22Clear := ( ( a shr 22 ) and $1FF ) = $1FE;
    { At least one of the low 22 bits is set. }
    lowFractionSet := ( a and $003FFFFF ) <> 0;
    float32_is_signaling_nan := flag( expSetBit22Clear ) and flag( lowFractionSet );
end;

(*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point NaN
| `a' to the canonical NaN format.  
If `a' is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*)
Procedure float32ToCommonNaN( a: float32; VAR c:commonNaNT  );
var
    canonical : commonNANT;
begin
    { Report a signaling NaN before converting it. }
    if float32_is_signaling_nan( a ) <> 0 then
        float_raise( float_flag_invalid );
    { Shifting left by 9 pushes the sign and exponent bits out of the top. }
    canonical.high := a shl 9;
    canonical.low := 0;
    canonical.sign := a shr 31;
    c := canonical;
end;

(*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the single-
| precision floating-point format.
*----------------------------------------------------------------------------*)
function CommonNanToFloat32(a : CommonNaNT): float32;
var
    signBit, payload : bits32;
begin
    signBit := bits32( a.sign ) shl 31;
    payload := a.high shr 9;
    CommonNanToFloat32 := signBit or $7FC00000 or payload;
end;

(*----------------------------------------------------------------------------
| Takes two single-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*)
function  propagateFloat32NaN( a: float32 ; b: float32): float32;
var
    aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
begin
    aIsNaN := float32_is_nan( a );
    aIsSignalingNaN := float32_is_signaling_nan( a );
    bIsNaN := float32_is_nan( b );
    bIsSignalingNaN := float32_is_signaling_nan( b );
    { Set bit 22 in both candidates before one of them is returned. }
    a := a or $00400000;
    b := b or $00400000;
    if ( aIsSignalingNaN or bIsSignalingNaN ) <> 0 then
        float_raise( float_flag_invalid );
    { Priority: signaling `b', then signaling `a', then any NaN `b',
      otherwise `a'. }
    if ( bIsSignalingNaN <> 0 )
       or ( ( aIsSignalingNaN = 0 ) and ( bIsNaN <> 0 ) ) then
        propagateFloat32NaN := b
    else
        propagateFloat32NaN := a;
end;

(*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN.  
The `high' and
| `low' values hold the most- and least-significant bits, respectively.
*----------------------------------------------------------------------------*)
const
    float64_default_nan_high = $7FFFFFFF;
    float64_default_nan_low  = $FFFFFFFF;

(*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a NaN;
| otherwise returns 0.
*----------------------------------------------------------------------------*)
function float64_is_nan(a: float64): flag;
begin
    float64_is_nan :=
        flag(     ( $FFE00000 <= bits32( a.high shl 1 ) )
              and ( ( a.low <> 0 ) or ( ( a.high and $000FFFFF ) <> 0 ) ) );
end;

(*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a signaling
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*)
function float64_is_signaling_nan( a:float64): flag;
begin
    { The whole Boolean expression is converted to `flag' once, as in
      float64_is_nan; combining a `flag' value with a Boolean operand through
      `and' mixes operand types. }
    float64_is_signaling_nan :=
        flag(     ( ( ( a.high shr 19 ) and $FFF ) = $FFE )
              and ( ( a.low <> 0 ) or ( ( a.high and $0007FFFF ) <> 0 ) ) );
end;

(*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point NaN
| `a' to the canonical NaN format.  
If `a' is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*)
Procedure float64ToCommonNaN( a : float64; VAR c:commonNaNT );
var
    canonical : commonNaNT;
begin
    { Report a signaling NaN before converting it. }
    if float64_is_signaling_nan( a ) <> 0 then
        float_raise( float_flag_invalid );
    { Shift the high/low pair left by 12 into the canonical record. }
    shortShift64Left( a.high, a.low, 12, canonical.high, canonical.low );
    canonical.sign := a.high shr 31;
    c := canonical;
end;

(*----------------------------------------------------------------------------
| Returns the result of converting the canonical NaN `a' to the double-
| precision floating-point format.
*----------------------------------------------------------------------------*)
Procedure commonNaNToFloat64( a : commonNaNT; VAR c: float64  );
var
    dbl : float64;
    signBit : bits32;
begin
    signBit := bits32( a.sign ) shl 31;
    shift64Right( a.high, a.low, 12, dbl.high, dbl.low );
    dbl.high := dbl.high or signBit or $7FF80000;
    c := dbl;
end;

(*----------------------------------------------------------------------------
| Takes two double-precision floating-point values `a' and `b', one of which
| is a NaN, and returns the appropriate NaN result.  
If either `a' or `b' is a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*)
Procedure propagateFloat64NaN( a: float64; b: float64 ; VAR c: float64 );
var
    aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN : flag;
begin
    aIsNaN := float64_is_nan( a );
    aIsSignalingNaN := float64_is_signaling_nan( a );
    bIsNaN := float64_is_nan( b );
    bIsSignalingNaN := float64_is_signaling_nan( b );
    { Set bit 19 of the high word in both candidates before one is returned. }
    a.high := a.high or $00080000;
    b.high := b.high or $00080000;
    if ( aIsSignalingNaN or bIsSignalingNaN ) <> 0 then
        float_raise( float_flag_invalid );
    { Priority: signaling `b', then signaling `a', then any NaN `b',
      otherwise `a'. }
    if ( bIsSignalingNaN <> 0 )
       or ( ( aIsSignalingNaN = 0 ) and ( bIsNaN <> 0 ) ) then
        c := b
    else
        c := a;
end;

{$ENDIF}

(****************************************************************************)
(*                        END ENDIAN SPECIFIC CODE                          *)
(****************************************************************************)

{*
-------------------------------------------------------------------------------
Returns the fraction bits of the single-precision floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function ExtractFloat32Frac(a : Float32) : Bits32;
Begin
    { Keep the low 23 bits. }
    ExtractFloat32Frac := a and $007FFFFF;
End;

{*
-------------------------------------------------------------------------------
Returns the exponent bits of the single-precision floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat32Exp( a: float32 ): Int16;
Begin
    { Bits 30..23. }
    extractFloat32Exp := ( a shr 23 ) and $FF;
End;

{*
-------------------------------------------------------------------------------
Returns the sign bit of the single-precision floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat32Sign( a: float32 ): Flag;
Begin
    extractFloat32Sign := a shr 31;
End;

{*
-------------------------------------------------------------------------------
Normalizes the subnormal single-precision floating-point value represented
by the denormalized significand `aSig'.  The normalized exponent and
significand are stored at the locations pointed to by `zExpPtr' and
`zSigPtr', respectively.
-------------------------------------------------------------------------------
*}
Procedure normalizeFloat32Subnormal( aSig : bits32; VAR zExpPtr: Int16; VAR zSigPtr :bits32);
Var
    lshift : BYTE;
Begin
    { Number of positions needed to bring the leading one up to bit 23. }
    lshift := countLeadingZeros32( aSig ) - 8;
    zExpPtr := 1 - lshift;
    zSigPtr := aSig shl lshift;
End;

{*
-------------------------------------------------------------------------------
Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
single-precision floating-point value, returning the result.  After being
shifted into the proper positions, the three fields are simply added
together to form the result.  This means that any integer portion of `zSig'
will be added into the exponent.  Since a properly normalized significand
will have an integer portion equal to 1, the `zExp' input should be 1 less
than the desired result exponent whenever `zSig' is a complete, normalized
significand.
-------------------------------------------------------------------------------
*}
Function packFloat32( zSign: Flag; zExp : Int16; zSig: Bits32 ): Float32;
Begin
    { The fields are added, not or-ed, so a carry out of `zSig' bumps the
      exponent field. }
    packFloat32 :=
          zSig
        + ( bits32( zExp ) shl 23 )
        + ( bits32( zSign ) shl 31 );
End;

{*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and significand `zSig', and returns the proper single-precision floating-
point value corresponding to the abstract input.  
Ordinarily, the abstract
value is simply rounded and packed into the single-precision format, with
the inexact exception raised if the abstract input cannot be represented
exactly.  However, if the abstract value is too large, the overflow and
inexact exceptions are raised and an infinity or maximal finite value is
returned.  If the abstract value is too small, the input value is rounded to
a subnormal number, and the underflow and inexact exceptions are raised if
the abstract input cannot be represented exactly as a subnormal single-
precision floating-point number.
    The input significand `zSig' has its binary point between bits 30
and 29, which is 7 bits to the left of the usual location.  This shifted
significand must be normalized or smaller.  If `zSig' is not normalized,
`zExp' must be 0; in that case, the result returned is a subnormal number,
and it must not require rounding.  In the usual case that `zSig' is
normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
The handling of underflow and overflow follows the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function roundAndPackFloat32( zSign : Flag; zExp : Int16; zSig : Bits32 ) : float32;
Var
    mode : BYTE;
    nearestEven : Flag;
    increment, lowBits : BYTE;
    tiny : Flag;
Begin
    mode := float_rounding_mode;
    nearestEven := Flag( mode = float_round_nearest_even );

    { Pick the value added below the 7 discarded bits: half an ulp for
      round-to-nearest, nothing when truncating, just under one ulp when
      rounding away from zero. }
    if nearestEven <> 0 then
        increment := $40
    else if mode = float_round_to_zero then
        increment := 0
    else if ( ( zSign <> 0 ) and ( mode = float_round_up ) )
         or ( ( zSign = 0 ) and ( mode = float_round_down ) ) then
        increment := 0
    else
        increment := $7F;

    lowBits := zSig and $7F;

    { The unsigned compare catches both too-large and negative exponents. }
    if $FD <= bits16( zExp ) then
    Begin
        if ( $FD < zExp )
           or ( ( zExp = $FD ) and ( sbits32( zSig + increment ) < 0 ) ) then
        Begin
            float_raise( float_flag_overflow or float_flag_inexact );
            { Infinity, or one less than infinity when nothing is added. }
            roundAndPackFloat32 :=
                packFloat32( zSign, $FF, 0 ) - Flag( increment = 0 );
            exit;
        End;
        if zExp < 0 then
        Begin
            tiny := flag(
                   ( float_detect_tininess = float_tininess_before_rounding )
                or ( zExp < -1 )
                or ( ( zSig + increment ) < $80000000 ) );
            shift32RightJamming( zSig, - zExp, zSig );
            zExp := 0;
            lowBits := zSig and $7F;
            if ( tiny <> 0 ) and ( lowBits <> 0 ) then
                float_raise( float_flag_underflow );
        End;
    End;

    if lowBits <> 0 then
        float_exception_flags := float_flag_inexact or float_exception_flags;

    zSig := ( zSig + increment ) shr 7;
    { Exact tie in round-to-nearest-even: clear the low bit. }
    if ( nearestEven <> 0 ) and ( lowBits = $40 ) then
        zSig := zSig and $FFFFFFFE;
    if zSig = 0 then
        zExp := 0;
    roundAndPackFloat32 := packFloat32( zSign, zExp, zSig );
End;

{*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and significand `zSig', and returns the proper single-precision floating-
point value corresponding to the abstract input.  
This routine is just like
`roundAndPackFloat32' except that `zSig' does not have to be normalized.
Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
floating-point exponent.
-------------------------------------------------------------------------------
*}
Function normalizeRoundAndPackFloat32( zSign: flag; zExp: int16; zSig:bits32 ): float32;
Var
    lshift : int8;
Begin
    { Bring the leading one of `zSig' up to bit 30. }
    lshift := countLeadingZeros32( zSig ) - 1;
    zExp := zExp - lshift;
    zSig := zSig shl lshift;
    normalizeRoundAndPackFloat32 := roundAndPackFloat32( zSign, zExp, zSig );
End;

{*
-------------------------------------------------------------------------------
Returns the least-significant 32 fraction bits of the double-precision
floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat64Frac( a: float64 ): bits32;
Begin
    extractFloat64Frac := a.low;
End;

{*
-------------------------------------------------------------------------------
Returns the most-significant 20 fraction bits of the double-precision
floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat64Frac0(a: float64): bits32;
Begin
    { Low 20 bits of the high word. }
    extractFloat64Frac0 := a.high and $000FFFFF;
End;

{*
-------------------------------------------------------------------------------
Returns the least-significant 32 fraction bits of the double-precision
floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat64Frac1(a: float64): bits32;
Begin
    extractFloat64Frac1 := a.low;
End;

{*
-------------------------------------------------------------------------------
Returns the exponent bits of the double-precision floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat64Exp(a: float64): int16;
Begin
    { Bits 30..20 of the high word. }
    extractFloat64Exp := ( a.high shr 20 ) and $7FF;
End;

{*
-------------------------------------------------------------------------------
Returns the sign bit of the double-precision floating-point value `a'.
-------------------------------------------------------------------------------
*}
Function extractFloat64Sign(a: float64) : flag;
Begin
    extractFloat64Sign := a.high shr 31;
End;

{*
-------------------------------------------------------------------------------
Normalizes the subnormal double-precision floating-point value represented
by the denormalized significand formed by the concatenation of `aSig0' and
`aSig1'.  The normalized exponent is stored at the location pointed to by
`zExpPtr'.  The most significant 21 bits of the normalized significand are
stored at the location pointed to by `zSig0Ptr', and the least significant
32 bits of the normalized significand are stored at the location pointed to
by `zSig1Ptr'.
-------------------------------------------------------------------------------
*}
Procedure normalizeFloat64Subnormal(
     aSig0: bits32;
     aSig1: bits32;
     VAR zExpPtr : Int16;
     VAR zSig0Ptr : Bits32;
     VAR zSig1Ptr : Bits32 );
Var
    shift : Int8;
Begin
    if aSig0 <> 0 then
    Begin
        { The leading one is in the high word: one short 64-bit left shift. }
        shift := countLeadingZeros32( aSig0 ) - 11;
        shortShift64Left( aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr );
        zExpPtr := 1 - shift;
        exit;
    End;

    { The high word is empty: the low word supplies every significant bit. }
    shift := countLeadingZeros32( aSig1 ) - 11;
    if shift >= 0 then
    Begin
        zSig0Ptr := aSig1 shl shift;
        zSig1Ptr := 0;
    End
    else
    Begin
        { The leading one sits above bit 20, so the low word is split
          across both result words. }
        zSig0Ptr := aSig1 shr ( - shift );
        zSig1Ptr := aSig1 shl ( shift and 31 );
    End;
    zExpPtr := - shift - 31;
End;

{*
-------------------------------------------------------------------------------
Packs the sign `zSign', the exponent `zExp', and the significand formed by
the concatenation of `zSig0' and `zSig1' into a double-precision floating-
point value, 
returning the result.  After being shifted into the proper
positions, the three fields `zSign', `zExp', and `zSig0' are simply added
together to form the most significant 32 bits of the result.  This means
that any integer portion of `zSig0' will be added into the exponent.  Since
a properly normalized significand will have an integer portion equal to 1,
the `zExp' input should be 1 less than the desired result exponent whenever
`zSig0' and `zSig1' concatenated form a complete, normalized significand.
-------------------------------------------------------------------------------
*}
Procedure packFloat64( zSign: Flag; zExp: Int16; zSig0: Bits32; zSig1 : Bits32; VAR c : float64);
Var
    dbl : Float64;
Begin
    { The fields are added, not or-ed, so a carry out of `zSig0' bumps the
      exponent field. }
    dbl.high :=
          zSig0
        + ( bits32( zExp ) shl 20 )
        + ( bits32( zSign ) shl 31 );
    dbl.low := zSig1;
    c := dbl;
End;

{*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and extended significand formed by the concatenation of `zSig0', `zSig1',
and `zSig2', and returns the proper double-precision floating-point value
corresponding to the abstract input.  Ordinarily, the abstract value is
simply rounded and packed into the double-precision format, with the inexact
exception raised if the abstract input cannot be represented exactly.
However, if the abstract value is too large, the overflow and inexact
exceptions are raised and an infinity or maximal finite value is returned.
If the abstract value is too small, the input value is rounded to a
subnormal number, and the underflow and inexact exceptions are raised if the
abstract input cannot be represented exactly as a subnormal double-precision
floating-point number.
    The input significand must be normalized or smaller.  If the input
significand is not normalized, `zExp' must be 0; in that case, the result
returned is a subnormal number, and it must not require rounding.  
In the
usual case that the input significand is normalized, `zExp' must be 1 less
than the ``true'' floating-point exponent.  The handling of underflow and
overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure roundAndPackFloat64(
     zSign: Flag; zExp: Int16; zSig0: Bits32; zSig1: Bits32; zSig2: Bits32; Var c: Float64 );
Var
    roundingMode : Int8;
    roundNearestEven, increment, isTiny : Flag;
Begin
    roundingMode := float_rounding_mode;
    roundNearestEven := flag( roundingMode = float_round_nearest_even );
    { Round-to-nearest increments when the top bit of `zSig2' is set. }
    increment := flag( sbits32( zSig2 ) < 0 );
    if roundNearestEven = 0 then
    Begin
        { Directed rounding increments whenever ANY discarded bit is set.
          The test is written as a Boolean: a bitwise `and' of the 0/1 flag
          with `zSig2' would only look at bit 0 of `zSig2'. }
        if roundingMode = float_round_to_zero then
            increment := 0
        else if zSign <> 0 then
            increment := flag( ( roundingMode = float_round_down ) and ( zSig2 <> 0 ) )
        else
            increment := flag( ( roundingMode = float_round_up ) and ( zSig2 <> 0 ) );
    End;
    { The unsigned compare catches both too-large and negative exponents. }
    if $7FD <= bits16( zExp ) then
    Begin
        if ( $7FD < zExp )
           or (     ( zExp = $7FD )
                and ( eq64( $001FFFFF, $FFFFFFFF, zSig0, zSig1 ) <> 0 )
                and ( increment <> 0 ) ) then
        Begin
            float_raise( float_flag_overflow or float_flag_inexact );
            if ( roundingMode = float_round_to_zero )
               or ( ( zSign <> 0 ) and ( roundingMode = float_round_up ) )
               or ( ( zSign = 0 ) and ( roundingMode = float_round_down ) ) then
            Begin
                { Rounding toward zero for this sign: largest finite value. }
                packFloat64( zSign, $7FE, $000FFFFF, $FFFFFFFF, c );
                exit;
            End;
            { Otherwise the result is an infinity. }
            packFloat64( zSign, $7FF, 0, 0, c );
            exit;
        End;
        if zExp < 0 then
        Begin
            isTiny :=
                   flag( float_detect_tininess = float_tininess_before_rounding )
                or flag( zExp < -1 )
                or flag( increment = 0 )
                or flag( lt64( zSig0, zSig1, $001FFFFF, $FFFFFFFF ) <> 0 );
            shift64ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, zSig0, zSig1, zSig2 );
            zExp := 0;
            if ( isTiny <> 0 ) and ( zSig2 <> 0 ) then
                float_raise( float_flag_underflow );
            { Recompute the increment from the shifted-out bits. }
            if roundNearestEven <> 0 then
                increment := flag( sbits32( zSig2 ) < 0 )
            else if zSign <> 0 then
                increment := flag( ( roundingMode = float_round_down ) and ( zSig2 <> 0 ) )
            else
                increment := flag( ( roundingMode = float_round_up ) and ( zSig2 <> 0 ) );
        End;
    End;
    if zSig2 <> 0 then
        float_exception_flags := float_exception_flags or float_flag_inexact;
    if increment <> 0 then
    Begin
        add64( zSig0, zSig1, 0, 1, zSig0, zSig1 );
        { Exact tie (`zSig2' is $80000000 once an increment was chosen) in
          round-to-nearest-even: clear the low bit.  The tie test is a
          truncated shift so it does not depend on the width the compiler
          uses for intermediate arithmetic. }
        zSig1 := zSig1 and not ( bits32( flag( bits32( zSig2 shl 1 ) = 0 ) ) and roundNearestEven );
    End
    else
    Begin
        if ( zSig0 or zSig1 ) = 0 then
            zExp := 0;
    End;
    packFloat64( zSign, zExp, zSig0, zSig1, c );
End;

{*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and significand formed by the concatenation of `zSig0' and `zSig1', and
returns the proper double-precision floating-point value corresponding
to the abstract input.  This routine is just like `roundAndPackFloat64'
except that the input significand has fewer bits and does not have to be
normalized.  
In all cases, `zExp' must be 1 less than the ``true'' floating-
point exponent.
-------------------------------------------------------------------------------
*}
Procedure normalizeRoundAndPackFloat64(
     zSign:flag; zExp:int16; zSig0:bits32; zSig1:bits32; VAR c: float64 );
Var
    shift : int8;
    extra : bits32;
Begin
    { An empty high word means the value starts 32 bits lower. }
    if zSig0 = 0 then
    Begin
        zSig0 := zSig1;
        zSig1 := 0;
        zExp := zExp - 32;
    End;
    shift := countLeadingZeros32( zSig0 ) - 11;
    if shift < 0 then
    Begin
        { Leading one is above bit 20: shift right, collecting the lost
          bits in `extra'. }
        shift64ExtraRightJamming
          ( zSig0, zSig1, 0, - shift, zSig0, zSig1, extra );
    End
    else
    Begin
        extra := 0;
        shortShift64Left( zSig0, zSig1, shift, zSig0, zSig1 );
    End;
    zExp := zExp - shift;
    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, extra, c );
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a' to
the single-precision floating-point format.  The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function int32_to_float32( a: int32): float32; {$ifdef fpc}[public,Alias:'INT32_TO_FLOAT32'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    negative : Flag;
Begin
    if a = 0 then
        int32_to_float32 := 0
    else if a = sbits32( $80000000 ) then
        { The most negative integer cannot be negated; pack it directly. }
        int32_to_float32 := packFloat32( 1, $9E, 0 )
    else
    Begin
        negative := flag( a < 0 );
        if negative <> 0 then
            a := -a;
        int32_to_float32 := normalizeRoundAndPackFloat32( negative, $9C, a );
    End;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a' to
the double-precision floating-point format.  
The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure int32_to_float64( a: int32; var c: float64 );{$ifdef fpc} [public,Alias:'INT32_TO_FLOAT64'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    negative : flag;
    magnitude : bits32;
    shift : int8;
    sigHigh, sigLow : bits32;
Begin
    if a = 0 then
    Begin
        packFloat64( 0, 0, 0, 0, c );
        exit;
    End;
    if a < 0 then
    Begin
        negative := 1;
        magnitude := -a;
    End
    else
    Begin
        negative := 0;
        magnitude := a;
    End;
    { Position the leading one of the magnitude at bit 20 of the high word. }
    shift := countLeadingZeros32( magnitude ) - 11;
    if shift < 0 then
        shift64Right( magnitude, 0, - shift, sigHigh, sigLow )
    else
    Begin
        sigHigh := magnitude shl shift;
        sigLow := 0;
    End;
    packFloat64( negative, $412 - shift, sigHigh, sigLow, c );
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic---which means in particular that the conversion is rounded
according to the current rounding mode.  If `a' is a NaN, the largest
positive integer is returned.  
Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*}
Function float32_to_int32( a : float32) : int32;{$ifdef fpc} [public,Alias:'FLOAT32_TO_INT32'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign : flag;
    aExp, shiftCount : int16;
    aSig, lostBits : bits32;
    z : int32;
    mode : int8;
Begin
    aSig := extractFloat32Frac( a );
    aExp := extractFloat32Exp( a );
    aSign := extractFloat32Sign( a );
    shiftCount := aExp - $96;

    { Magnitude of at least 2^31: out of range except for exactly -2^31. }
    if aExp >= $9E then
    Begin
        if a <> $CF000000 then
        Begin
            float_raise( float_flag_invalid );
            if ( aSign = 0 ) or ( ( aExp = $FF ) and ( aSig <> 0 ) ) then
            Begin
                float32_to_int32 := $7FFFFFFF;
                exit;
            End;
        End;
        float32_to_int32 := sbits32( $80000000 );
        exit;
    End;

    { No fraction bits are lost: the value is already an integer. }
    if shiftCount >= 0 then
    Begin
        z := ( aSig or $00800000 ) shl shiftCount;
        if aSign <> 0 then
            z := - z;
        float32_to_int32 := z;
        exit;
    End;

    if aExp < $7E then
    Begin
        { Magnitude below one half: everything is discarded. }
        lostBits := aExp or aSig;
        z := 0;
    End
    else
    Begin
        aSig := aSig or $00800000;
        lostBits := aSig shl ( shiftCount and 31 );
        z := aSig shr ( - shiftCount );
    End;
    if lostBits <> 0 then
        float_exception_flags := float_exception_flags or float_flag_inexact;

    mode := float_rounding_mode;
    if mode = float_round_nearest_even then
    Begin
        { Top discarded bit set: round up, then force even on an exact tie. }
        if sbits32( lostBits ) < 0 then
        Begin
            Inc( z );
            if bits32( lostBits shl 1 ) = 0 then
                z := z and not 1;
        End;
    End
    else if ( lostBits <> 0 )
        and (    ( ( aSign <> 0 ) and ( mode = float_round_down ) )
              or ( ( aSign = 0 ) and ( mode = float_round_up ) ) ) then
        { Directed rounding away from zero for this sign. }
        Inc( z );

    if aSign <> 0 then
        z := - z;
    float32_to_int32 := z;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic, except that the conversion is always rounded toward zero.
If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
the conversion overflows, the largest integer with the same sign as `a' is
returned.
-------------------------------------------------------------------------------
*}
Function float32_to_int32_round_to_zero( a: Float32 ): int32;   {$ifdef fpc}[public,Alias:'FLOAT32_TO_INT32_ROUND_TO_ZERO'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign : flag;
    aExp, shiftCount : int16;
    aSig : bits32;
    z : int32;
Begin
    aSig := extractFloat32Frac( a );
    aExp := extractFloat32Exp( a );
    aSign := extractFloat32Sign( a );
    shiftCount := aExp - $9E;

    { Magnitude of at least 2^31: out of range except for exactly -2^31. }
    if shiftCount >= 0 then
    Begin
        if a <> $CF000000 then
        Begin
            float_raise( float_flag_invalid );
            if ( aSign = 0 ) or ( ( aExp = $FF ) and ( aSig <> 0 ) ) then
            Begin
                float32_to_int32_round_to_zero := $7FFFFFFF;
                exit;
            End;
        End;
        float32_to_int32_round_to_zero := sbits32( $80000000 );
        exit;
    End;

    { Magnitude below one: the result is zero, inexact unless `a' is zero. }
    if aExp <= $7E then
    Begin
        if ( aExp or aSig ) <> 0 then
            float_exception_flags := float_exception_flags or float_flag_inexact;
        float32_to_int32_round_to_zero := 0;
        exit;
    End;

    aSig := ( aSig or $00800000 ) shl 8;
    z := aSig shr ( - shiftCount );
    { Any bit shifted out makes the conversion inexact. }
    if bits32( aSig shl ( shiftCount and 31 ) ) <> 0 then
        float_exception_flags := float_exception_flags or float_flag_inexact;
    if aSign <> 0 then
        z := - z;
    float32_to_int32_round_to_zero := z;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the double-precision floating-point format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float32_to_float64( a : float32; var out: Float64);{$ifdef fpc}[public,Alias:'FLOAT32_TO_FLOAT64'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign : flag;
    aExp : int16;
    aSig, sigHigh, sigLow : bits32;
    canonical : CommonNanT;
Begin
    aSig := extractFloat32Frac( a );
    aExp := extractFloat32Exp( a );
    aSign := extractFloat32Sign( a );

    if aExp = $FF then
    Begin
        if aSig <> 0 then
        Begin
            { NaN: convert through the canonical NaN record. }
            float32ToCommonNaN( a, canonical );
            commonNaNToFloat64( canonical, out );
        End
        else
            { Infinity. }
            packFloat64( aSign, $7FF, 0, 0, out );
        exit;
    End;

    if aExp = 0 then
    Begin
        if aSig = 0 then
        Begin
            { Signed zero. }
            packFloat64( aSign, 0, 0, 0, out );
            exit;
        End;
        { Subnormal input: normalize it first. }
        normalizeFloat32Subnormal( aSig, aExp, aSig );
        Dec( aExp );
    End;

    { Spread the significand over the two result words and adjust the
      exponent by $380. }
    shift64Right( aSig, 0, 3, sigHigh, sigLow );
    packFloat64( aSign, aExp + $380, sigHigh, sigLow, out );
End;

{*
-------------------------------------------------------------------------------
Rounds the single-precision floating-point value `a' to an integer,
and returns the result as a single-precision 
floating-point value.  The
operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_round_to_int( a: float32): float32;
{$ifdef fpc}[public,Alias:'FLOAT32_ROUND_TO_INT'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
  { Cases handled, by biased exponent of `a':
      - $96 and above: `a' is returned unchanged, except that a NaN is
        passed through propagateFloat32NaN;
      - $7E and below: the result is picked directly from the rounding
        mode and the sign (signed zero or signed one);
      - in between: the fraction bits below the units position are rounded
        away by masking the raw bit pattern.
    float_flag_inexact is set whenever the returned value differs from `a'. }
  Var
    aSign: flag;
    aExp: int16;
    lastBitMask, roundBitsMask: bits32;
    roundingMode: int8;
    z: float32;
  Begin
    aExp := extractFloat32Exp( a );
    if ( $96 <= aExp ) then
      Begin
        if ( ( aExp = $FF ) and (extractFloat32Frac( a )<>0) ) then
          Begin
            float32_round_to_int := propagateFloat32NaN( a, a );
            exit;
          End;
        float32_round_to_int := a;
        exit;
      End;
    if ( aExp <= $7E ) then
      Begin
        { All bits except the sign are zero: a signed zero is returned as is. }
        if ( bits32 ( a shl 1 ) = 0 ) then
          Begin
            float32_round_to_int := a;
            exit;
          End;
        float_exception_flags := float_exception_flags OR float_flag_inexact;
        aSign := extractFloat32Sign( a );
        case ( float_rounding_mode ) of
          float_round_nearest_even:
            Begin
              if ( ( aExp = $7E ) and (extractFloat32Frac( a )<>0) ) then
                Begin
                  float32_round_to_int := packFloat32( aSign, $7F, 0 );
                  exit;
                End;
            End;
          float_round_down:
            Begin
              if aSign <> 0 then
                float32_round_to_int := $BF800000
              else
                float32_round_to_int := 0;
              exit;
            End;
          float_round_up:
            Begin
              if aSign <> 0 then
                float32_round_to_int := $80000000
              else
                float32_round_to_int := $3F800000;
              exit;
            End;
        end;
        { Every remaining case rounds to a zero carrying the sign of `a'.
          The exit is required: without it control falls through into the
          mask-rounding code below, which is only meant for exponents
          $7F..$95 and would overwrite this result. }
        float32_round_to_int := packFloat32( aSign, 0, 0 );
        exit;
      End;
    { lastBitMask selects the lowest bit that is kept; roundBitsMask covers
      every bit below it. }
    lastBitMask := 1;
    lastBitMask := lastBitMask shl ($96 - aExp);
    roundBitsMask := lastBitMask - 1;
    z := a;
    roundingMode := float_rounding_mode;
    if ( roundingMode = float_round_nearest_even ) then
      Begin
        { Add half of the last kept bit; when the discarded bits then come
          out as zero, clear the last kept bit as well. }
        z := z + (lastBitMask shr 1);
        if ( ( z and roundBitsMask ) = 0 ) then
          z := z and not lastBitMask;
      End
    else if ( roundingMode <> float_round_to_zero ) then
      Begin
        { Directed rounding: bump the magnitude when the sign bit and the
          "round up" selection differ. }
        if ( (extractFloat32Sign( z ) xor flag(roundingMode = float_round_up ))<>0 ) then
          Begin
            z := z + roundBitsMask;
          End;
      End;
    z := z and not roundBitsMask;
    if ( z <> a ) then
      float_exception_flags := float_exception_flags or float_flag_inexact;
    float32_round_to_int := z;
  End;

{*
-------------------------------------------------------------------------------
Returns the result of adding the absolute values of the single-precision
floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
before being returned.
`zSign' is ignored if the result is a NaN.
The addition is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function addFloat32Sigs( a:float32; b: float32; zSign:flag ): float32;
  Var
    expA, expB, expZ: int16;
    sigA, sigB, sigZ: bits32;
    delta: int16;
  Begin
    sigA := extractFloat32Frac( a );
    expA := extractFloat32Exp( a );
    sigB := extractFloat32Frac( b );
    expB := extractFloat32Exp( b );
    delta := expA - expB;
    sigA := sigA shl 6;
    sigB := sigB shl 6;

    if ( delta = 0 ) then
      Begin
        { Equal exponents. }
        if ( expA = $FF ) then
          Begin
            if ( sigA OR sigB ) <> 0 then
              addFloat32Sigs := propagateFloat32NaN( a, b )
            else
              addFloat32Sigs := a;
            exit;
          End;
        if ( expA = 0 ) then
          Begin
            { Both exponent fields are zero: the fractions are added
              and packed directly. }
            addFloat32Sigs := packFloat32( zSign, 0, ( sigA + sigB ) shr 6 );
            exit;
          End;
        sigZ := $40000000 + sigA + sigB;
        expZ := expA;
        addFloat32Sigs := roundAndPackFloat32( zSign, expZ, sigZ );
        exit;
      End;

    if ( delta > 0 ) then
      Begin
        { `a' has the larger exponent: align `b' to it. }
        if ( expA = $FF ) then
          Begin
            if ( sigA <> 0 ) then
              addFloat32Sigs := propagateFloat32NaN( a, b )
            else
              addFloat32Sigs := a;
            exit;
          End;
        if ( expB = 0 ) then
          Dec(delta)
        else
          sigB := sigB or $20000000;
        shift32RightJamming( sigB, delta, sigB );
        expZ := expA;
      End
    else
      Begin
        { `b' has the larger exponent: align `a' to it. }
        if ( expB = $FF ) then
          Begin
            if ( sigB <> 0 ) then
              addFloat32Sigs := propagateFloat32NaN( a, b )
            else
              addFloat32Sigs := packFloat32( zSign, $FF, 0 );
            exit;
          End;
        if ( expA = 0 ) then
          Inc(delta)
        else
          sigA := sigA OR $20000000;
        shift32RightJamming( sigA, - delta, sigA );
        expZ := expB;
      End;

    sigA := sigA OR $20000000;
    sigZ := ( sigA + sigB ) shl 1;
    Dec(expZ);
    { If the doubled sum reaches the top bit, use the undoubled sum and
      restore the exponent. }
    if ( sbits32 (sigZ) < 0 ) then
      Begin
        sigZ := sigA + sigB;
        Inc(expZ);
      End;
    addFloat32Sigs := roundAndPackFloat32( zSign, expZ, sigZ );
  End;

{*
-------------------------------------------------------------------------------
Returns the result of subtracting the absolute values of the single-
precision floating-point values `a' and `b'.  If `zSign' is 1, the
difference is negated before being returned.  `zSign' is ignored if the
result is a NaN.  The subtraction is performed according to the IEC/IEEE
Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function subFloat32Sigs( a:float32; b:float32; zSign:flag ): float32;
  Var
    expA, expB, expZ: int16;
    sigA, sigB, sigZ: bits32;
    delta : int16;
    aIsLarger : boolean;
  Begin
    sigA := extractFloat32Frac( a );
    expA := extractFloat32Exp( a );
    sigB := extractFloat32Frac( b );
    expB := extractFloat32Exp( b );
    delta := expA - expB;
    sigA := sigA shl 7;
    sigB := sigB shl 7;

    if ( delta > 0 ) then
      Begin
        { `a' has the larger exponent. }
        if ( expA = $FF ) then
          Begin
            if ( sigA <> 0 ) then
              subFloat32Sigs := propagateFloat32NaN( a, b )
            else
              subFloat32Sigs := a;
            exit;
          End;
        if ( expB = 0 ) then
          Dec(delta)
        else
          sigB := sigB OR $40000000;
        shift32RightJamming( sigB, delta, sigB );
        sigA := sigA OR $40000000;
        aIsLarger := true;
      End
    else if ( delta < 0 ) then
      Begin
        { `b' has the larger exponent. }
        if ( expB = $FF ) then
          Begin
            if ( sigB <> 0 ) then
              subFloat32Sigs := propagateFloat32NaN( a, b )
            else
              subFloat32Sigs := packFloat32( zSign XOR 1, $FF, 0 );
            exit;
          End;
        if ( expA = 0 ) then
          Inc(delta)
        else
          sigA := sigA OR $40000000;
        shift32RightJamming( sigA, - delta, sigA );
        sigB := sigB OR $40000000;
        aIsLarger := false;
      End
    else
      Begin
        { Equal exponents. }
        if ( expA = $FF ) then
          Begin
            if ( sigA OR sigB ) <> 0 then
              subFloat32Sigs := propagateFloat32NaN( a, b )
            else
              Begin
                float_raise( float_flag_invalid );
                subFloat32Sigs := float32_default_nan;
              End;
            exit;
          End;
        if ( expA = 0 ) then
          Begin
            expA := 1;
            expB := 1;
          End;
        if ( sigA = sigB ) then
          Begin
            { Exact cancellation: a zero whose sign bit is set only when
              rounding down. }
            subFloat32Sigs := packFloat32( flag(float_rounding_mode = float_round_down), 0, 0 );
            exit;
          End;
        aIsLarger := ( sigB < sigA );
      End;

    if aIsLarger then
      Begin
        sigZ := sigA - sigB;
        expZ := expA;
      End
    else
      Begin
        { The operands are swapped, so the result sign flips. }
        sigZ := sigB - sigA;
        expZ := expB;
        zSign := zSign xor 1;
      End;
    Dec(expZ);
    subFloat32Sigs := normalizeRoundAndPackFloat32( zSign, expZ, sigZ );
  End;

{*
-------------------------------------------------------------------------------
Returns the result of adding the single-precision floating-point values `a'
and `b'.
The operation is performed according to the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_add( a: float32; b:float32 ): float32;
{$ifdef fpc} [public,Alias:'FLOAT32_ADD'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
  Var
    signA, signB: Flag;
  Begin
    signA := extractFloat32Sign( a );
    signB := extractFloat32Sign( b );
    { Same signs: the magnitudes add.  Different signs: they subtract.
      Either helper receives the sign of `a' as the result sign. }
    if ( signA <> signB ) then
      float32_add := subFloat32Sigs( a, b, signA )
    else
      float32_add := addFloat32Sigs( a, b, signA );
  End;

{*
-------------------------------------------------------------------------------
Returns the result of subtracting the single-precision floating-point values
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_sub( a: float32 ; b:float32 ): float32;
{$ifdef fpc} [public,Alias:'FLOAT32_SUB'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
  Var
    signA, signB: flag;
  Begin
    signA := extractFloat32Sign( a );
    signB := extractFloat32Sign( b );
    { Same signs: the magnitudes subtract.  Different signs: they add. }
    if ( signA <> signB ) then
      float32_sub := addFloat32Sigs( a, b, signA )
    else
      float32_sub := subFloat32Sigs( a, b, signA );
  End;

{*
-------------------------------------------------------------------------------
Returns the result of multiplying the single-precision floating-point values
`a' and `b'.
The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_mul(a: float32; b: float32 ) : float32;
{$ifdef fpc} [public,Alias:'FLOAT32_MUL'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
  { Special operands are dispatched first (exponent field $FF, then zero
    exponent fields); the general case multiplies the two significands and
    hands the product to roundAndPackFloat32.  The result sign is the XOR of
    the operand signs. }
  Var
    aSign, bSign, zSign: flag;
    aExp, bExp, zExp : int16;
    aSig, bSig, zSig0, zSig1: bits32;
  Begin
    aSig := extractFloat32Frac( a );
    aExp := extractFloat32Exp( a );
    aSign := extractFloat32Sign( a );
    bSig := extractFloat32Frac( b );
    bExp := extractFloat32Exp( b );
    bSign := extractFloat32Sign( b );
    zSign := aSign xor bSign;
    if ( aExp = $FF ) then
    Begin
        if ( (aSig<>0) OR ( ( bExp = $FF ) AND  (bSig<>0) ) ) then
        Begin
            { Either operand is a NaN.  The exit is required: without it the
              propagated NaN is overwritten by the infinity / default-NaN
              results below. }
            float32_mul := propagateFloat32NaN( a, b );
            exit;
        End;
        { `a' has exponent $FF and a zero fraction; `b' all-zero is invalid. }
        if ( ( bExp OR  bSig ) = 0 ) then
        Begin
            float_raise( float_flag_invalid );
            float32_mul := float32_default_nan;
            exit;
        End;
        float32_mul := packFloat32( zSign, $FF, 0 );
        exit;
    End;
    if ( bExp = $FF ) then
    Begin
        if ( bSig <> 0 ) then
        Begin
           float32_mul := propagateFloat32NaN( a, b );
           exit;
        End;
        { Mirror of the case above with the operands swapped. }
        if ( ( aExp OR  aSig ) = 0 ) then
        Begin
            float_raise( float_flag_invalid );
            float32_mul := float32_default_nan;
            exit;
        End;
        float32_mul := packFloat32( zSign, $FF, 0 );
        exit;
    End;
    if ( aExp = 0 ) then
    Begin
        if ( aSig = 0 ) then
        Begin
           float32_mul := packFloat32( zSign, 0, 0 );
           exit;
        End;
        normalizeFloat32Subnormal( aSig, aExp, aSig );
    End;
    if ( bExp = 0 ) then
    Begin
        if ( bSig = 0 ) then
         Begin
           float32_mul := packFloat32( zSign, 0, 0 );
           exit;
         End;
        normalizeFloat32Subnormal( bSig, bExp, bSig );
    End;
    zExp := aExp + bExp - $7F;
    aSig := ( aSig OR  $00800000 ) shl 7;
    bSig := ( bSig OR  $00800000 ) shl 8;
    mul32To64( aSig, bSig, zSig0, zSig1 );
    { Fold any non-zero low word into the lowest bit of the high word. }
    zSig0 := zSig0 OR bits32( zSig1 <> 0 );
    { Shift left once more when bit 30 of the product is still clear. }
    if ( 0 <= sbits32 ( zSig0 shl 1 ) ) then
    Begin
        zSig0 := zSig0 shl 1;
        Dec(zExp);
    End;
    float32_mul := roundAndPackFloat32( zSign, zExp, zSig0 );
 End;

{*
-------------------------------------------------------------------------------
Returns the result of dividing the single-precision floating-point value `a'
by the corresponding value `b'.  The operation is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_div(a: float32;b: float32 ): float32;
{$ifdef fpc} [public,Alias:'FLOAT32_DIV'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
  { Special operands are dispatched first; the general case estimates the
    quotient with estimateDiv64To32, corrects it when its low bits are close
    to a rounding boundary, and hands it to roundAndPackFloat32. }
  Var
    aSign, bSign, zSign: flag;
    aExp, bExp, zExp: int16;
    aSig, bSig, zSig, rem0, rem1, term0, term1: bits32;
  Begin
    aSig := extractFloat32Frac( a );
    aExp := extractFloat32Exp( a );
    aSign := extractFloat32Sign( a );
    bSig := extractFloat32Frac( b );
    bExp := extractFloat32Exp( b );
    bSign := extractFloat32Sign( b );
    zSign := aSign xor bSign;
    if ( aExp = $FF ) then
      Begin
        if ( aSig <> 0 ) then
        Begin
           float32_div := propagateFloat32NaN( a, b );
           exit;
        End;
        if ( bExp = $FF ) then
        Begin
            if ( bSig <> 0) then
            Begin
              { `b' is a NaN.  The exit is required: without it control
                falls through, raises the invalid flag and replaces the
                propagated NaN with the default NaN. }
              float32_div := propagateFloat32NaN( a, b );
              exit;
            End;
            { Both operands have exponent $FF and zero fractions: invalid. }
            float_raise( float_flag_invalid );
            float32_div := float32_default_nan;
            exit;
        End;
        float32_div := packFloat32( zSign, $FF, 0 );
        exit;
      End;
    if ( bExp = $FF ) then
    Begin
        if ( bSig <> 0) then
        Begin
          float32_div := propagateFloat32NaN( a, b );
          exit;
        End;
        float32_div := packFloat32( zSign, 0, 0 );
        exit;
    End;
    if ( bExp = 0 ) Then
    Begin
        if ( bSig = 0 ) Then
        Begin
            { Divisor is zero: 0/0 is invalid, anything else is divide-by-zero. }
            if ( ( aExp OR  aSig ) = 0 ) then
            Begin
                float_raise( float_flag_invalid );
                float32_div := float32_default_nan;
                exit;
            End;
            float_raise( float_flag_divbyzero );
            float32_div := packFloat32( zSign, $FF, 0 );
            exit;
        End;
        normalizeFloat32Subnormal( bSig, bExp, bSig );
    End;
    if ( aExp = 0 ) Then
    Begin
        if ( aSig = 0 ) Then
        Begin
          float32_div := packFloat32( zSign, 0, 0 );
          exit;
        End;
        normalizeFloat32Subnormal( aSig, aExp, aSig );
    End;
    zExp := aExp - bExp + $7D;
    aSig := ( aSig OR  $00800000 ) shl 7;
    bSig := ( bSig OR  $00800000 ) shl 8;
    { Halve the dividend when twice its value is not below the divisor. }
    if ( bSig <= ( aSig + aSig ) ) then
    Begin
        aSig := aSig shr 1;
        Inc(zExp);
    End;
    zSig := estimateDiv64To32( aSig, 0, bSig );
    { Only when the low six bits are 0..2 is the estimate corrected against
      the exact remainder. }
    if ( ( zSig and $3F ) <= 2 ) then
    Begin
        mul32To64( bSig, zSig, term0, term1 );
        sub64( aSig, 0, term0, term1, rem0, rem1 );
        while ( sbits32 (rem0) < 0 ) do
        Begin
            Dec(zSig);
            add64( rem0, rem1, 0, bSig, rem0, rem1 );
        End;
        { Record a non-zero remainder in the lowest bit. }
        zSig := zSig or bits32( rem1 <> 0 );
    End;
    float32_div := roundAndPackFloat32( zSign, zExp, zSig );
  End;

{*
-------------------------------------------------------------------------------
Returns the remainder of the single-precision floating-point value `a'
with respect to the corresponding value `b'.
The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_rem(a: float32; b: float32 ):float32;
{$ifdef fpc} [public,Alias:'FLOAT32_REM'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
  Var
    aSign, bSign, zSign: flag;
    expA, expB, delta: int16;
    sigA, sigB, quot, prevSigA: bits32;
    meanSig: sbits32;
  Begin
    sigA := extractFloat32Frac( a );
    expA := extractFloat32Exp( a );
    aSign := extractFloat32Sign( a );
    sigB := extractFloat32Frac( b );
    expB := extractFloat32Exp( b );
    bSign := extractFloat32Sign( b );

    if ( expA = $FF ) then
      Begin
        { `a' has exponent $FF: propagate a NaN, otherwise invalid. }
        if ( (sigA<>0) OR ( ( expB = $FF ) AND  (sigB <>0)) ) then
          float32_rem := propagateFloat32NaN( a, b )
        else
          Begin
            float_raise( float_flag_invalid );
            float32_rem := float32_default_nan;
          End;
        exit;
      End;
    if ( expB = $FF ) then
      Begin
        if ( sigB <> 0 ) then
          float32_rem := propagateFloat32NaN( a, b )
        else
          float32_rem := a;
        exit;
      End;
    if ( expB = 0 ) then
      Begin
        if ( sigB = 0 ) then
          Begin
            { Zero divisor is invalid. }
            float_raise( float_flag_invalid );
            float32_rem := float32_default_nan;
            exit;
          End;
        normalizeFloat32Subnormal( sigB, expB, sigB );
      End;
    if ( expA = 0 ) then
      Begin
        if ( sigA = 0 ) then
          Begin
            float32_rem := a;
            exit;
          End;
        normalizeFloat32Subnormal( sigA, expA, sigA );
      End;

    delta := expA - expB;
    sigA := ( sigA OR  $00800000 ) shl 8;
    sigB := ( sigB OR  $00800000 ) shl 8;
    if ( delta < 0 ) then
      Begin
        if ( delta < -1 ) then
          Begin
            float32_rem := a;
            exit;
          End;
        sigA := sigA shr 1;
      End;

    { First quotient bit. }
    if ( sigB <= sigA ) then
      Begin
        quot := 1;
        sigA := sigA - sigB;
      End
    else
      quot := 0;

    { Consume the exponent difference 30 bits per pass. }
    delta := delta - 32;
    while ( delta > 0 ) do
      Begin
        quot := estimateDiv64To32( sigA, 0, sigB );
        if ( quot > 2 ) then
          quot := quot - 2
        else
          quot := 0;
        sigA := - ( ( sigB shr 2 ) * quot );
        delta := delta - 30;
      End;
    delta := delta + 32;

    if ( delta > 0 ) then
      Begin
        quot := estimateDiv64To32( sigA, 0, sigB );
        if ( quot > 2 ) then
          quot := quot - 2
        else
          quot := 0;
        quot := quot shr (32 - delta);
        sigB := sigB shr 2;
        sigA := ( ( sigA shr 1 ) shl ( delta - 1 ) ) - sigB * quot;
      End
    else
      Begin
        sigA := sigA shr 2;
        sigB := sigB shr 2;
      End;

    { Keep subtracting until the remainder turns negative, remembering the
      last value before each subtraction. }
    Repeat
        prevSigA := sigA;
        Inc(quot);
        sigA := sigA - sigB;
    Until ( sbits32 (sigA) < 0 );

    { Choose between the last two candidates by the sign of their sum;
      when the sum is zero, an odd quotient selects the earlier one. }
    meanSig := sigA + prevSigA;
    if ( ( meanSig < 0 ) OR ( ( meanSig = 0 ) AND  (( quot and 1 )<>0) ) ) then
      sigA := prevSigA;

    zSign := flag( sbits32 (sigA) < 0 );
    if ( zSign<>0 ) then
      sigA := - sigA;
    float32_rem := normalizeRoundAndPackFloat32( aSign xor zSign, expB, sigA );
  End;

{*
-------------------------------------------------------------------------------
Returns the square root of the single-precision floating-point value `a'.
The operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_sqrt(a: float32 ): float32;
{$ifdef fpc} [public,Alias:'FLOAT32_SQRT'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    negative : flag;
    expA, expZ : int16;
    sigA, sigZ, rem0, rem1, term0, term1: bits32;
Begin
    sigA := extractFloat32Frac( a );
    expA := extractFloat32Exp( a );
    negative := extractFloat32Sign( a );

    if ( expA = $FF ) then
      Begin
        if ( sigA <> 0) then
          float32_sqrt := propagateFloat32NaN( a, 0 )
        else if ( negative = 0) then
          float32_sqrt := a
        else
          Begin
            float_raise( float_flag_invalid );
            float32_sqrt := float32_default_nan;
          End;
        exit;
      End;
    if ( negative <> 0) then
      Begin
        { A negative input is invalid unless all its other bits are zero. }
        if ( ( expA OR  sigA ) = 0 ) then
          float32_sqrt := a
        else
          Begin
            float_raise( float_flag_invalid );
            float32_sqrt := float32_default_nan;
          End;
        exit;
      End;
    if ( expA = 0 ) then
      Begin
        if ( sigA = 0 ) then
          Begin
            float32_sqrt := 0;
            exit;
          End;
        normalizeFloat32Subnormal( sigA, expA, sigA );
      End;

    expZ := ( ( expA - $7F ) shr 1 ) + $7E;
    sigA := ( sigA OR  $00800000 ) shl 8;
    sigZ := estimateSqrt32( expA, sigA ) + 2;

    if ( sigZ < 2 ) then
      Begin
        { The "+ 2" wrapped around: use the largest value, without the
          final halving. }
        sigZ := $7FFFFFFF;
      End
    else
      Begin
        { The estimate is corrected against the exact remainder only when
          its low seven bits are 0..5. }
        if ( ( sigZ and $7F ) <= 5 ) then
          Begin
            sigA := sigA shr (expA and 1);
            mul32To64( sigZ, sigZ, term0, term1 );
            sub64( sigA, 0, term0, term1, rem0, rem1 );
            while ( sbits32 (rem0) < 0 ) do
              Begin
                Dec(sigZ);
                shortShift64Left( 0, sigZ, 1, term0, term1 );
                term1 := term1 or 1;
                add64( rem0, rem1, term0, term1, rem0, rem1 );
              End;
            { Record a non-zero remainder in the lowest bit. }
            sigZ := sigZ OR bits32( ( rem0 OR  rem1 ) <> 0 );
          End;
        shift32RightJamming( sigZ, 1, sigZ );
      End;
    float32_sqrt := roundAndPackFloat32( 0, expZ, sigZ );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.
The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_eq( a:float32; b:float32): flag;
{$ifdef fpc} [public,Alias:'FLOAT32_EQ'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Begin
    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
       ) then
      Begin
        { A NaN operand never compares equal; only a signaling NaN raises
          the invalid flag here. }
        if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
          float_raise( float_flag_invalid );
        float32_eq := 0;
      End
    else
      Begin
        { Equal bit patterns, or both operands zero once the sign bit is
          shifted out. }
        float32_eq := flag( a = b ) OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 );
      End;
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is less than
or equal to the corresponding value `b', and 0 otherwise.
The comparison
is performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_le( a: float32; b : float32 ):flag;
{$ifdef fpc} [public,Alias:'FLOAT32_LE'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
var
    signA, signB: flag;
Begin
    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
       ) then
      Begin
        { Any NaN operand raises the invalid flag and compares false. }
        float_raise( float_flag_invalid );
        float32_le := 0;
      End
    else
      Begin
        signA := extractFloat32Sign( a );
        signB := extractFloat32Sign( b );
        if ( signA = signB ) then
          { Same sign: compare the bit patterns, inverting the ordering
            when the sign bit is set. }
          float32_le := flag(flag( a = b ) OR flag( signA xor flag( a < b ) ))
        else
          { Different signs: true when `a' carries the sign bit, or when
            both operands are zero. }
          float32_le := signA OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 );
      End;
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.
The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_lt( a:float32 ; b : float32): flag;
{$ifdef fpc} [public,Alias:'FLOAT32_LT'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
var
    signA, signB: flag;
Begin
    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a ) <>0))
         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b ) <>0) )
       ) then
      Begin
        { Any NaN operand raises the invalid flag and compares false. }
        float_raise( float_flag_invalid );
        float32_lt := 0;
      End
    else
      Begin
        signA := extractFloat32Sign( a );
        signB := extractFloat32Sign( b );
        if ( signA = signB ) then
          { Same sign: compare the bit patterns, inverting the ordering
            when the sign bit is set. }
          float32_lt := flag(flag( a <> b ) AND  flag( signA xor flag( a < b ) ))
        else
          { Different signs: true when `a' carries the sign bit and the
            operands are not both zero. }
          float32_lt := signA AND  flag( bits32 ( ( a OR  b ) shl 1 ) <> 0 );
      End;
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.  The invalid exception is
raised if either operand is a NaN.
Otherwise, the comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_eq_signaling( a: float32; b: float32) : flag;
Begin
    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a ) <> 0))
         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b ) <> 0))
       ) then
      Begin
        { Any NaN operand raises the invalid flag and compares false. }
        float_raise( float_flag_invalid );
        float32_eq_signaling := 0;
      End
    else
      Begin
        { Equal bit patterns, or both operands zero once the sign bit is
          shifted out. }
        float32_eq_signaling := (flag( a = b ) OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 ));
      End;
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision floating-point value `a' is less than or
equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
cause an exception.  Otherwise, the comparison is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_le_quiet( a: float32 ; b : float32 ): flag;
Var
    signA, signB: flag;
Begin
    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
       ) then
      Begin
        { A NaN operand compares false; only a signaling NaN raises the
          invalid flag. }
        if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
          float_raise( float_flag_invalid );
        float32_le_quiet := 0;
      End
    else
      Begin
        signA := extractFloat32Sign( a );
        signB := extractFloat32Sign( b );
        if ( signA = signB ) then
          { Same sign: compare the bit patterns, inverting the ordering
            when the sign bit is set. }
          float32_le_quiet := flag(flag( a = b ) OR flag( signA xor flag( a < b ) ))
        else
          { Different signs: true when `a' carries the sign bit, or when
            both operands are zero. }
          float32_le_quiet := signA OR flag( bits32 ( ( a OR  b ) shl 1 ) = 0 );
      End;
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the single-precision
floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
exception.  Otherwise, the comparison is performed according to the IEC/IEEE
Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float32_lt_quiet( a: float32 ; b: float32 ): flag;
Var
    signA, signB: flag;
Begin
    if (    ( ( extractFloat32Exp( a ) = $FF ) AND  (extractFloat32Frac( a )<>0) )
         OR ( ( extractFloat32Exp( b ) = $FF ) AND  (extractFloat32Frac( b )<>0) )
       ) then
      Begin
        { A NaN operand compares false; only a signaling NaN raises the
          invalid flag. }
        if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
          float_raise( float_flag_invalid );
        float32_lt_quiet := 0;
      End
    else
      Begin
        signA := extractFloat32Sign( a );
        signB := extractFloat32Sign( b );
        if ( signA = signB ) then
          { Same sign: compare the bit patterns, inverting the ordering
            when the sign bit is set. }
          float32_lt_quiet := flag(flag( a <> b ) AND  ( signA xor flag( a < b ) ))
        else
          { Different signs: true when `a' carries the sign bit and the
            operands are not both zero. }
          float32_lt_quiet := signA AND  flag( bits32 ( ( a OR  b ) shl 1 ) <> 0 );
      End;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic---which means in particular that the conversion is rounded
according to the current rounding mode.  If `a' is a NaN, the largest
positive integer is returned.
Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*}
Function float64_to_int32(a: float64): int32;
{$ifdef fpc} [public,Alias:'FLOAT64_TO_INT32'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
var
    aSign: flag;
    aExp, shiftCount: int16;
    aSig0, aSig1, absZ, aSigExtra: bits32;
    z: int32;
    roundingMode: int8;
    label invalid;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    shiftCount := aExp - $413;
    if ( shiftCount < 0 ) then
    Begin
        { The integer part fits in the high fraction word; the low word is
          reduced to a single "sticky" bit. }
        aSig1 := flag( aSig1 <> 0 );
        if ( $3FE <= aExp ) then
        Begin
            aSig0 := aSig0 OR $00100000;
            aSigExtra := ( aSig0 shl ( shiftCount and 31 ) ) OR  aSig1;
            absZ := aSig0 shr ( - shiftCount );
        End
        else
        Begin
            { Exponent below $3FE: integer part is 0, everything is remainder. }
            aSigExtra := aExp OR  aSig0 OR  aSig1;
            absZ := 0;
        End;
    End
    else
    Begin
        if ( $41E < aExp ) then
        Begin
            { Too large for 32 bits; when the exponent is $7FF with a
              non-zero fraction the sign is cleared so that the positive
              limit is returned. }
            if ( ( aExp = $7FF ) AND  (( aSig0 OR  aSig1 )<>0) ) then
               aSign := 0;
            goto invalid;
        End;
        shortShift64Left(
            aSig0 OR  $00100000, aSig1, shiftCount, absZ, aSigExtra );
        if ( $80000000 < absZ ) then
          goto invalid;
    End;

    roundingMode := float_rounding_mode;
    if ( roundingMode = float_round_nearest_even ) then
    Begin
        { Top bit of aSigExtra set: round up; if the rest of aSigExtra is
          zero (a tie) the low bit of the result is cleared. }
        if ( sbits32(aSigExtra) < 0 ) then
        Begin
            Inc(absZ);
            if ( bits32 ( aSigExtra shl 1 ) = 0 ) then
               absZ :=  absZ and not 1;
        End;
        if aSign <> 0 then
          z := - absZ
        else
          z := absZ;
    End
    else
    Begin
        { Directed modes: collapse the remainder to 0/1 and add it only when
          the mode rounds away from zero for this sign. }
        aSigExtra := bits32( aSigExtra <> 0 );
        if ( aSign <> 0) then
            z := - (   absZ
                    + ( int32( roundingMode = float_round_down ) and aSigExtra ) )
        else
            z := absZ + ( int32( roundingMode = float_round_up ) and aSigExtra );
    End;

    { A non-zero result whose sign disagrees with the input sign overflowed. }
    if ( (( aSign xor flag( z < 0 ) )<>0) AND  (z<>0) ) then
       goto invalid;
    if ( aSigExtra <> 0) then
       float_exception_flags := float_exception_flags or float_flag_inexact;
    float64_to_int32 := z;
    exit;

 invalid:
    float_raise( float_flag_invalid );
    if (aSign <> 0 ) then
      float64_to_int32 := sbits32 ($80000000)
    else
      float64_to_int32 :=  $7FFFFFFF;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic, except that the conversion is always rounded toward zero.
If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
the conversion overflows, the largest integer with the same sign as `a' is
returned.
-------------------------------------------------------------------------------
*}
Function float64_to_int32_round_to_zero(a: float64 ): int32;
{$ifdef fpc} [public,Alias:'FLOAT64_TO_INT32_ROUND_TO_ZERO'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign: flag;
    aExp, shiftCount: int16;
    aSig0, aSig1, absZ, aSigExtra: bits32;
    z: int32;
    label invalid;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    shiftCount := aExp - $413;
    if ( shiftCount < 0 ) then
    Begin
        if ( aExp < $3FF ) then
        Begin
            { Exponent below $3FF: result is 0, inexact unless `a' is zero. }
            if ( aExp OR  aSig0 OR  aSig1 )<>0 then
                float_exception_flags :=
                  float_exception_flags or float_flag_inexact;
            float64_to_int32_round_to_zero := 0;
            exit;
        End;
        aSig0 := aSig0 or $00100000;
        aSigExtra := ( aSig0 shl ( shiftCount and 31 ) ) OR  aSig1;
        absZ := aSig0 shr ( - shiftCount );
    End
    else
    Begin
        if ( $41E < aExp ) then
        Begin
            { Exponent $7FF with non-zero fraction: force the positive limit. }
            if ( ( aExp = $7FF ) AND  (( aSig0 OR  aSig1 )<>0) ) then
               aSign := 0;
            goto invalid;
        End;
        shortShift64Left(
            aSig0 OR  $00100000, aSig1, shiftCount, absZ, aSigExtra );
    End;

    if aSign <> 0 then
      z := - absZ
    else
      z := absZ;
    { A non-zero result whose sign disagrees with the input sign overflowed. }
    if ( (( aSign xor flag( z < 0 )) <> 0) AND  (z<>0) ) then
       goto invalid;
    if ( aSigExtra <> 0) then
       float_exception_flags := float_exception_flags or float_flag_inexact;
    float64_to_int32_round_to_zero := z;
    exit;

 invalid:
    float_raise( float_flag_invalid );
    if (aSign <> 0) then
      float64_to_int32_round_to_zero := sbits32 ($80000000)
    else
      float64_to_int32_round_to_zero :=  $7FFFFFFF;
End;

{*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the single-precision floating-point format.
The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_to_float32(a: float64 ): float32;
{$ifdef fpc} [public,Alias:'FLOAT64_TO_FLOAT32'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    sign: flag;
    expo: int16;
    fracHi, fracLo, sig: bits32;
    discarded: bits32;
    nan : CommonNanT;
Begin
    fracLo := extractFloat64Frac1( a );
    fracHi := extractFloat64Frac0( a );
    expo := extractFloat64Exp( a );
    sign := extractFloat64Sign( a );

    if ( expo <> $7FF ) then
    Begin
        { Finite input: shift the 64-bit fraction right by 22 with jamming,
          keep the low result word, set bit 30 when the exponent field is
          non-zero, then round and pack with the exponent offset by $381. }
        shift64RightJamming( fracHi, fracLo, 22, discarded, sig );
        if ( expo <> 0 ) then
          sig := sig OR $40000000;
        float64_to_float32 := roundAndPackFloat32( sign, expo - $381, sig );
    End
    else if ( fracHi OR fracLo ) = 0 then
    Begin
        { Exponent $7FF with zero fraction: pack exponent $FF, zero fraction. }
        float64_to_float32 := packFloat32( sign, $FF, 0 );
    End
    else
    Begin
        { Exponent $7FF with non-zero fraction: convert via the common NaN form. }
        float64ToCommonNaN( a, nan );
        float64_to_float32 := commonNaNToFloat32( nan );
    End;
End;

{*
-------------------------------------------------------------------------------
Rounds the double-precision floating-point value `a' to an integer,
and returns the result as a double-precision floating-point value.
The
operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_round_to_int(a: float64; var out: float64 );
{$ifdef fpc} [public,Alias:'FLOAT64_ROUND_TO_INT'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign: flag;
    aExp: int16;
    lastBitMask, roundBitsMask: bits32;
    roundingMode: int8;
    z: float64;
Begin
    aExp := extractFloat64Exp( a );
    if ( $413 <= aExp ) then
    Begin
        if ( $433 <= aExp ) then
        Begin
            { Exponent $433 or above: `a' is returned unchanged, except that
              exponent $7FF with a non-zero fraction goes through NaN
              propagation. }
            if (    ( aExp = $7FF )
                 AND ( ( extractFloat64Frac0( a ) OR  extractFloat64Frac1( a ) ) <>0)
               )  then
            Begin
                propagateFloat64NaN( a, a, out );
                exit;
            End;
            out := a;
            exit;
        End;
        { The units bit lies in the low word.  The two-step shift lets the
          mask become 0 when aExp = $413 (a total shift of 32). }
        lastBitMask := 1;
        lastBitMask := ( lastBitMask shl ( $432 - aExp ) ) shl 1;
        roundBitsMask := lastBitMask - 1;
        z := a;
        roundingMode := float_rounding_mode;
        if ( roundingMode = float_round_nearest_even ) then
        Begin
            if ( lastBitMask <> 0) then
            Begin
                { Add half a unit; on an exact tie clear the units bit. }
                add64( z.high, z.low, 0, lastBitMask shr 1, z.high, z.low );
                if ( ( z.low and roundBitsMask ) = 0 ) then
                   z.low := z.low and not lastBitMask;
            End
            else
            Begin
                { Mask is 0: the whole low word is discarded, its top bit
                  is the half-unit bit and the units bit is bit 0 of z.high. }
                if ( sbits32 (z.low) < 0 ) then
                Begin
                    Inc(z.high);
                    if ( bits32 ( z.low shl 1 ) = 0 ) then
                      z.high := z.high and not 1;
                End;
            End;
        End
        else if ( roundingMode <> float_round_to_zero ) then
        Begin
            { Directed rounding: bump the magnitude when the sign bit and
              the "round up" mode test disagree. }
            if (   extractFloat64Sign( z )
                 xor flag( roundingMode = float_round_up ) )<> 0 then
            Begin
                add64( z.high, z.low, 0, roundBitsMask, z.high, z.low );
            End;
        End;
        z.low := z.low and not roundBitsMask;
    End
    else
    Begin
        if ( aExp <= $3FE ) then
        Begin
            { Exponent at most $3FE: a zero input is returned as is; any
              other input is inexact and yields a packed value with
              exponent 0 or $3FF depending on the rounding mode. }
            if ( ( ( bits32 ( a.high shl 1 ) ) OR  a.low ) = 0 ) then
            Begin
                out := a;
                exit;
            End;
            float_exception_flags := float_exception_flags or
               float_flag_inexact;
            aSign := extractFloat64Sign( a );
            case ( float_rounding_mode ) of
             float_round_nearest_even:
               Begin
                if (    ( aExp = $3FE )
                     AND  ( (extractFloat64Frac0( a ) OR  extractFloat64Frac1( a ) )<>0)
                   ) then
                Begin
                    packFloat64( aSign, $3FF, 0, 0, out );
                    exit;
                End;
               End;
             float_round_down:
                Begin
                  if aSign<>0 then
                   packFloat64( 1, $3FF, 0, 0, out )
                  else
                   packFloat64( 0, 0, 0, 0, out );
                  exit;
                End;
             float_round_up:
                Begin
                  if aSign <> 0 then
                   packFloat64( 1, 0, 0, 0, out )
                  else
                   packFloat64( 0, $3FF, 0, 0, out );
                  exit;
                End;
            end;
            packFloat64( aSign, 0, 0, 0, out );
            exit;
        End;
        { The units bit lies in the high word; the low word is dropped
          entirely and only contributes to tie / sticky decisions. }
        lastBitMask := 1;
        lastBitMask := lastBitMask shl ($413 - aExp);
        roundBitsMask := lastBitMask - 1;
        z.low := 0;
        z.high := a.high;
        roundingMode := float_rounding_mode;
        if ( roundingMode = float_round_nearest_even ) then
        Begin
            z.high := z.high + lastBitMask shr 1;
            if ( ( ( z.high and roundBitsMask ) OR  a.low ) = 0 ) then
            Begin
                z.high := z.high and not lastBitMask;
            End;
        End
        else if ( roundingMode <> float_round_to_zero ) then
        Begin
            if (   extractFloat64Sign( z )
                 xor flag( roundingMode = float_round_up ) )<> 0 then
            Begin
                z.high := z.high or bits32( a.low <> 0 );
                z.high := z.high + roundBitsMask;
            End;
        End;
        z.high := z.high and not roundBitsMask;
    End;
    { Any change to the bit pattern means the result is inexact. }
    if ( ( z.low <> a.low ) OR ( z.high <> a.high ) ) then
    Begin
        float_exception_flags :=
          float_exception_flags or float_flag_inexact;
    End;
    out := z;
End;

{*
-------------------------------------------------------------------------------
Returns the result of adding the absolute values of the double-precision
floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
before being returned.  `zSign' is ignored if the result is a NaN.
The addition is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure addFloat64Sigs( a:float64 ; b: float64 ; zSign:flag; Var out: float64 );
Var
    aExp, bExp, zExp: int16;
    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits32;
    expDiff: int16;
    label shiftRight1;
    label roundAndPack;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    bSig1 := extractFloat64Frac1( b );
    bSig0 := extractFloat64Frac0( b );
    bExp := extractFloat64Exp( b );
    expDiff := aExp - bExp;
    if ( 0 < expDiff ) then
    Begin
        { `a' has the larger exponent. }
        if ( aExp = $7FF ) then
        Begin
            if ( aSig0 OR  aSig1 ) <> 0 then
            Begin
              propagateFloat64NaN( a, b, out );
              exit;
            end;
            out := a;
            exit;
        End;
        if ( bExp = 0 ) then
        Begin
            Dec(expDiff);
        End
        else
        Begin
            bSig0 := bSig0 or $00100000;
        End;
        shift64ExtraRightJamming(
            bSig0, bSig1, 0, expDiff, bSig0, bSig1, zSig2 );
        zExp := aExp;
    End
    else if ( expDiff < 0 ) then
    Begin
        { `b' has the larger exponent. }
        if ( bExp = $7FF ) then
        Begin
            if ( bSig0 OR  bSig1 ) <> 0 then
            Begin
               propagateFloat64NaN( a, b, out );
               exit;
            End;
            packFloat64( zSign, $7FF, 0, 0, out );
            { This exit is required: without it control falls through to the
              alignment and rounding code below, which overwrites the value
              just stored in `out'. }
            exit;
        End;
        if ( aExp = 0 ) then
        Begin
            Inc(expDiff);
        End
        else
        Begin
            aSig0 := aSig0 or $00100000;
        End;
        shift64ExtraRightJamming(
            aSig0, aSig1, 0, - expDiff, aSig0, aSig1, zSig2 );
        zExp := bExp;
    End
    else
    Begin
        { Equal exponents. }
        if ( aExp = $7FF ) then
        Begin
            if ( aSig0 OR  aSig1 OR  bSig0 OR  bSig1 ) <> 0 then
            Begin
                propagateFloat64NaN( a, b, out );
                exit;
            End;
            out := a;
            exit;
        End;
        add64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
        if ( aExp = 0 ) then
        Begin
           { Both exponent fields are 0: the plain fraction sum is packed
             directly with exponent 0. }
           packFloat64( zSign, 0, zSig0, zSig1, out );
           exit;
        End;
        zSig2 := 0;
        zSig0 := zSig0 or $00200000;
        zExp := aExp;
        goto shiftRight1;
    End;
    aSig0 := aSig0 or $00100000;
    add64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
    Dec(zExp);
    { Sum below $00200000 in the high word: no one-bit right shift needed. }
    if ( zSig0 < $00200000 ) then
       goto roundAndPack;
    Inc(zExp);
 shiftRight1:
    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
 roundAndPack:
    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
End;

{*
-------------------------------------------------------------------------------
Returns the result of subtracting the absolute values of the double-
precision floating-point values `a' and `b'.  If `zSign' is 1, the
difference is negated before being returned.  `zSign' is ignored if the
result is a NaN.
The subtraction is performed according to the IEC/IEEE
Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure subFloat64Sigs( a:float64; b: float64 ; zSign:flag; Var out: float64 );
Var
    aExp, bExp, zExp: int16;
    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1: bits32;
    expDiff: int16;
    z: float64;
    label aExpBigger;
    label bExpBigger;
    label aBigger;
    label bBigger;
    label normalizeRoundAndPack;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    bSig1 := extractFloat64Frac1( b );
    bSig0 := extractFloat64Frac0( b );
    bExp := extractFloat64Exp( b );
    expDiff := aExp - bExp;
    { Both fractions are passed through shortShift64Left with a count of 10;
      the bit OR-ed in further down ($40000000) is $00100000 moved up by the
      same 10 positions. }
    shortShift64Left( aSig0, aSig1, 10, aSig0, aSig1 );
    shortShift64Left( bSig0, bSig1, 10, bSig0, bSig1 );
    if ( 0 < expDiff ) then goto aExpBigger;
    if ( expDiff < 0 ) then goto bExpBigger;
    { Equal exponents from here on. }
    if ( aExp = $7FF ) then
    Begin
        if ( aSig0 OR  aSig1 OR  bSig0 OR  bSig1 ) <> 0 then
        Begin
            propagateFloat64NaN( a, b, out );
            exit;
        End;
        { Both exponents $7FF with zero fractions: raise invalid and return
          the default NaN. }
        float_raise( float_flag_invalid );
        z.low := float64_default_nan_low;
        z.high := float64_default_nan_high;
        out := z;
        exit;
    End;
    if ( aExp = 0 ) then
    Begin
        aExp := 1;
        bExp := 1;
    End;
    { Compare the fractions high word first, then low word, to decide which
      operand is subtracted from which. }
    if ( bSig0 < aSig0 ) then goto aBigger;
    if ( aSig0 < bSig0 ) then goto bBigger;
    if ( bSig1 < aSig1 ) then goto aBigger;
    if ( aSig1 < bSig1 ) then goto bBigger;
    { Fractions are identical: pack an all-zero value whose first argument
      is 1 only in round-down mode. }
    packFloat64( flag(float_rounding_mode = float_round_down), 0, 0, 0 , out);
    exit;
 bExpBigger:
    if ( bExp = $7FF ) then
    Begin
        if ( bSig0 OR  bSig1 ) <> 0 then
        Begin
           propagateFloat64NaN( a, b, out );
           exit;
        End;
        { `b' has exponent $7FF and zero fraction: result uses the flipped
          sign. }
        packFloat64( zSign xor 1, $7FF, 0, 0, out );
        exit;
    End;
    if ( aExp = 0 ) then
    Begin
        Inc(expDiff);
    End
    else
    Begin
        aSig0 := aSig0 or $40000000;
    End;
    shift64RightJamming( aSig0, aSig1, - expDiff, aSig0, aSig1 );
    bSig0 := bSig0 or $40000000;
 bBigger:
    { b - a, with the result sign flipped. }
    sub64( bSig0, bSig1, aSig0, aSig1, zSig0, zSig1 );
    zExp := bExp;
    zSign := zSign xor 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp = $7FF ) then
    Begin
        if ( aSig0 OR  aSig1 ) <> 0 then
        Begin
           propagateFloat64NaN( a, b, out );
           exit;
        End;
        out :=  a;
        exit;
    End;
    if ( bExp = 0 ) then
    Begin
        Dec(expDiff);
    End
    else
    Begin
        bSig0 := bSig0 or $40000000;
    End;
    shift64RightJamming( bSig0, bSig1, expDiff, bSig0, bSig1 );
    aSig0 := aSig0 or $40000000;
 aBigger:
    { a - b, sign unchanged. }
    sub64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
    zExp := aExp;
 normalizeRoundAndPack:
    Dec(zExp);
    { The exponent passed on is reduced by a further 10, the same count used
      for the initial shortShift64Left calls. }
    normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1, out );
End;

{*
-------------------------------------------------------------------------------
Returns the result of adding the double-precision floating-point values `a'
and `b'.  The operation is performed according to the IEC/IEEE Standard for
Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_add( a: float64; b : float64; Var out : float64); {$ifdef fpc}[public,Alias:'FLOAT64_ADD'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign, bSign: flag;
Begin
    aSign := extractFloat64Sign( a );
    bSign := extractFloat64Sign( b );
    { Same signs: add the magnitudes.  Opposite signs: subtract them.  The
      sign of `a' is passed on as zSign in both cases. }
    if ( aSign = bSign ) then
    Begin
         addFloat64Sigs( a, b, aSign, out );
    End
    else
    Begin
        subFloat64Sigs( a, b, aSign, out );
    End;
End;

{*
-------------------------------------------------------------------------------
Returns the result of subtracting the double-precision floating-point values
`a' and `b'.
The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_sub(a: float64; b : float64; var out: float64); {$ifdef fpc}[public,Alias:'FLOAT64_SUB'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    signA, signB: flag;
Begin
    signA := extractFloat64Sign( a );
    signB := extractFloat64Sign( b );
    { Opposite signs turn the subtraction into an addition of magnitudes;
      the sign of `a' is handed on as zSign either way. }
    if ( signA <> signB ) then
        addFloat64Sigs( a, b, signA, out )
    else
        subFloat64Sigs( a, b, signA, out );
End;

{*
-------------------------------------------------------------------------------
Returns the result of multiplying the double-precision floating-point values
`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_mul( a: float64; b:float64; Var out: float64); {$ifdef fpc}[public,Alias:'FLOAT64_MUL'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign, bSign, zSign: flag;
    aExp, bExp, zExp: int16;
    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3: bits32;
    z: float64;
    label invalid;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    bSig1 := extractFloat64Frac1( b );
    bSig0 := extractFloat64Frac0( b );
    bExp := extractFloat64Exp( b );
    bSign := extractFloat64Sign( b );
    zSign := aSign xor bSign;

    { --- `a' has exponent $7FF --- }
    if ( aExp = $7FF ) then
    Begin
        if ( ( ( aSig0 OR  aSig1 ) <> 0 )
             OR ( ( bExp = $7FF ) AND ( ( bSig0 OR  bSig1 ) <> 0 ) ) ) then
            propagateFloat64NaN( a, b, out )
        else if ( ( bExp OR  bSig0 OR  bSig1 ) = 0 ) then
            goto invalid                      { zero-fraction $7FF times zero }
        else
            packFloat64( zSign, $7FF, 0, 0, out );
        exit;
    End;

    { --- `b' has exponent $7FF --- }
    if ( bExp = $7FF ) then
    Begin
        if ( bSig0 OR  bSig1 ) <> 0 then
            propagateFloat64NaN( a, b, out )
        else if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
            goto invalid                      { zero times zero-fraction $7FF }
        else
            packFloat64( zSign, $7FF, 0, 0, out );
        exit;
    End;

    { --- exponent field 0: all-zero operand gives a signed zero, otherwise
          the fraction is normalized --- }
    if ( aExp = 0 ) then
    Begin
        if ( ( aSig0 OR  aSig1 ) = 0 ) then
        Begin
           packFloat64( zSign, 0, 0, 0, out );
           exit;
        End;
        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
    End;
    if ( bExp = 0 ) then
    Begin
        if ( ( bSig0 OR  bSig1 ) = 0 ) then
        Begin
          packFloat64( zSign, 0, 0, 0, out );
          exit;
        End;
        normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
    End;

    { --- general case: 64x64 -> 128 bit product of the fractions --- }
    zExp := aExp + bExp - $400;
    aSig0 := aSig0 or $00100000;
    shortShift64Left( bSig0, bSig1, 12, bSig0, bSig1 );
    mul64To128( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3 );
    add64( zSig0, zSig1, aSig0, aSig1, zSig0, zSig1 );
    { Fold the lowest product word into a sticky bit. }
    zSig2 := zSig2 or flag( zSig3 <> 0 );
    if ( $00200000 <= zSig0 ) then
    Begin
        shift64ExtraRightJamming(
            zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
        Inc(zExp);
    End;
    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
    exit;

 invalid:
    float_raise( float_flag_invalid );
    z.low := float64_default_nan_low;
    z.high := float64_default_nan_high;
    out := z;
End;

{*
-------------------------------------------------------------------------------
Returns the result of dividing the double-precision floating-point value `a'
by the corresponding value `b'.
The operation is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_div(a: float64; b : float64 ; var out: float64 ); {$ifdef fpc}[public,Alias:'FLOAT64_DIV'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign, bSign, zSign: flag;
    aExp, bExp, zExp: int16;
    aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits32;
    rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
    z: float64;
    label invalid;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    bSig1 := extractFloat64Frac1( b );
    bSig0 := extractFloat64Frac0( b );
    bExp := extractFloat64Exp( b );
    bSign := extractFloat64Sign( b );
    zSign := aSign xor bSign;

    { --- dividend has exponent $7FF --- }
    if ( aExp = $7FF ) then
    Begin
        if ( ( ( aSig0 OR  aSig1 ) <> 0 )
             OR ( ( bExp = $7FF ) AND ( ( bSig0 OR  bSig1 ) <> 0 ) ) ) then
            propagateFloat64NaN( a, b, out )
        else if ( bExp = $7FF ) then
            goto invalid                 { both $7FF with zero fractions }
        else
            packFloat64( zSign, $7FF, 0, 0, out );
        exit;
    End;

    { --- divisor has exponent $7FF --- }
    if ( bExp = $7FF ) then
    Begin
        if ( bSig0 OR  bSig1 ) <> 0 then
            propagateFloat64NaN( a, b, out )
        else
            packFloat64( zSign, 0, 0, 0, out );
        exit;
    End;

    { --- divisor has exponent field 0 --- }
    if ( bExp = 0 ) then
    Begin
        if ( ( bSig0 OR  bSig1 ) = 0 ) then
        Begin
            { Divisor is zero: zero dividend is invalid, anything else
              raises divide-by-zero and packs exponent $7FF. }
            if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
                goto invalid;
            float_raise( float_flag_divbyzero );
            packFloat64( zSign, $7FF, 0, 0, out );
            exit;
        End;
        normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
    End;

    { --- dividend has exponent field 0 --- }
    if ( aExp = 0 ) then
    Begin
        if ( ( aSig0 OR  aSig1 ) = 0 ) then
        Begin
           packFloat64( zSign, 0, 0, 0, out );
           exit;
        End;
        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
    End;

    { --- general case --- }
    zExp := aExp - bExp + $3FD;
    shortShift64Left( aSig0 OR  $00100000, aSig1, 11, aSig0, aSig1 );
    shortShift64Left( bSig0 OR  $00100000, bSig1, 11, bSig0, bSig1 );
    { When the divisor fraction is not larger than the dividend fraction,
      the dividend is shifted right by one and the exponent adjusted. }
    if ( le64( bSig0, bSig1, aSig0, aSig1 )<>0 ) then
    Begin
        shift64Right( aSig0, aSig1, 1, aSig0, aSig1 );
        Inc(zExp);
    End;

    { High quotient word: estimate, then step down while the remainder is
      negative. }
    zSig0 := estimateDiv64To32( aSig0, aSig1, bSig0 );
    mul64By32To96( bSig0, bSig1, zSig0, term0, term1, term2 );
    sub96( aSig0, aSig1, 0, term0, term1, term2, rem0, rem1, rem2 );
    while ( sbits32 (rem0) < 0 ) do
    Begin
        Dec(zSig0);
        add96( rem0, rem1, rem2, 0, bSig0, bSig1, rem0, rem1, rem2 );
    End;

    { Low quotient word: the exact correction is only carried out when the
      low ten bits of the estimate are at most 4. }
    zSig1 := estimateDiv64To32( rem1, rem2, bSig0 );
    if ( ( zSig1 and $3FF ) <= 4 ) then
    Begin
        mul64By32To96( bSig0, bSig1, zSig1, term1, term2, term3 );
        sub96( rem1, rem2, 0, term1, term2, term3, rem1, rem2, rem3 );
        while ( sbits32 (rem1) < 0 ) do
        Begin
            Dec(zSig1);
            add96( rem1, rem2, rem3, 0, bSig0, bSig1, rem1, rem2, rem3 );
        End;
        { Non-zero remainder becomes a sticky bit. }
        zSig1 := zSig1 or flag( ( rem1 OR  rem2 OR  rem3 ) <> 0 );
    End;
    shift64ExtraRightJamming( zSig0, zSig1, 0, 11, zSig0, zSig1, zSig2 );
    roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
    exit;

 invalid:
    float_raise( float_flag_invalid );
    z.low := float64_default_nan_low;
    z.high := float64_default_nan_high;
    out := z;
End;

{*
-------------------------------------------------------------------------------
Returns the remainder of the double-precision floating-point value `a'
with respect to the corresponding value `b'.
The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_rem(a: float64; b : float64; var out: float64); {$ifdef fpc}[public,Alias:'FLOAT64_REM'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign, bSign, zSign: flag;
    aExp, bExp, expDiff: int16;
    aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2: bits32;
    allZero, alternateASig0, alternateASig1, sigMean1: bits32;
    sigMean0: sbits32;
    z: float64;
    label invalid;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    bSig1 := extractFloat64Frac1( b );
    bSig0 := extractFloat64Frac0( b );
    bExp := extractFloat64Exp( b );
    { bSign is extracted here but not read again in this procedure. }
    bSign := extractFloat64Sign( b );
    if ( aExp = $7FF ) then
    Begin
        if ((( aSig0 OR  aSig1 )<>0)
             OR ( ( bExp = $7FF ) AND  (( bSig0 OR  bSig1 )<>0) ) ) then
        Begin
            propagateFloat64NaN( a, b, out );
            exit;
        End;
        { `a' has exponent $7FF and zero fraction: invalid. }
        goto invalid;
    End;
    if ( bExp = $7FF ) then
    Begin
        if ( bSig0 OR  bSig1 ) <> 0 then
        Begin
          propagateFloat64NaN( a, b, out );
          exit;
        End;
        { `b' has exponent $7FF and zero fraction: the result is `a'. }
        out := a;
        exit;
    End;
    if ( bExp = 0 ) then
    Begin
        if ( ( bSig0 OR  bSig1 ) = 0 ) then
        Begin
 invalid:
            { Reached for a zero `b' and, via goto, for the $7FF case
              above: raise invalid and return the default NaN. }
            float_raise( float_flag_invalid );
            z.low := float64_default_nan_low;
            z.high := float64_default_nan_high;
            out := z;
            exit;
        End;
        normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
    End;
    if ( aExp = 0 ) then
    Begin
        if ( ( aSig0 OR  aSig1 ) = 0 ) then
        Begin
           out := a;
           exit;
        End;
        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
    End;
    expDiff := aExp - bExp;
    { Exponent of `a' more than one below that of `b': `a' is returned
      unchanged. }
    if ( expDiff < -1 ) then
    Begin
       out := a;
       exit;
    End;
    { `a' is shifted by one position less than `b' when expDiff is -1. }
    shortShift64Left(
        aSig0 OR  $00100000, aSig1, 11 - flag( expDiff < 0 ), aSig0, aSig1 );
    shortShift64Left( bSig0 OR  $00100000, bSig1, 11, bSig0, bSig1 );
    q := le64( bSig0, bSig1, aSig0, aSig1 );
    if ( q )<>0 then
       sub64( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
    expDiff := expDiff - 32;
    { Each pass consumes 29 bits of exponent difference; the quotient
      estimate is lowered by 4 (clamped at 0) before it is used. }
    while ( 0 < expDiff ) do
    Begin
        q := estimateDiv64To32( aSig0, aSig1, bSig0 );
        if 4 < q then
          q:= q - 4
        else
          q := 0;
        mul64By32To96( bSig0, bSig1, q, term0, term1, term2 );
        shortShift96Left( term0, term1, term2, 29, term1, term2, allZero );
        shortShift64Left( aSig0, aSig1, 29, aSig0, allZero );
        sub64( aSig0, 0, term1, term2, aSig0, aSig1 );
        expDiff := expDiff - 29;
    End;
    if ( -32 < expDiff ) then
    Begin
        { Final partial step: the estimate is scaled down by the remaining
          (negative) exponent difference. }
        q := estimateDiv64To32( aSig0, aSig1, bSig0 );
        if 4 < q then
          q := q - 4
        else
          q := 0;
        q := q shr (- expDiff);
        shift64Right( bSig0, bSig1, 8, bSig0, bSig1 );
        expDiff := expDiff + 24;
        if ( expDiff < 0 ) then
        Begin
            shift64Right( aSig0, aSig1, - expDiff, aSig0, aSig1 );
        End
        else
        Begin
            shortShift64Left( aSig0, aSig1, expDiff, aSig0, aSig1 );
        End;
        mul64By32To96( bSig0, bSig1, q, term0, term1, term2 );
        sub64( aSig0, aSig1, term1, term2, aSig0, aSig1 );
    End
    else
    Begin
        shift64Right( aSig0, aSig1, 8, aSig0, aSig1 );
        shift64Right( bSig0, bSig1, 8, bSig0, bSig1 );
    End;
    { Keep subtracting `b' until the remainder turns negative, remembering
      the last value before each subtraction. }
    Repeat
        alternateASig0 := aSig0;
        alternateASig1 := aSig1;
        Inc(q);
        sub64( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
    Until not ( 0 <= sbits32 (aSig0) );
    { sigMean is the sum of the negative remainder and the previous one.
      The previous one is chosen when that sum is negative, or when it is
      zero and q is odd. }
    add64(
        aSig0, aSig1, alternateASig0, alternateASig1, bits32(sigMean0), sigMean1 );
    if (    ( sigMean0 < 0 )
         OR ( ( ( sigMean0 OR  sigMean1 ) = 0 ) AND  (( q AND 1 )<>0) ) ) then
    Begin
        aSig0 := alternateASig0;
        aSig1 := alternateASig1;
    End;
    { A negative remainder is negated and its sign folded into the result
      sign. }
    zSign := flag( sbits32 (aSig0) < 0 );
    if ( zSign <> 0 ) then
       sub64( 0, 0, aSig0, aSig1, aSig0, aSig1 );
    normalizeRoundAndPackFloat64( aSign xor zSign, bExp - 4, aSig0, aSig1, out );
End;

{*
-------------------------------------------------------------------------------
Returns the square root of the double-precision floating-point value `a'.
The operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Procedure float64_sqrt( a: float64; var out: float64 ); {$ifdef fpc}[public,Alias:'FLOAT64_SQRT'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign: flag;
    aExp, zExp: int16;
    aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0: bits32;
    rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
    z: float64;
    label invalid;
Begin
    aSig1 := extractFloat64Frac1( a );
    aSig0 := extractFloat64Frac0( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    if ( aExp = $7FF ) then
    Begin
        if ( aSig0 OR  aSig1 ) <> 0 then
        Begin
           propagateFloat64NaN( a, a, out );
           exit;
        End;
        { Exponent $7FF, zero fraction: returned as is when the sign is
          clear, invalid otherwise. }
        if ( aSign = 0) then
        Begin
          out := a;
          exit;
        End;
        goto invalid;
    End;
    if ( aSign <> 0 ) then
    Begin
        { Sign set: an all-zero magnitude is returned unchanged, anything
          else is invalid. }
        if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
        Begin
           out := a;
           exit;
        End;
 invalid:
        float_raise( float_flag_invalid );
        z.low := float64_default_nan_low;
        z.high := float64_default_nan_high;
        out := z;
        exit;
    End;
    if ( aExp = 0 ) then
    Begin
        if ( ( aSig0 OR  aSig1 ) = 0 ) then
        Begin
           packFloat64( 0, 0, 0, 0, out );
           exit;
        End;
        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
    End;
    { NOTE(review): aExp - $3FF can be negative, and `shr' on a signed
      value is presumably a logical shift here; the expression then relies
      on truncation when stored into the 16-bit zExp - confirm that range
      checking is disabled for this unit. }
    zExp := ( ( aExp - $3FF ) shr 1 ) + $3FE;
    aSig0 := aSig0 or $00100000;
    shortShift64Left( aSig0, aSig1, 11, term0, term1 );
    { First approximation of the high result word; a wrap to 0 after the
      +1 is replaced by $7FFFFFFF. }
    zSig0 := ( estimateSqrt32( aExp, term0 ) shr 1 ) + 1;
    if ( zSig0 = 0 ) then
       zSig0 := $7FFFFFFF;
    doubleZSig0 := zSig0 + zSig0;
    { The shift count depends on the low bit of the exponent. }
    shortShift64Left( aSig0, aSig1, 9 - ( aExp and 1 ), aSig0, aSig1 );
    mul32To64( zSig0, zSig0, term0, term1 );
    sub64( aSig0, aSig1, term0, term1, rem0, rem1 );
    { Step the estimate down while the remainder is negative. }
    while ( sbits32 (rem0) < 0 ) do
    Begin
        Dec(zSig0);
        doubleZSig0 := doubleZSig0 - 2;
        add64( rem0, rem1, 0, doubleZSig0 OR  1, rem0, rem1 );
    End;
    zSig1 := estimateDiv64To32( rem1, 0, doubleZSig0 );
    { The exact correction is only carried out when the low nine bits of
      the estimate are at most 5. }
    if ( ( zSig1 and $1FF ) <= 5 ) then
    Begin
        if ( zSig1 = 0 ) then
           zSig1 := 1;
        mul32To64( doubleZSig0, zSig1, term1, term2 );
        sub64( rem1, 0, term1, term2, rem1, rem2 );
        mul32To64( zSig1, zSig1, term2, term3 );
        sub96( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
        while ( sbits32 (rem1) < 0 ) do
        Begin
            Dec(zSig1);
            shortShift64Left( 0, zSig1, 1, term2, term3 );
            term3 := term3 or 1;
            term2 := term2 or doubleZSig0;
            add96( rem1, rem2, rem3, 0, term2, term3, rem1, rem2, rem3 );
        End;
        { Non-zero remainder becomes a sticky bit. }
        zSig1 := zSig1 or bits32( ( rem1 OR  rem2 OR  rem3 ) <> 0 );
    End;
    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, zSig0, zSig1, zSig2 );
    roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2, out );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.
The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
When either operand is a NaN the result is 0, and the invalid exception is
raised only if one of the operands is a signaling NaN.
-------------------------------------------------------------------------------
*}
Function float64_eq(a: float64; b: float64): flag; {$ifdef fpc}[public,Alias:'FLOAT64_EQ'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    anyNaN: boolean;
Begin
    { A NaN has an all-ones exponent together with a non-zero fraction.
      `b' is examined only when `a' is not a NaN. }
    anyNaN := ( extractFloat64Exp( a ) = $7FF )
          and ( ( extractFloat64Frac0( a ) or extractFloat64Frac1( a ) ) <> 0 );
    if not anyNaN then
        anyNaN := ( extractFloat64Exp( b ) = $7FF )
              and ( ( extractFloat64Frac0( b ) or extractFloat64Frac1( b ) ) <> 0 );

    if anyNaN then
    Begin
        if ( float64_is_signaling_nan( a ) <> 0 ) or ( float64_is_signaling_nan( b ) <> 0 ) then
            float_raise( float_flag_invalid );
        float64_eq := 0;
        exit;
    End;

    if a.low <> b.low then
        { Different low words can never be equal. }
        float64_eq := 0
    else if a.high = b.high then
        { Bit-for-bit identical. }
        float64_eq := 1
    else
        { The high words differ: equal only when both values are zeros,
          i.e. nothing but the sign bits may be set. }
        float64_eq := flag(
               ( a.low = 0 )
           and ( bits32( ( a.high or b.high ) shl 1 ) = 0 ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is less than
or equal to the corresponding value `b', and 0 otherwise.  
The comparison
is performed according to the IEC/IEEE Standard for Binary Floating-Point
Arithmetic.  When either operand is a NaN the invalid exception is raised
and 0 is returned.
-------------------------------------------------------------------------------
*}
Function float64_le(a: float64;b: float64): flag; {$ifdef fpc}[public,Alias:'FLOAT64_LE'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign, bSign: flag;
    anyNaN: boolean;
Begin
    { A NaN has an all-ones exponent together with a non-zero fraction.
      `b' is examined only when `a' is not a NaN. }
    anyNaN := ( extractFloat64Exp( a ) = $7FF )
          and ( ( extractFloat64Frac0( a ) or extractFloat64Frac1( a ) ) <> 0 );
    if not anyNaN then
        anyNaN := ( extractFloat64Exp( b ) = $7FF )
              and ( ( extractFloat64Frac0( b ) or extractFloat64Frac1( b ) ) <> 0 );

    if anyNaN then
    Begin
        float_raise( float_flag_invalid );
        float64_le := 0;
        exit;
    End;

    aSign := extractFloat64Sign( a );
    bSign := extractFloat64Sign( b );

    if aSign <> bSign then
    Begin
        if aSign <> 0 then
            { Negative `a' against non-negative `b'. }
            float64_le := 1
        else
            { Non-negative `a' against negative `b': true only for +0 and -0. }
            float64_le := flag(
                ( bits32( ( a.high or b.high ) shl 1 ) or a.low or b.low ) = 0 );
    End
    else if aSign <> 0 then
        { Both negative: the magnitude ordering is reversed. }
        float64_le := le64( b.high, b.low, a.high, a.low )
    else
        float64_le := le64( a.high, a.low, b.high, b.low );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  
The comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
When either operand is a NaN the invalid exception is raised and 0 is
returned.
-------------------------------------------------------------------------------
*}
Function float64_lt(a: float64;b: float64): flag; {$ifdef fpc}[public,Alias:'FLOAT64_LT'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
Var
    aSign, bSign: flag;
    anyNaN: boolean;
Begin
    { A NaN has an all-ones exponent together with a non-zero fraction.
      `b' is examined only when `a' is not a NaN. }
    anyNaN := ( extractFloat64Exp( a ) = $7FF )
          and ( ( extractFloat64Frac0( a ) or extractFloat64Frac1( a ) ) <> 0 );
    if not anyNaN then
        anyNaN := ( extractFloat64Exp( b ) = $7FF )
              and ( ( extractFloat64Frac0( b ) or extractFloat64Frac1( b ) ) <> 0 );

    if anyNaN then
    Begin
        float_raise( float_flag_invalid );
        float64_lt := 0;
        exit;
    End;

    aSign := extractFloat64Sign( a );
    bSign := extractFloat64Sign( b );

    if aSign <> bSign then
    Begin
        if aSign = 0 then
            { Non-negative `a' is never below a negative `b'. }
            float64_lt := 0
        else
            { Negative `a' against non-negative `b': true unless both are zeros. }
            float64_lt := flag(
                ( bits32( ( a.high or b.high ) shl 1 ) or a.low or b.low ) <> 0 );
    End
    else if aSign <> 0 then
        { Both negative: the magnitude ordering is reversed. }
        float64_lt := lt64( b.high, b.low, a.high, a.low )
    else
        float64_lt := lt64( a.high, a.low, b.high, b.low );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.  The invalid exception is
raised if either operand is a NaN.  
Otherwise, the comparison is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_eq_signaling( a: float64; b: float64): flag;
Var
    anyNaN: boolean;
Begin
    { A NaN has an all-ones exponent together with a non-zero fraction.
      `b' is examined only when `a' is not a NaN. }
    anyNaN := ( extractFloat64Exp( a ) = $7FF )
          and ( ( extractFloat64Frac0( a ) or extractFloat64Frac1( a ) ) <> 0 );
    if not anyNaN then
        anyNaN := ( extractFloat64Exp( b ) = $7FF )
              and ( ( extractFloat64Frac0( b ) or extractFloat64Frac1( b ) ) <> 0 );

    if anyNaN then
    Begin
        { Any NaN operand raises invalid here, whether quiet or signaling. }
        float_raise( float_flag_invalid );
        float64_eq_signaling := 0;
        exit;
    End;

    if a.low <> b.low then
        { Different low words can never be equal. }
        float64_eq_signaling := 0
    else if a.high = b.high then
        { Bit-for-bit identical. }
        float64_eq_signaling := 1
    else
        { The high words differ: equal only when both values are zeros,
          i.e. nothing but the sign bits may be set. }
        float64_eq_signaling := flag(
               ( a.low = 0 )
           and ( bits32( ( a.high or b.high ) shl 1 ) = 0 ) );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is less than or
equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
cause an exception.  
Otherwise, the comparison is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_le_quiet(a: float64 ; b: float64 ): flag;
Var
    aSign, bSign : flag;
    anyNaN : boolean;
Begin
    { A NaN has an all-ones exponent together with a non-zero fraction.
      `b' is examined only when `a' is not a NaN. }
    anyNaN := ( extractFloat64Exp( a ) = $7FF )
          and ( ( extractFloat64Frac0( a ) or extractFloat64Frac1( a ) ) <> 0 );
    if not anyNaN then
        anyNaN := ( extractFloat64Exp( b ) = $7FF )
              and ( ( extractFloat64Frac0( b ) or extractFloat64Frac1( b ) ) <> 0 );

    if anyNaN then
    Begin
        { Only a signaling NaN raises the invalid exception. }
        if ( float64_is_signaling_nan( a ) <> 0 ) or ( float64_is_signaling_nan( b ) <> 0 ) then
            float_raise( float_flag_invalid );
        float64_le_quiet := 0;
        exit;
    End;

    aSign := extractFloat64Sign( a );
    bSign := extractFloat64Sign( b );

    if aSign <> bSign then
    Begin
        if aSign <> 0 then
            { Negative `a' against non-negative `b'. }
            float64_le_quiet := 1
        else
            { Non-negative `a' against negative `b': true only for +0 and -0. }
            float64_le_quiet := flag(
                ( bits32( ( a.high or b.high ) shl 1 ) or a.low or b.low ) = 0 );
    End
    else if aSign <> 0 then
        { Both negative: the magnitude ordering is reversed. }
        float64_le_quiet := le64( b.high, b.low, a.high, a.low )
    else
        float64_le_quiet := le64( a.high, a.low, b.high, b.low );
End;

{*
-------------------------------------------------------------------------------
Returns 1 if the double-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
exception.  
Otherwise, the comparison is performed according to the IEC/IEEE
Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*}
Function float64_lt_quiet(a: float64; b: float64 ): Flag;
Var
    aSign, bSign: flag;
    anyNaN: boolean;
Begin
    { A NaN has an all-ones exponent together with a non-zero fraction.
      `b' is examined only when `a' is not a NaN. }
    anyNaN := ( extractFloat64Exp( a ) = $7FF )
          and ( ( extractFloat64Frac0( a ) or extractFloat64Frac1( a ) ) <> 0 );
    if not anyNaN then
        anyNaN := ( extractFloat64Exp( b ) = $7FF )
              and ( ( extractFloat64Frac0( b ) or extractFloat64Frac1( b ) ) <> 0 );

    if anyNaN then
    Begin
        { Only a signaling NaN raises the invalid exception. }
        if ( float64_is_signaling_nan( a ) <> 0 ) or ( float64_is_signaling_nan( b ) <> 0 ) then
            float_raise( float_flag_invalid );
        float64_lt_quiet := 0;
        exit;
    End;

    aSign := extractFloat64Sign( a );
    bSign := extractFloat64Sign( b );

    if aSign <> bSign then
    Begin
        if aSign = 0 then
            { Non-negative `a' is never below a negative `b'. }
            float64_lt_quiet := 0
        else
            { Negative `a' against non-negative `b': true unless both are zeros. }
            float64_lt_quiet := flag(
                ( bits32( ( a.high or b.high ) shl 1 ) or a.low or b.low ) <> 0 );
    End
    else if aSign <> 0 then
        { Both negative: the magnitude ordering is reversed. }
        float64_lt_quiet := lt64( b.high, b.low, a.high, a.low )
    else
        float64_lt_quiet := lt64( a.high, a.low, b.high, b.low );
End;

{*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit two's complement integer `a'
| to the single-precision floating-point format.  
The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| Magnitudes that fit in 24 bits are packed exactly; larger magnitudes are
| aligned and handed to roundAndPackFloat32 for rounding.
*----------------------------------------------------------------------------*}
function int64_to_float32( a: int64 ): float32; {$ifdef fpc}[public,Alias:'INT64_TO_FLOAT32'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
var
    zSign : flag;
    absA : uint64;
    shiftCount: int8;
    intval : int64rec;
Begin
    { Zero maps to the all-zero bit pattern. }
    if ( a = 0 ) then
      begin
       int64_to_float32 := 0;
       exit;
      end;
    { Split into sign and magnitude. }
    if a < 0 then
      zSign := flag(TRUE)
    else
      zSign := flag(FALSE);
    if zSign<>0 then
       absA := -a
    else
       absA := a;
    { A non-negative count means the magnitude has at most 24 significant
      bits, so it can be shifted into place and packed without rounding. }
    shiftCount := countLeadingZeros64( absA ) - 40;
    if ( 0 <= shiftCount ) then
      begin
        int64_to_float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
      end
    else
       begin
        { Keep 7 extra low-order bits for roundAndPackFloat32. }
        shiftCount := shiftCount + 7;
        if ( shiftCount < 0 ) then
          begin
            { The magnitude is too wide even with the extra bits: shift it
              right and fold the lost bits into the lowest one.  The two-word
              shift takes the most significant word first, matching the other
              two-word helper calls in this unit. }
            intval.low := int64rec(absA).low;
            intval.high := int64rec(absA).high;
            shift64RightJamming( intval.high, intval.low, - shiftCount,
               intval.high, intval.low);
            int64rec(absA).low := intval.low;
            int64rec(absA).high := intval.high;
          end
        else
            absA := absA shl shiftCount;
        int64_to_float32:=roundAndPackFloat32( zSign, $9C - shiftCount, absA );
      end;
End;

{*----------------------------------------------------------------------------
| Returns the result of converting the 64-bit two's complement integer `a'
| to the double-precision floating-point format.  
The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| NOTE(review): magnitudes needing more than 53 significant bits are shifted
| right with shift64Right before packing, so the discarded low bits appear to
| be truncated rather than rounded - confirm against the intended rounding.
*----------------------------------------------------------------------------*}
function int64_to_float64( a: int64 ): float64; {$ifdef fpc}[public,Alias:'INT64_TO_FLOAT64'];{$ifdef hascompilerproc} compilerproc; {$endif}{$endif}
var
 zSign : flag;
 float_result : float64;
 absA : bits64;
 shiftCount : int8;
 zSig0, zSig1 : bits32;
Begin
    if ( a = 0 ) then
      Begin
       { Zero: pack a positive zero and hand it back; the function result
         must be assigned before leaving. }
       packFloat64( 0, 0, 0, 0, float_result );
       int64_to_float64 := float_result;
       exit;
      end;
    { Split into sign and magnitude. }
    zSign := flag( a < 0 );
    if zSign<>0 then
      absA := -a
    else
      absA := a;
    { A non-negative count means the magnitude has at most 53 significant
      bits and can be shifted left into place exactly. }
    shiftCount := countLeadingZeros64( absA ) - 11;
    if ( 0 <= shiftCount ) then
      Begin
        absA := absA shl shiftCount;
        zSig0:=int64rec(absA).high;
        zSig1:=int64rec(absA).low;
      End
    else
      Begin
        { Too wide: shift the 64-bit magnitude right, passing its high and
          low words separately, most significant word first. }
        shift64Right( int64rec(absA).high, int64rec(absA).low,
           - shiftCount, zSig0, zSig1 );
      End;
    packFloat64( zSign, $432 - shiftCount, zSig0, zSig1, float_result );
    int64_to_float64:= float_result;
End;

end.

{
   $Log$
   Revision 1.6  2002-11-30 23:25:19  carl
     * forgot goto on switch in last commit

   Revision 1.5  2002/11/30 21:34:20  carl
     + compilerproc for softfpu (first step for integration)
     * several bugfixes for big-endian support

   Revision 1.4  2002/10/13 15:47:39  carl
      * bugfix for int64 to float conversion

   Revision 1.3  2002/10/12 20:24:22  carl
     + int64_tof_loat conversion routines

   Revision 1.2  2002/10/08 20:07:08  carl
     * fix range check errors
     - overflow checking must be off always
     * debugged and works as expected

   Revision 1.1  2002/09/16 19:10:17  carl
     * first revision of FPU emulation

}
 |