softfpu.pp 297 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775477647774778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765777577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822582358245825582658275828582958305831583258335834583558365837583858395840584158425843584458455846584758485849585058515852585358545855585658575858585958605861586258635864586558665867586858695870587158725873587458755876587758785879588058815882588358845885588658875888588958905891589258935894589558965897589858995900590159025903590459055906590759085909591059115912591359145915591659175918591959205921592259235924592559265927592859295930593159325933593459355936593759385939594059415942594359445945594659475948594959505951595259535954595559565957595859595960596159625963596459655966596759685969597059715972597359745975597659775978597959805981598259835984598559865987598859895990599159925993599459955996599759985999600060016002600360046005600660076008600960106011601260136014601560166017601860196020602160226023602460256026602760286029603060316032603360346035603660376038603960406041604260436044604560466047604860496050605160526053605460556056605760586059606060616062606360646065606660676068606960706071607260736074607560766077607860796080608160826083608460856086608760886089609060916092609360946095609660976098609961006101610261036104610561066107610861096110611161126113611461156116611761186119612061216122612361246125612661276128612961306131613261336134613561366137613861396140614161426143614461456146614761486149615061516152615361546155615661576158615961606161616261636164616561666167616861696170617161726173617461756176617761786179618061816182618361846185618661876188618961906191619261936194619561966197619861996200620162026203620462056206620762086209621062116212621362146215621662176218621962206221622262236224622562266227622862296230623162326233623462356236623762386239624062416242624362446245624662476248624962506251625262536254625562566257625862596260626162626263626462656266626762686269627062716272627362746275627662776278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767277727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487748874897490749174927493749474957496749774987499750075017502750375047505750675077508750975107511751275137514751575167517751875197520752175227523752475257526752775287529753075317532753375347535753675377538753975407541754275437544754575467547754875497550755175527553755475557556755775587559756075617562756375647565756675677568756975707571757275737574757575767577757875797580758175827583758475857586758775887589759075917592759375947595759675977598759976007601760276037604760576067607760876097610761176127613761476157616761776187619762076217622762376247625762676277628762976307631763276337634763576367637763876397640764176427643764476457646764776487649765076517652765376547655765676577658765976607661766276637664766576667667766876697670767176727673767476757676767776787679768076817682768376847685768676877688768976907691769276937694769576967697769876997700770177027703770477057706770777087709771077117712771377147715771677177718771977207721772277237724772577267727772877297730773177327733773477357736773777387739774077417742774377447745774677477748774977507751775277537754775577567757775877597760776177627763776477657766776777687769777077717772777377747775777677777778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552
  1. {*
  2. ===============================================================================
  3. The original notice of the softfloat package is shown below. The conversion
  4. to pascal was done by Carl Eric Codere in 2002 ([email protected]).
  5. ===============================================================================
  6. This C source file is part of the SoftFloat IEC/IEEE Floating-Point
  7. Arithmetic Package, Release 2a.
  8. Written by John R. Hauser. This work was made possible in part by the
  9. International Computer Science Institute, located at Suite 600, 1947 Center
  10. Street, Berkeley, California 94704. Funding was partially provided by the
  11. National Science Foundation under grant MIP-9311980. The original version
  12. of this code was written as part of a project to build a fixed-point vector
  13. processor in collaboration with the University of California at Berkeley,
  14. overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  15. is available through the Web page
  16. `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.
  17. THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  18. has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  19. TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  20. PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  21. AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  22. Derivative works are acceptable, even for commercial purposes, so long as
  23. (1) they include prominent notice that the work is derivative, and (2) they
  24. include prominent notice akin to these four paragraphs for those parts of
  25. this code that are retained.
  26. ===============================================================================
  27. The float80 and float128 part is translated from the softfloat package
  28. by Florian Klaempfl and contained the following copyright notice
  29. The code might contain some duplicate stuff because the floatx80/float128 port was
  30. done based on the 64 bit enabled softfloat code.
  31. ===============================================================================
  32. This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
  33. Package, Release 2b.
  34. Written by John R. Hauser. This work was made possible in part by the
  35. International Computer Science Institute, located at Suite 600, 1947 Center
  36. Street, Berkeley, California 94704. Funding was partially provided by the
  37. National Science Foundation under grant MIP-9311980. The original version
  38. of this code was written as part of a project to build a fixed-point vector
  39. processor in collaboration with the University of California at Berkeley,
  40. overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  41. is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  42. arithmetic/SoftFloat.html'.
  43. THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
  44. been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  45. RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  46. AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  47. COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  48. EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  49. INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  50. OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  51. Derivative works are acceptable, even for commercial purposes, so long as
  52. (1) the source code for the derivative work includes prominent notice that
  53. the work is derivative, and (2) the source code includes prominent notice with
  54. these four paragraphs for those parts of this code that are retained.
  55. ===============================================================================
  56. *}
  57. { $define FPC_SOFTFLOAT_FLOATX80}
  58. { $define FPC_SOFTFLOAT_FLOAT128}
  59. { the softfpu unit can be also embedded directly into the system unit }
  60. {$if not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}
  61. {$mode objfpc}
  62. unit softfpu;
  63. { Overflow checking must be disabled,
  64. since some operations expect overflow!
  65. }
  66. {$Q-}
  67. {$goto on}
  68. {$macro on}
  69. {$define compilerproc:=stdcall }
  70. interface
  71. {$endif not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}
  72. {$if not(defined(fpc_softfpu_implementation))}
  73. {
  74. -------------------------------------------------------------------------------
  75. Software IEC/IEEE floating-point types.
  76. -------------------------------------------------------------------------------
  77. }
  78. TYPE
  79. float32 = longword;
  80. {$define FPC_SYSTEM_HAS_float32}
  81. { we use here a record in the function header because
  82. the record allows bitwise conversion to single }
  83. float32rec = record
  84. float32 : float32;
  85. end;
  86. flag = byte;
  87. bits8 = byte;
  88. sbits8 = shortint;
  89. bits16 = word;
  90. sbits16 = smallint;
  91. sbits32 = longint;
  92. bits32 = longword;
  93. {$ifndef fpc}
  94. qword = int64;
  95. {$endif}
  96. { now part of the system unit
  97. uint64 = qword;
  98. }
  99. bits64 = qword;
  100. sbits64 = int64;
  101. {$ifdef ENDIAN_LITTLE}
  102. float64 = record
  103. case byte of
  104. 1: (low,high : bits32);
  105. // force the record to be aligned like a double
  106. // else *_to_double will fail for cpus like sparc
  107. // and avoid expensive unpacking/packing operations
  108. 2: (dummy : double);
  109. end;
  110. int64rec = record
  111. case byte of
  112. 1: (low,high : bits32);
  113. // force the record to be aligned like a double
  114. // else *_to_double will fail for cpus like sparc
  115. // and avoid expensive unpacking/packing operations
  116. 2: (dummy : int64);
  117. end;
  118. floatx80 = record
  119. case byte of
  120. 1: (low : qword;high : word);
  121. // force the record to be aligned like a double
  122. // else *_to_double will fail for cpus like sparc
  123. // and avoid expensive unpacking/packing operations
  124. 2: (dummy : extended);
  125. end;
  126. float128 = record
  127. case byte of
  128. 1: (low,high : qword);
  129. // force the record to be aligned like a double
  130. // else *_to_double will fail for cpus like sparc
  131. // and avoid expensive unpacking/packing operations
  132. 2: (dummy : qword);
  133. end;
  134. {$else}
  135. float64 = record
  136. case byte of
  137. 1: (high,low : bits32);
  138. // force the record to be aligned like a double
  139. // else *_to_double will fail for cpus like sparc
  140. 2: (dummy : double);
  141. end;
  142. int64rec = record
  143. case byte of
  144. 1: (high,low : bits32);
  145. // force the record to be aligned like a double
  146. // else *_to_double will fail for cpus like sparc
  147. // and avoid expensive unpacking/packing operations
  148. 2: (dummy : int64);
  149. end;
  150. floatx80 = record
  151. case byte of
  152. 1: (high : word;low : qword);
  153. // force the record to be aligned like a double
  154. // else *_to_double will fail for cpus like sparc
  155. // and avoid expensive unpacking/packing operations
  156. 2: (dummy : qword);
  157. end;
  158. float128 = record
  159. case byte of
  160. 1: (high : qword;low : qword);
  161. // force the record to be aligned like a double
  162. // else *_to_double will fail for cpus like sparc
  163. // and avoid expensive unpacking/packing operations
  164. 2: (dummy : qword);
  165. end;
  166. {$endif}
  167. {$define FPC_SYSTEM_HAS_float64}
  168. {*
  169. -------------------------------------------------------------------------------
  170. Returns 1 if the double-precision floating-point value `a' is less than
  171. the corresponding value `b', and 0 otherwise. The comparison is performed
  172. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  173. -------------------------------------------------------------------------------
  174. *}
  175. Function float64_lt(a: float64;b: float64): flag; compilerproc;
  176. {*
  177. -------------------------------------------------------------------------------
  178. Returns 1 if the double-precision floating-point value `a' is less than
  179. or equal to the corresponding value `b', and 0 otherwise. The comparison
  180. is performed according to the IEC/IEEE Standard for Binary Floating-Point
  181. Arithmetic.
  182. -------------------------------------------------------------------------------
  183. *}
  184. Function float64_le(a: float64;b: float64): flag; compilerproc;
  185. {*
  186. -------------------------------------------------------------------------------
  187. Returns 1 if the double-precision floating-point value `a' is equal to
  188. the corresponding value `b', and 0 otherwise. The comparison is performed
  189. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  190. -------------------------------------------------------------------------------
  191. *}
  192. Function float64_eq(a: float64;b: float64): flag; compilerproc;
  193. {*
  194. -------------------------------------------------------------------------------
  195. Returns the square root of the double-precision floating-point value `a'.
  196. The operation is performed according to the IEC/IEEE Standard for Binary
  197. Floating-Point Arithmetic.
  198. -------------------------------------------------------------------------------
  199. *}
  200. Procedure float64_sqrt( a: float64; var out: float64 ); compilerproc;
  201. {*
  202. -------------------------------------------------------------------------------
  203. Returns the remainder of the double-precision floating-point value `a'
  204. with respect to the corresponding value `b'. The operation is performed
  205. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  206. -------------------------------------------------------------------------------
  207. *}
  208. Function float64_rem(a: float64; b : float64) : float64; compilerproc;
  209. {*
  210. -------------------------------------------------------------------------------
  211. Returns the result of dividing the double-precision floating-point value `a'
  212. by the corresponding value `b'. The operation is performed according to the
  213. IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  214. -------------------------------------------------------------------------------
  215. *}
  216. Function float64_div(a: float64; b : float64) : float64; compilerproc;
  217. {*
  218. -------------------------------------------------------------------------------
  219. Returns the result of multiplying the double-precision floating-point values
  220. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  221. for Binary Floating-Point Arithmetic.
  222. -------------------------------------------------------------------------------
  223. *}
  224. Function float64_mul( a: float64; b:float64) : float64; compilerproc;
  225. {*
  226. -------------------------------------------------------------------------------
  227. Returns the result of subtracting the double-precision floating-point values
  228. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  229. for Binary Floating-Point Arithmetic.
  230. -------------------------------------------------------------------------------
  231. *}
  232. Function float64_sub(a: float64; b : float64) : float64; compilerproc;
  233. {*
  234. -------------------------------------------------------------------------------
  235. Returns the result of adding the double-precision floating-point values `a'
  236. and `b'. The operation is performed according to the IEC/IEEE Standard for
  237. Binary Floating-Point Arithmetic.
  238. -------------------------------------------------------------------------------
  239. *}
  240. Function float64_add( a: float64; b : float64) : float64; compilerproc;
  241. {*
  242. -------------------------------------------------------------------------------
  243. Rounds the double-precision floating-point value `a' to an integer,
  244. and returns the result as a double-precision floating-point value. The
  245. operation is performed according to the IEC/IEEE Standard for Binary
  246. Floating-Point Arithmetic.
  247. -------------------------------------------------------------------------------
  248. *}
  249. Function float64_round_to_int(a: float64) : float64; compilerproc;
  250. {*
  251. -------------------------------------------------------------------------------
  252. Returns the result of converting the double-precision floating-point value
  253. `a' to the single-precision floating-point format. The conversion is
  254. performed according to the IEC/IEEE Standard for Binary Floating-Point
  255. Arithmetic.
  256. -------------------------------------------------------------------------------
  257. *}
  258. Function float64_to_float32(a: float64) : float32rec; compilerproc;
  259. {*
  260. -------------------------------------------------------------------------------
  261. Returns the result of converting the double-precision floating-point value
  262. `a' to the 32-bit two's complement integer format. The conversion is
  263. performed according to the IEC/IEEE Standard for Binary Floating-Point
  264. Arithmetic, except that the conversion is always rounded toward zero.
  265. If `a' is a NaN, the largest positive integer is returned. Otherwise, if
  266. the conversion overflows, the largest integer with the same sign as `a' is
  267. returned.
  268. -------------------------------------------------------------------------------
  269. *}
  270. Function float64_to_int32_round_to_zero(a: float64 ): int32; compilerproc;
  271. {*
  272. -------------------------------------------------------------------------------
  273. Returns the result of converting the double-precision floating-point value
  274. `a' to the 32-bit two's complement integer format. The conversion is
  275. performed according to the IEC/IEEE Standard for Binary Floating-Point
  276. Arithmetic---which means in particular that the conversion is rounded
  277. according to the current rounding mode. If `a' is a NaN, the largest
  278. positive integer is returned. Otherwise, if the conversion overflows, the
  279. largest integer with the same sign as `a' is returned.
  280. -------------------------------------------------------------------------------
  281. *}
  282. Function float64_to_int32(a: float64): int32; compilerproc;
  283. {*
  284. -------------------------------------------------------------------------------
  285. Returns 1 if the single-precision floating-point value `a' is less than
  286. the corresponding value `b', and 0 otherwise. The comparison is performed
  287. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  288. -------------------------------------------------------------------------------
  289. *}
  290. Function float32_lt( a:float32rec ; b : float32rec): flag; compilerproc;
  291. {*
  292. -------------------------------------------------------------------------------
  293. Returns 1 if the single-precision floating-point value `a' is less than
  294. or equal to the corresponding value `b', and 0 otherwise. The comparison
  295. is performed according to the IEC/IEEE Standard for Binary Floating-Point
  296. Arithmetic.
  297. -------------------------------------------------------------------------------
  298. *}
  299. Function float32_le( a: float32rec; b : float32rec ):flag; compilerproc;
  300. {*
  301. -------------------------------------------------------------------------------
  302. Returns 1 if the single-precision floating-point value `a' is equal to
  303. the corresponding value `b', and 0 otherwise. The comparison is performed
  304. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  305. -------------------------------------------------------------------------------
  306. *}
  307. Function float32_eq( a:float32rec; b:float32rec): flag; compilerproc;
  308. {*
  309. -------------------------------------------------------------------------------
  310. Returns the square root of the single-precision floating-point value `a'.
  311. The operation is performed according to the IEC/IEEE Standard for Binary
  312. Floating-Point Arithmetic.
  313. -------------------------------------------------------------------------------
  314. *}
  315. Function float32_sqrt(a: float32rec ): float32rec; compilerproc;
  316. {*
  317. -------------------------------------------------------------------------------
  318. Returns the remainder of the single-precision floating-point value `a'
  319. with respect to the corresponding value `b'. The operation is performed
  320. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  321. -------------------------------------------------------------------------------
  322. *}
  323. Function float32_rem(a: float32rec; b: float32rec ):float32rec; compilerproc;
  324. {*
  325. -------------------------------------------------------------------------------
  326. Returns the result of dividing the single-precision floating-point value `a'
  327. by the corresponding value `b'. The operation is performed according to the
  328. IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  329. -------------------------------------------------------------------------------
  330. *}
  331. Function float32_div(a: float32rec;b: float32rec ): float32rec; compilerproc;
  332. {*
  333. -------------------------------------------------------------------------------
  334. Returns the result of multiplying the single-precision floating-point values
  335. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  336. for Binary Floating-Point Arithmetic.
  337. -------------------------------------------------------------------------------
  338. *}
  339. Function float32_mul(a: float32rec; b: float32rec ) : float32rec; compilerproc;
  340. {*
  341. -------------------------------------------------------------------------------
  342. Returns the result of subtracting the single-precision floating-point values
  343. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  344. for Binary Floating-Point Arithmetic.
  345. -------------------------------------------------------------------------------
  346. *}
  347. Function float32_sub( a: float32rec ; b:float32rec ): float32rec; compilerproc;
  348. {*
  349. -------------------------------------------------------------------------------
  350. Returns the result of adding the single-precision floating-point values `a'
  351. and `b'. The operation is performed according to the IEC/IEEE Standard for
  352. Binary Floating-Point Arithmetic.
  353. -------------------------------------------------------------------------------
  354. *}
  355. Function float32_add( a: float32rec; b:float32rec ): float32rec; compilerproc;
  356. {*
  357. -------------------------------------------------------------------------------
  358. Rounds the single-precision floating-point value `a' to an integer,
  359. and returns the result as a single-precision floating-point value. The
  360. operation is performed according to the IEC/IEEE Standard for Binary
  361. Floating-Point Arithmetic.
  362. -------------------------------------------------------------------------------
  363. *}
  364. Function float32_round_to_int( a: float32rec): float32rec; compilerproc;
  365. {*
  366. -------------------------------------------------------------------------------
  367. Returns the result of converting the single-precision floating-point value
  368. `a' to the double-precision floating-point format. The conversion is
  369. performed according to the IEC/IEEE Standard for Binary Floating-Point
  370. Arithmetic.
  371. -------------------------------------------------------------------------------
  372. *}
  373. Function float32_to_float64( a : float32rec) : Float64; compilerproc;
  374. {*
  375. -------------------------------------------------------------------------------
  376. Returns the result of converting the single-precision floating-point value
  377. `a' to the 32-bit two's complement integer format. The conversion is
  378. performed according to the IEC/IEEE Standard for Binary Floating-Point
  379. Arithmetic, except that the conversion is always rounded toward zero.
  380. If `a' is a NaN, the largest positive integer is returned. Otherwise, if
  381. the conversion overflows, the largest integer with the same sign as `a' is
  382. returned.
  383. -------------------------------------------------------------------------------
  384. *}
  385. Function float32_to_int32_round_to_zero( a: Float32rec ): int32; compilerproc;
  386. {*
  387. -------------------------------------------------------------------------------
  388. Returns the result of converting the single-precision floating-point value
  389. `a' to the 32-bit two's complement integer format. The conversion is
  390. performed according to the IEC/IEEE Standard for Binary Floating-Point
  391. Arithmetic---which means in particular that the conversion is rounded
  392. according to the current rounding mode. If `a' is a NaN, the largest
  393. positive integer is returned. Otherwise, if the conversion overflows, the
  394. largest integer with the same sign as `a' is returned.
  395. -------------------------------------------------------------------------------
  396. *}
  397. Function float32_to_int32( a : float32rec) : int32; compilerproc;
  398. {*
  399. -------------------------------------------------------------------------------
  400. Returns the result of converting the 32-bit two's complement integer `a' to
  401. the double-precision floating-point format. The conversion is performed
  402. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  403. -------------------------------------------------------------------------------
  404. *}
  405. Function int32_to_float64( a: int32) : float64; compilerproc;
  406. {*
  407. -------------------------------------------------------------------------------
  408. Returns the result of converting the 32-bit two's complement integer `a' to
  409. the single-precision floating-point format. The conversion is performed
  410. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  411. -------------------------------------------------------------------------------
  412. *}
  413. Function int32_to_float32( a: int32): float32rec; compilerproc;
  414. {*----------------------------------------------------------------------------
  415. | Returns the result of converting the 64-bit two's complement integer `a'
  416. | to the double-precision floating-point format. The conversion is performed
  417. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  418. *----------------------------------------------------------------------------*}
  419. Function int64_to_float64( a: int64 ): float64; compilerproc;
  420. Function qword_to_float64( a: qword ): float64; compilerproc;
  421. {*----------------------------------------------------------------------------
  422. | Returns the result of converting the 64-bit two's complement integer `a'
  423. | to the single-precision floating-point format. The conversion is performed
  424. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  425. *----------------------------------------------------------------------------*}
  426. Function int64_to_float32( a: int64 ): float32rec; compilerproc;
  427. Function qword_to_float32( a: qword ): float32rec; compilerproc;
  428. {$ifdef FPC_SOFTFLOAT_FLOAT128}
  429. function float128_is_nan( a : float128): flag;
  430. function float128_is_signaling_nan( a : float128): flag;
  431. function float128_to_int32(a: float128): int32;
  432. function float128_to_int32_round_to_zero(a: float128): int32;
  433. function float128_to_int64(a: float128): int64;
  434. function float128_to_int64_round_to_zero(a: float128): int64;
  435. function float128_to_float32(a: float128): float32;
  436. function float128_to_float64(a: float128): float64;
  437. function float64_to_float128( a : float64) : float128;
  438. {$ifdef FPC_SOFTFLOAT_FLOAT80}
  439. function float128_to_floatx80(a: float128): floatx80;
  440. {$endif FPC_SOFTFLOAT_FLOAT80}
  441. function float128_round_to_int(a: float128): float128;
  442. function float128_add(a: float128; b: float128): float128;
  443. function float128_sub(a: float128; b: float128): float128;
  444. function float128_mul(a: float128; b: float128): float128;
  445. function float128_div(a: float128; b: float128): float128;
  446. function float128_rem(a: float128; b: float128): float128;
  447. function float128_sqrt(a: float128): float128;
  448. function float128_eq(a: float128; b: float128): flag;
  449. function float128_le(a: float128; b: float128): flag;
  450. function float128_lt(a: float128; b: float128): flag;
  451. function float128_eq_signaling(a: float128; b: float128): flag;
  452. function float128_le_quiet(a: float128; b: float128): flag;
  453. function float128_lt_quiet(a: float128; b: float128): flag;
  454. {$endif FPC_SOFTFLOAT_FLOAT128}
  455. CONST
  456. {-------------------------------------------------------------------------------
  457. Software IEC/IEEE floating-point underflow tininess-detection mode.
  458. -------------------------------------------------------------------------------
  459. *}
  460. float_tininess_after_rounding = 0;
  461. float_tininess_before_rounding = 1;
  462. {*
  463. -------------------------------------------------------------------------------
  464. Underflow tininess-detection mode, statically initialized to default value.
  465. (The declaration in `softfloat.h' must match the `int8' type here.)
  466. -------------------------------------------------------------------------------
  467. *}
  468. const float_detect_tininess: int8 = float_tininess_after_rounding;
  469. {$endif not(defined(fpc_softfpu_implementation))}
  470. {$if not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}
  471. implementation
  472. {$endif not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}
  473. {$if not(defined(fpc_softfpu_interface))}
  474. (*****************************************************************************)
  475. (*----------------------------------------------------------------------------*)
  476. (* Primitive arithmetic functions, including multi-word arithmetic, and *)
  477. (* division and square root approximations. (Can be specialized to target if *)
  478. (* desired.) *)
  479. (* ---------------------------------------------------------------------------*)
  480. (*****************************************************************************)
  481. {*----------------------------------------------------------------------------
  482. | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
  483. | and 7, and returns the properly rounded 32-bit integer corresponding to the
  484. | input. If `zSign' is 1, the input is negated before being converted to an
  485. | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
  486. | is simply rounded to an integer, with the inexact exception raised if the
  487. | input cannot be represented exactly as an integer. However, if the fixed-
  488. | point input is too large, the invalid exception is raised and the largest
  489. | positive or negative integer is returned.
  490. *----------------------------------------------------------------------------*}
  491. function roundAndPackInt32( zSign: flag; absZ : bits64): int32;
  492. var
  493. roundingMode: int8;
  494. roundNearestEven: flag;
  495. roundIncrement, roundBits: int8;
  496. z: int32;
  497. begin
  498. roundingMode := softfloat_rounding_mode;
  499. roundNearestEven := ord( roundingMode = float_round_nearest_even );
  500. roundIncrement := $40;
  501. if ( roundNearestEven=0 ) then
  502. begin
  503. if ( roundingMode = float_round_to_zero ) then
  504. begin
  505. roundIncrement := 0;
  506. end
  507. else begin
  508. roundIncrement := $7F;
  509. if ( zSign<>0 ) then
  510. begin
  511. if ( roundingMode = float_round_up ) then
  512. roundIncrement := 0;
  513. end
  514. else begin
  515. if ( roundingMode = float_round_down ) then
  516. roundIncrement := 0;
  517. end;
  518. end;
  519. end;
  520. roundBits := absZ and $7F;
  521. absZ := ( absZ + roundIncrement ) shr 7;
  522. absZ := absZ and not( ord( ( roundBits xor $40 ) = 0 ) and roundNearestEven );
  523. z := absZ;
  524. if ( zSign<>0 ) then
  525. z := - z;
  526. if ( ( absZ shr 32 ) or ( z and ( ord( z < 0 ) xor zSign ) ) )<>0 then
  527. begin
  528. float_raise( float_flag_invalid );
  529. if zSign<>0 then
  530. result:=sbits32($80000000)
  531. else
  532. result:=$7FFFFFFF;
  533. exit;
  534. end;
  535. if ( roundBits<>0 ) then
  536. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  537. result:=z;
  538. end;
  539. {*----------------------------------------------------------------------------
  540. | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
  541. | `absZ1', with binary point between bits 63 and 64 (between the input words),
  542. | and returns the properly rounded 64-bit integer corresponding to the input.
  543. | If `zSign' is 1, the input is negated before being converted to an integer.
  544. | Ordinarily, the fixed-point input is simply rounded to an integer, with
  545. | the inexact exception raised if the input cannot be represented exactly as
  546. | an integer. However, if the fixed-point input is too large, the invalid
  547. | exception is raised and the largest positive or negative integer is
  548. | returned.
  549. *----------------------------------------------------------------------------*}
  550. function roundAndPackInt64( zSign: flag; absZ0: bits64; absZ1 : bits64): int64;
  551. var
  552. roundingMode: int8;
  553. roundNearestEven, increment: flag;
  554. z: int64;
  555. label
  556. overflow;
  557. begin
  558. roundingMode := softfloat_rounding_mode;
  559. roundNearestEven := ord( roundingMode = float_round_nearest_even );
  560. increment := ord( sbits64(absZ1) < 0 );
  561. if ( roundNearestEven=0 ) then
  562. begin
  563. if ( roundingMode = float_round_to_zero ) then
  564. begin
  565. increment := 0;
  566. end
  567. else begin
  568. if ( zSign<>0 ) then
  569. begin
  570. increment := ord(( roundingMode = float_round_down ) and (absZ1<>0));
  571. end
  572. else begin
  573. increment := ord(( roundingMode = float_round_up ) and (absZ1<>0));
  574. end;
  575. end;
  576. end;
  577. if ( increment<>0 ) then
  578. begin
  579. inc(absZ0);
  580. if ( absZ0 = 0 ) then
  581. goto overflow;
  582. absZ0 := absZ0 and not( ord( bits64( absZ1 shl 1 ) = 0 ) and roundNearestEven );
  583. end;
  584. z := absZ0;
  585. if ( zSign<>0 ) then
  586. z := - z;
  587. if ( (z<>0) and (( ord( z < 0 ) xor zSign )<>0) ) then
  588. begin
  589. overflow:
  590. float_raise( float_flag_invalid );
  591. if zSign<>0 then
  592. result:=int64($8000000000000000)
  593. else
  594. result:=int64($7FFFFFFFFFFFFFFF);
  595. end;
  596. if ( absZ1<>0 ) then
  597. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  598. result:=z;
  599. end;
  600. {*
  601. -------------------------------------------------------------------------------
  602. Shifts `a' right by the number of bits given in `count'. If any nonzero
  603. bits are shifted off, they are ``jammed'' into the least significant bit of
  604. the result by setting the least significant bit to 1. The value of `count'
  605. can be arbitrarily large; in particular, if `count' is greater than 32, the
  606. result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  607. The result is stored in the location pointed to by `zPtr'.
  608. -------------------------------------------------------------------------------
  609. *}
  610. Procedure shift32RightJamming( a: bits32 ; count: int16 ; VAR zPtr :bits32);
  611. var
  612. z: Bits32;
  613. Begin
  614. if ( count = 0 ) then
  615. z := a
  616. else
  617. if ( count < 32 ) then
  618. Begin
  619. z := ( a shr count ) or bits32( (( a shl ( ( - count ) AND 31 )) ) <> 0);
  620. End
  621. else
  622. Begin
  623. z := bits32( a <> 0 );
  624. End;
  625. zPtr := z;
  626. End;
  627. {*----------------------------------------------------------------------------
  628. | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  629. | number of bits given in `count'. Any bits shifted off are lost. The value
  630. | of `count' can be arbitrarily large; in particular, if `count' is greater
  631. | than 128, the result will be 0. The result is broken into two 64-bit pieces
  632. | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  633. *----------------------------------------------------------------------------*}
  634. procedure shift128Right(a0: bits64; a1: bits64; count: int16; var z0Ptr: bits64; var z1Ptr : bits64);
  635. var
  636. z0, z1: bits64;
  637. negCount: int8;
  638. begin
  639. negCount := ( - count ) and 63;
  640. if ( count = 0 ) then
  641. begin
  642. z1 := a1;
  643. z0 := a0;
  644. end
  645. else if ( count < 64 ) then
  646. begin
  647. z1 := ( a0 shl negCount ) or ( a1 shr count );
  648. z0 := a0 shr count;
  649. end
  650. else
  651. begin
  652. if ( count shl 64 )<>0 then
  653. z1 := a0 shr ( count and 63 )
  654. else
  655. z1 := 0;
  656. z0 := 0;
  657. end;
  658. z1Ptr := z1;
  659. z0Ptr := z0;
  660. end;
  661. {*----------------------------------------------------------------------------
  662. | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  663. | number of bits given in `count'. If any nonzero bits are shifted off, they
  664. | are ``jammed'' into the least significant bit of the result by setting the
  665. | least significant bit to 1. The value of `count' can be arbitrarily large;
  666. | in particular, if `count' is greater than 128, the result will be either
  667. | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
  668. | nonzero. The result is broken into two 64-bit pieces which are stored at
  669. | the locations pointed to by `z0Ptr' and `z1Ptr'.
  670. *----------------------------------------------------------------------------*}
  671. procedure shift128RightJamming(a0,a1 : bits64; count : int16; var z0Ptr, z1Ptr : bits64);
  672. var
  673. z0,z1 : bits64;
  674. negCount : int8;
  675. begin
  676. negCount := ( - count ) and 63;
  677. if ( count = 0 ) then begin
  678. z1 := a1;
  679. z0 := a0;
  680. end
  681. else if ( count < 64 ) then begin
  682. z1 := ( a0 shl negCount ) or ( a1 shr count ) or ord( ( a1 shl negCount ) <> 0 );
  683. z0 := a0>>count;
  684. end
  685. else begin
  686. if ( count = 64 ) then begin
  687. z1 := a0 or ord( a1 <> 0 );
  688. end
  689. else if ( count < 128 ) then begin
  690. z1 := ( a0 shr ( count and 63 ) ) or ord( ( ( a0 shl negCount ) or a1 ) <> 0 );
  691. end
  692. else begin
  693. z1 := ord( ( a0 or a1 ) <> 0 );
  694. end;
  695. z0 := 0;
  696. end;
  697. z1Ptr := z1;
  698. z0Ptr := z0;
  699. end;
  700. {*
  701. -------------------------------------------------------------------------------
  702. Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
  703. number of bits given in `count'. Any bits shifted off are lost. The value
  704. of `count' can be arbitrarily large; in particular, if `count' is greater
  705. than 64, the result will be 0. The result is broken into two 32-bit pieces
  706. which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  707. -------------------------------------------------------------------------------
  708. *}
  709. Procedure
  710. shift64Right(
  711. a0 :bits32; a1: bits32; count:int16; VAR z0Ptr:bits32; VAR z1Ptr:bits32);
  712. Var
  713. z0, z1: bits32;
  714. negCount : int8;
  715. Begin
  716. negCount := ( - count ) AND 31;
  717. if ( count = 0 ) then
  718. Begin
  719. z1 := a1;
  720. z0 := a0;
  721. End
  722. else if ( count < 32 ) then
  723. Begin
  724. z1 := ( a0 shl negCount ) OR ( a1 shr count );
  725. z0 := a0 shr count;
  726. End
  727. else
  728. Begin
  729. if (count < 64) then
  730. z1 := ( a0 shr ( count AND 31 ) )
  731. else
  732. z1 := 0;
  733. z0 := 0;
  734. End;
  735. z1Ptr := z1;
  736. z0Ptr := z0;
  737. End;
  738. {*
  739. -------------------------------------------------------------------------------
  740. Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
  741. number of bits given in `count'. If any nonzero bits are shifted off, they
  742. are ``jammed'' into the least significant bit of the result by setting the
  743. least significant bit to 1. The value of `count' can be arbitrarily large;
  744. in particular, if `count' is greater than 64, the result will be either 0
  745. or 1, depending on whether the concatenation of `a0' and `a1' is zero or
  746. nonzero. The result is broken into two 32-bit pieces which are stored at
  747. the locations pointed to by `z0Ptr' and `z1Ptr'.
  748. -------------------------------------------------------------------------------
  749. *}
  750. Procedure
  751. shift64RightJamming(
  752. a0:bits32; a1: bits32; count:int16; VAR Z0Ptr :bits32;VAR z1Ptr: bits32 );
  753. VAR
  754. z0, z1 : bits32;
  755. negCount : int8;
  756. Begin
  757. negCount := ( - count ) AND 31;
  758. if ( count = 0 ) then
  759. Begin
  760. z1 := a1;
  761. z0 := a0;
  762. End
  763. else
  764. if ( count < 32 ) then
  765. Begin
  766. z1 := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
  767. z0 := a0 shr count;
  768. End
  769. else
  770. Begin
  771. if ( count = 32 ) then
  772. Begin
  773. z1 := a0 OR bits32( a1 <> 0 );
  774. End
  775. else
  776. if ( count < 64 ) Then
  777. Begin
  778. z1 := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 );
  779. End
  780. else
  781. Begin
  782. z1 := bits32( ( a0 OR a1 ) <> 0 );
  783. End;
  784. z0 := 0;
  785. End;
  786. z1Ptr := z1;
  787. z0Ptr := z0;
  788. End;
  789. {*----------------------------------------------------------------------------
  790. | Shifts `a' right by the number of bits given in `count'. If any nonzero
  791. | bits are shifted off, they are ``jammed'' into the least significant bit of
  792. | the result by setting the least significant bit to 1. The value of `count'
  793. | can be arbitrarily large; in particular, if `count' is greater than 64, the
  794. | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  795. | The result is stored in the location pointed to by `zPtr'.
  796. *----------------------------------------------------------------------------*}
  797. procedure shift64RightJamming(a: bits64; count: int16; var zPtr : bits64);
  798. var
  799. z: bits64;
  800. begin
  801. if ( count = 0 ) then
  802. begin
  803. z := a;
  804. end
  805. else if ( count < 64 ) then
  806. begin
  807. z := ( a shr count ) or ord( ( a shl ( ( - count ) and 63 ) ) <> 0 );
  808. end
  809. else
  810. begin
  811. z := ord( a <> 0 );
  812. end;
  813. zPtr := z;
  814. end;
  815. {*
  816. -------------------------------------------------------------------------------
  817. Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
  818. by 32 _plus_ the number of bits given in `count'. The shifted result is
  819. at most 64 nonzero bits; these are broken into two 32-bit pieces which are
  820. stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
  821. off form a third 32-bit result as follows: The _last_ bit shifted off is
  822. the most-significant bit of the extra result, and the other 31 bits of the
  823. extra result are all zero if and only if _all_but_the_last_ bits shifted off
  824. were all zero. This extra result is stored in the location pointed to by
  825. `z2Ptr'. The value of `count' can be arbitrarily large.
  826. (This routine makes more sense if `a0', `a1', and `a2' are considered
  827. to form a fixed-point value with binary point between `a1' and `a2'. This
  828. fixed-point value is shifted right by the number of bits given in `count',
  829. and the integer part of the result is returned at the locations pointed to
  830. by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
  831. corrupted as described above, and is returned at the location pointed to by
  832. `z2Ptr'.)
  833. -------------------------------------------------------------------------------
  834. }
  835. Procedure
  836. shift64ExtraRightJamming(
  837. a0: bits32;
  838. a1: bits32;
  839. a2: bits32;
  840. count: int16;
  841. VAR z0Ptr: bits32;
  842. VAR z1Ptr: bits32;
  843. VAR z2Ptr: bits32
  844. );
  845. Var
  846. z0, z1, z2: bits32;
  847. negCount : int8;
  848. Begin
  849. negCount := ( - count ) AND 31;
  850. if ( count = 0 ) then
  851. Begin
  852. z2 := a2;
  853. z1 := a1;
  854. z0 := a0;
  855. End
  856. else
  857. Begin
  858. if ( count < 32 ) Then
  859. Begin
  860. z2 := a1 shl negCount;
  861. z1 := ( a0 shl negCount ) OR ( a1 shr count );
  862. z0 := a0 shr count;
  863. End
  864. else
  865. Begin
  866. if ( count = 32 ) then
  867. Begin
  868. z2 := a1;
  869. z1 := a0;
  870. End
  871. else
  872. Begin
  873. a2 := a2 or a1;
  874. if ( count < 64 ) then
  875. Begin
  876. z2 := a0 shl negCount;
  877. z1 := a0 shr ( count AND 31 );
  878. End
  879. else
  880. Begin
  881. if count = 64 then
  882. z2 := a0
  883. else
  884. z2 := bits32(a0 <> 0);
  885. z1 := 0;
  886. End;
  887. End;
  888. z0 := 0;
  889. End;
  890. z2 := z2 or bits32( a2 <> 0 );
  891. End;
  892. z2Ptr := z2;
  893. z1Ptr := z1;
  894. z0Ptr := z0;
  895. End;
  896. {*
  897. -------------------------------------------------------------------------------
  898. Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
  899. number of bits given in `count'. Any bits shifted off are lost. The value
  900. of `count' must be less than 32. The result is broken into two 32-bit
  901. pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  902. -------------------------------------------------------------------------------
  903. *}
  904. Procedure
  905. shortShift64Left(
  906. a0:bits32; a1:bits32; count:int16; VAR z0Ptr:bits32; VAR z1Ptr:bits32 );
  907. Begin
  908. z1Ptr := a1 shl count;
  909. if count = 0 then
  910. z0Ptr := a0
  911. else
  912. z0Ptr := ( a0 shl count ) OR ( a1 shr ( ( - count ) AND 31 ) );
  913. End;
  914. {*
  915. -------------------------------------------------------------------------------
  916. Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left
  917. by the number of bits given in `count'. Any bits shifted off are lost.
  918. The value of `count' must be less than 32. The result is broken into three
  919. 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
  920. `z1Ptr', and `z2Ptr'.
  921. -------------------------------------------------------------------------------
  922. *}
  923. Procedure
  924. shortShift96Left(
  925. a0: bits32;
  926. a1: bits32;
  927. a2: bits32;
  928. count: int16;
  929. VAR z0Ptr: bits32;
  930. VAR z1Ptr: bits32;
  931. VAR z2Ptr: bits32
  932. );
  933. Var
  934. z0, z1, z2: bits32;
  935. negCount: int8;
  936. Begin
  937. z2 := a2 shl count;
  938. z1 := a1 shl count;
  939. z0 := a0 shl count;
  940. if ( 0 < count ) then
  941. Begin
  942. negCount := ( ( - count ) AND 31 );
  943. z1 := z1 or (a2 shr negCount);
  944. z0 := z0 or (a1 shr negCount);
  945. End;
  946. z2Ptr := z2;
  947. z1Ptr := z1;
  948. z0Ptr := z0;
  949. End;
  950. {*----------------------------------------------------------------------------
  951. | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
  952. | number of bits given in `count'. Any bits shifted off are lost. The value
  953. | of `count' must be less than 64. The result is broken into two 64-bit
  954. | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  955. *----------------------------------------------------------------------------*}
  956. procedure shortShift128Left(a0: bits64; a1: bits64; count: int16; var z0Ptr: bits64; z1Ptr : bits64);
  957. begin
  958. z1Ptr := a1 shl count;
  959. if count=0 then
  960. z0Ptr:=a0
  961. else
  962. z0Ptr:=( a0 shl count ) or ( a1 shr ( ( - count ) and 63 ) );
  963. end;
  964. {*
  965. -------------------------------------------------------------------------------
  966. Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
  967. value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
  968. any carry out is lost. The result is broken into two 32-bit pieces which
  969. are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  970. -------------------------------------------------------------------------------
  971. *}
  972. Procedure
  973. add64(
  974. a0:bits32; a1:bits32; b0:bits32; b1:bits32; VAR z0Ptr:bits32; VAR z1Ptr:bits32 );
  975. Var
  976. z1: bits32;
  977. Begin
  978. z1 := a1 + b1;
  979. z1Ptr := z1;
  980. z0Ptr := a0 + b0 + bits32( z1 < a1 );
  981. End;
  982. {*
  983. -------------------------------------------------------------------------------
  984. Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
  985. 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
  986. modulo 2^96, so any carry out is lost. The result is broken into three
  987. 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
  988. `z1Ptr', and `z2Ptr'.
  989. -------------------------------------------------------------------------------
  990. *}
  991. Procedure
  992. add96(
  993. a0: bits32;
  994. a1: bits32;
  995. a2: bits32;
  996. b0: bits32;
  997. b1: bits32;
  998. b2: bits32;
  999. VAR z0Ptr: bits32;
  1000. VAR z1Ptr: bits32;
  1001. VAR z2Ptr: bits32
  1002. );
  1003. var
  1004. z0, z1, z2: bits32;
  1005. carry0, carry1: int8;
  1006. Begin
  1007. z2 := a2 + b2;
  1008. carry1 := int8( z2 < a2 );
  1009. z1 := a1 + b1;
  1010. carry0 := int8( z1 < a1 );
  1011. z0 := a0 + b0;
  1012. z1 := z1 + carry1;
  1013. z0 := z0 + bits32( z1 < carry1 );
  1014. z0 := z0 + carry0;
  1015. z2Ptr := z2;
  1016. z1Ptr := z1;
  1017. z0Ptr := z0;
  1018. End;
  1019. {*----------------------------------------------------------------------------
  1020. | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
  1021. | by the number of bits given in `count'. Any bits shifted off are lost.
  1022. | The value of `count' must be less than 64. The result is broken into three
  1023. | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  1024. | `z1Ptr', and `z2Ptr'.
  1025. *----------------------------------------------------------------------------*}
  1026. procedure shortShift192Left(a0,a1,a2 : bits64;count : int16;var z0Ptr,z1Ptr,z2Ptr : bits64);
  1027. var
  1028. z0, z1, z2 : bits64;
  1029. negCount : int8;
  1030. begin
  1031. z2 := a2 shl count;
  1032. z1 := a1 shl count;
  1033. z0 := a0 shl count;
  1034. if ( 0 < count ) then
  1035. begin
  1036. negCount := ( ( - count ) and 63 );
  1037. z1 := z1 or (a2 shr negCount);
  1038. z0 := z0 or (a1 shr negCount);
  1039. end;
  1040. z2Ptr := z2;
  1041. z1Ptr := z1;
  1042. z0Ptr := z0;
  1043. end;
  1044. {*----------------------------------------------------------------------------
  1045. | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
  1046. | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
  1047. | any carry out is lost. The result is broken into two 64-bit pieces which
  1048. | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  1049. *----------------------------------------------------------------------------*}
  1050. procedure add128( a0, a1, b0, b1 : bits64; var z0Ptr, z1Ptr : bits64);inline;
  1051. var
  1052. z1 : bits64;
  1053. begin
  1054. z1 := a1 + b1;
  1055. z1Ptr := z1;
  1056. z0Ptr := a0 + b0 + ord( z1 < a1 );
  1057. end;
  1058. {*----------------------------------------------------------------------------
  1059. | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
  1060. | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
  1061. | modulo 2^192, so any carry out is lost. The result is broken into three
  1062. | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  1063. | `z1Ptr', and `z2Ptr'.
  1064. *----------------------------------------------------------------------------*}
  1065. procedure add192(a0,a1,a2,b0,b1,b2: bits64; var z0Ptr,z1Ptr,z2Ptr : bits64);
  1066. var
  1067. z0, z1, z2 : bits64;
  1068. carry0, carry1 : int8;
  1069. begin
  1070. z2 := a2 + b2;
  1071. carry1 := ord( z2 < a2 );
  1072. z1 := a1 + b1;
  1073. carry0 := ord( z1 < a1 );
  1074. z0 := a0 + b0;
  1075. inc(z1, carry1);
  1076. inc(z0, ord( z1 < carry1 ));
  1077. inc(z0, carry0);
  1078. z2Ptr := z2;
  1079. z1Ptr := z1;
  1080. z0Ptr := z0;
  1081. end;
  1082. {*
  1083. -------------------------------------------------------------------------------
  1084. Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
  1085. 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
  1086. 2^64, so any borrow out (carry out) is lost. The result is broken into two
  1087. 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
  1088. `z1Ptr'.
  1089. -------------------------------------------------------------------------------
  1090. *}
  1091. Procedure
  1092. sub64(
  1093. a0: bits32; a1 : bits32; b0 :bits32; b1: bits32; VAR z0Ptr:bits32; VAR z1Ptr: bits32 );
  1094. Begin
  1095. z1Ptr := a1 - b1;
  1096. z0Ptr := a0 - b0 - bits32( a1 < b1 );
  1097. End;
  1098. {*
  1099. -------------------------------------------------------------------------------
  1100. Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
  1101. the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
  1102. is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
  1103. into three 32-bit pieces which are stored at the locations pointed to by
  1104. `z0Ptr', `z1Ptr', and `z2Ptr'.
  1105. -------------------------------------------------------------------------------
  1106. *}
  1107. Procedure
  1108. sub96(
  1109. a0:bits32;
  1110. a1:bits32;
  1111. a2:bits32;
  1112. b0:bits32;
  1113. b1:bits32;
  1114. b2:bits32;
  1115. VAR z0Ptr:bits32;
  1116. VAR z1Ptr:bits32;
  1117. VAR z2Ptr:bits32
  1118. );
  1119. Var
  1120. z0, z1, z2: bits32;
  1121. borrow0, borrow1: int8;
  1122. Begin
  1123. z2 := a2 - b2;
  1124. borrow1 := int8( a2 < b2 );
  1125. z1 := a1 - b1;
  1126. borrow0 := int8( a1 < b1 );
  1127. z0 := a0 - b0;
  1128. z0 := z0 - bits32( z1 < borrow1 );
  1129. z1 := z1 - borrow1;
  1130. z0 := z0 -borrow0;
  1131. z2Ptr := z2;
  1132. z1Ptr := z1;
  1133. z0Ptr := z0;
  1134. End;
  1135. {*----------------------------------------------------------------------------
  1136. | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
  1137. | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
  1138. | 2^128, so any borrow out (carry out) is lost. The result is broken into two
  1139. | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
  1140. | `z1Ptr'.
  1141. *----------------------------------------------------------------------------*}
  1142. procedure sub128( a0, a1, b0, b1 : bits64; var z0Ptr, z1Ptr : bits64);
  1143. begin
  1144. z1Ptr := a1 - b1;
  1145. z0Ptr := a0 - b0 - ord( a1 < b1 );
  1146. end;
  1147. {*----------------------------------------------------------------------------
  1148. | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
  1149. | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
  1150. | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
  1151. | result is broken into three 64-bit pieces which are stored at the locations
  1152. | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
  1153. *----------------------------------------------------------------------------*}
  1154. procedure sub192(a0,a1,a2,b0,b1,b2: bits64; var z0Ptr,z1Ptr,z2Ptr : bits64);
  1155. var
  1156. z0, z1, z2 : bits64;
  1157. borrow0, borrow1 : int8;
  1158. begin
  1159. z2 := a2 - b2;
  1160. borrow1 := ord( a2 < b2 );
  1161. z1 := a1 - b1;
  1162. borrow0 := ord( a1 < b1 );
  1163. z0 := a0 - b0;
  1164. dec(z0, ord( z1 < borrow1 ));
  1165. dec(z1, borrow1);
  1166. dec(z0, borrow0);
  1167. z2Ptr := z2;
  1168. z1Ptr := z1;
  1169. z0Ptr := z0;
  1170. end;
  1171. {*
  1172. -------------------------------------------------------------------------------
  1173. Multiplies `a' by `b' to obtain a 64-bit product. The product is broken
  1174. into two 32-bit pieces which are stored at the locations pointed to by
  1175. `z0Ptr' and `z1Ptr'.
  1176. -------------------------------------------------------------------------------
  1177. *}
  1178. Procedure mul32To64( a:bits32; b:bits32; VAR z0Ptr: bits32; VAR z1Ptr
  1179. :bits32 );
  1180. Var
  1181. aHigh, aLow, bHigh, bLow: bits16;
  1182. z0, zMiddleA, zMiddleB, z1: bits32;
  1183. Begin
  1184. aLow := a and $ffff;
  1185. aHigh := a shr 16;
  1186. bLow := b and $ffff;
  1187. bHigh := b shr 16;
  1188. z1 := ( bits32( aLow) ) * bLow;
  1189. zMiddleA := ( bits32 (aLow) ) * bHigh;
  1190. zMiddleB := ( bits32 (aHigh) ) * bLow;
  1191. z0 := ( bits32 (aHigh) ) * bHigh;
  1192. zMiddleA := zMiddleA + zMiddleB;
  1193. z0 := z0 + ( ( bits32 ( zMiddleA < zMiddleB ) ) shl 16 ) + ( zMiddleA shr 16 );
  1194. zMiddleA := zmiddleA shl 16;
  1195. z1 := z1 + zMiddleA;
  1196. z0 := z0 + bits32( z1 < zMiddleA );
  1197. z1Ptr := z1;
  1198. z0Ptr := z0;
  1199. End;
  1200. {*
  1201. -------------------------------------------------------------------------------
  1202. Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
  1203. to obtain a 96-bit product. The product is broken into three 32-bit pieces
  1204. which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
  1205. `z2Ptr'.
  1206. -------------------------------------------------------------------------------
  1207. *}
  1208. Procedure
  1209. mul64By32To96(
  1210. a0:bits32;
  1211. a1:bits32;
  1212. b:bits32;
  1213. VAR z0Ptr:bits32;
  1214. VAR z1Ptr:bits32;
  1215. VAR z2Ptr:bits32
  1216. );
  1217. Var
  1218. z0, z1, z2, more1: bits32;
  1219. Begin
  1220. mul32To64( a1, b, z1, z2 );
  1221. mul32To64( a0, b, z0, more1 );
  1222. add64( z0, more1, 0, z1, z0, z1 );
  1223. z2Ptr := z2;
  1224. z1Ptr := z1;
  1225. z0Ptr := z0;
  1226. End;
  1227. {*
  1228. -------------------------------------------------------------------------------
  1229. Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the
  1230. 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
  1231. product. The product is broken into four 32-bit pieces which are stored at
  1232. the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
  1233. -------------------------------------------------------------------------------
  1234. *}
  1235. Procedure
  1236. mul64To128(
  1237. a0:bits32;
  1238. a1:bits32;
  1239. b0:bits32;
  1240. b1:bits32;
  1241. VAR z0Ptr:bits32;
  1242. VAR z1Ptr:bits32;
  1243. VAR z2Ptr:bits32;
  1244. VAR z3Ptr:bits32
  1245. );
  1246. Var
  1247. z0, z1, z2, z3: bits32;
  1248. more1, more2: bits32;
  1249. Begin
  1250. mul32To64( a1, b1, z2, z3 );
  1251. mul32To64( a1, b0, z1, more2 );
  1252. add64( z1, more2, 0, z2, z1, z2 );
  1253. mul32To64( a0, b0, z0, more1 );
  1254. add64( z0, more1, 0, z1, z0, z1 );
  1255. mul32To64( a0, b1, more1, more2 );
  1256. add64( more1, more2, 0, z2, more1, z2 );
  1257. add64( z0, z1, 0, more1, z0, z1 );
  1258. z3Ptr := z3;
  1259. z2Ptr := z2;
  1260. z1Ptr := z1;
  1261. z0Ptr := z0;
  1262. End;
  1263. {*----------------------------------------------------------------------------
  1264. | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
  1265. | into two 64-bit pieces which are stored at the locations pointed to by
  1266. | `z0Ptr' and `z1Ptr'.
  1267. *----------------------------------------------------------------------------*}
  1268. procedure mul64To128( a, b : bits64; var z0Ptr, z1Ptr : bits64);
  1269. var
  1270. aHigh, aLow, bHigh, bLow : bits32;
  1271. z0, zMiddleA, zMiddleB, z1 : bits64;
  1272. begin
  1273. aLow := a;
  1274. aHigh := a shr 32;
  1275. bLow := b;
  1276. bHigh := b shr 32;
  1277. z1 := ( bits64(aLow) ) * bLow;
  1278. zMiddleA := ( bits64( aLow )) * bHigh;
  1279. zMiddleB := ( bits64( aHigh )) * bLow;
  1280. z0 := ( bits64(aHigh) ) * bHigh;
  1281. inc(zMiddleA, zMiddleB);
  1282. inc(z0 ,( ( bits64( zMiddleA < zMiddleB ) ) shl 32 ) + ( zMiddleA shr 32 ));
  1283. zMiddleA := zMiddleA shl 32;
  1284. inc(z1, zMiddleA);
  1285. inc(z0, ord( z1 < zMiddleA ));
  1286. z1Ptr := z1;
  1287. z0Ptr := z0;
  1288. end;
  1289. {*----------------------------------------------------------------------------
  1290. | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
  1291. | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
  1292. | product. The product is broken into four 64-bit pieces which are stored at
  1293. | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
  1294. *----------------------------------------------------------------------------*}
  1295. procedure mul128To256(a0,a1,b0,b1 : bits64;var z0Ptr,z1Ptr,z2Ptr,z3Ptr : bits64);
  1296. var
  1297. z0,z1,z2,z3,more1,more2 : bits64;
  1298. begin
  1299. mul64To128( a1, b1, z2, z3 );
  1300. mul64To128( a1, b0, z1, more2 );
  1301. add128( z1, more2, 0, z2, z1, z2 );
  1302. mul64To128( a0, b0, z0, more1 );
  1303. add128( z0, more1, 0, z1, z0, z1 );
  1304. mul64To128( a0, b1, more1, more2 );
  1305. add128( more1, more2, 0, z2, more1, z2 );
  1306. add128( z0, z1, 0, more1, z0, z1 );
  1307. z3Ptr := z3;
  1308. z2Ptr := z2;
  1309. z1Ptr := z1;
  1310. z0Ptr := z0;
  1311. end;
  1312. {*----------------------------------------------------------------------------
  1313. | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
  1314. | `b' to obtain a 192-bit product. The product is broken into three 64-bit
  1315. | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
  1316. | `z2Ptr'.
  1317. *----------------------------------------------------------------------------*}
  1318. procedure mul128By64To192(a0,a1,b : bits64;var z0Ptr,z1Ptr,z2Ptr : bits64);
  1319. var
  1320. z0, z1, z2, more1 : bits64;
  1321. begin
  1322. mul64To128( a1, b, z1, z2 );
  1323. mul64To128( a0, b, z0, more1 );
  1324. add128( z0, more1, 0, z1, z0, z1 );
  1325. z2Ptr := z2;
  1326. z1Ptr := z1;
  1327. z0Ptr := z0;
  1328. end;
  1329. {*----------------------------------------------------------------------------
  1330. | Returns an approximation to the 64-bit integer quotient obtained by dividing
  1331. | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
  1332. | divisor `b' must be at least 2^63. If q is the exact quotient truncated
  1333. | toward zero, the approximation returned lies between q and q + 2 inclusive.
  1334. | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
  1335. | unsigned integer is returned.
  1336. *----------------------------------------------------------------------------*}
  1337. Function estimateDiv128To64( a0:bits64; a1: bits64; b:bits64): bits64;
  1338. var
  1339. b0, b1, rem0, rem1, term0, term1, z : bits64;
  1340. begin
  1341. if ( b <= a0 ) then
  1342. begin
  1343. result:=qword( $FFFFFFFFFFFFFFFF );
  1344. exit;
  1345. end;
  1346. b0 := b shr 32;
  1347. if ( b0 shl 32 <= a0 ) then
  1348. z:=qword( $FFFFFFFF00000000 )
  1349. else
  1350. z:=( a0 div b0 ) shl 32;
  1351. mul64To128( b, z, term0, term1 );
  1352. sub128( a0, a1, term0, term1, rem0, rem1 );
  1353. while ( ( sbits64(rem0) ) < 0 ) do begin
  1354. dec(z,qword( $100000000 ));
  1355. b1 := b shl 32;
  1356. add128( rem0, rem1, b0, b1, rem0, rem1 );
  1357. end;
  1358. rem0 := ( rem0 shl 32 ) or ( rem1 shr 32 );
  1359. if ( b0 shl 32 <= rem0 ) then
  1360. z:=z or $FFFFFFFF
  1361. else
  1362. z:=z or rem0 div b0;
  1363. result:=z;
  1364. end;
  1365. {*
  1366. -------------------------------------------------------------------------------
  1367. Returns an approximation to the 32-bit integer quotient obtained by dividing
  1368. `b' into the 64-bit value formed by concatenating `a0' and `a1'. The
  1369. divisor `b' must be at least 2^31. If q is the exact quotient truncated
  1370. toward zero, the approximation returned lies between q and q + 2 inclusive.
  1371. If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
  1372. unsigned integer is returned.
  1373. -------------------------------------------------------------------------------
  1374. *}
  1375. Function estimateDiv64To32( a0:bits32; a1: bits32; b:bits32): bits32;
  1376. Var
  1377. b0, b1: bits32;
  1378. rem0, rem1, term0, term1: bits32;
  1379. z: bits32;
  1380. Begin
  1381. if ( b <= a0 ) then
  1382. Begin
  1383. estimateDiv64To32 := $FFFFFFFF;
  1384. exit;
  1385. End;
  1386. b0 := b shr 16;
  1387. if ( b0 shl 16 <= a0 ) then
  1388. z:= $FFFF0000
  1389. else
  1390. z:= ( a0 div b0 ) shl 16;
  1391. mul32To64( b, z, term0, term1 );
  1392. sub64( a0, a1, term0, term1, rem0, rem1 );
  1393. while ( ( sbits32 (rem0) ) < 0 ) do
  1394. Begin
  1395. z := z - $10000;
  1396. b1 := b shl 16;
  1397. add64( rem0, rem1, b0, b1, rem0, rem1 );
  1398. End;
  1399. rem0 := ( rem0 shl 16 ) OR ( rem1 shr 16 );
  1400. if ( b0 shl 16 <= rem0 ) then
  1401. z := z or $FFFF
  1402. else
  1403. z := z or (rem0 div b0);
  1404. estimateDiv64To32 := z;
  1405. End;
  1406. {*
  1407. -------------------------------------------------------------------------------
  1408. Returns an approximation to the square root of the 32-bit significand given
  1409. by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
  1410. `aExp' (the least significant bit) is 1, the integer returned approximates
  1411. 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
  1412. is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
  1413. case, the approximation returned lies strictly within +/-2 of the exact
  1414. value.
  1415. -------------------------------------------------------------------------------
  1416. *}
  1417. Function estimateSqrt32( aExp: int16; a: bits32 ): bits32;
  1418. const sqrtOddAdjustments: array[0..15] of bits16 = (
  1419. $0004, $0022, $005D, $00B1, $011D, $019F, $0236, $02E0,
  1420. $039C, $0468, $0545, $0631, $072B, $0832, $0946, $0A67
  1421. );
  1422. const sqrtEvenAdjustments: array[0..15] of bits16 = (
  1423. $0A2D, $08AF, $075A, $0629, $051A, $0429, $0356, $029E,
  1424. $0200, $0179, $0109, $00AF, $0068, $0034, $0012, $0002
  1425. );
  1426. Var
  1427. index: int8;
  1428. z: bits32;
  1429. Begin
  1430. index := ( a shr 27 ) AND 15;
  1431. if ( aExp AND 1 ) <> 0 then
  1432. Begin
  1433. z := $4000 + ( a shr 17 ) - sqrtOddAdjustments[ index ];
  1434. z := ( ( a div z ) shl 14 ) + ( z shl 15 );
  1435. a := a shr 1;
  1436. End
  1437. else
  1438. Begin
  1439. z := $8000 + ( a shr 17 ) - sqrtEvenAdjustments[ index ];
  1440. z := a div z + z;
  1441. if ( $20000 <= z ) then
  1442. z := $FFFF8000
  1443. else
  1444. z := ( z shl 15 );
  1445. if ( z <= a ) then
  1446. Begin
  1447. estimateSqrt32 := bits32 ( ( sbits32 (a )) shr 1 );
  1448. exit;
  1449. End;
  1450. End;
  1451. estimateSqrt32 := ( ( estimateDiv64To32( a, 0, z ) ) shr 1 ) + ( z shr 1 );
  1452. End;
  1453. {*
  1454. -------------------------------------------------------------------------------
  1455. Returns the number of leading 0 bits before the most-significant 1 bit of
  1456. `a'. If `a' is zero, 32 is returned.
  1457. -------------------------------------------------------------------------------
  1458. *}
  1459. Function countLeadingZeros32( a:bits32 ): int8;
  1460. const countLeadingZerosHigh:array[0..255] of int8 = (
  1461. 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  1462. 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  1463. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  1464. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  1465. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1466. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1467. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1468. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1469. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1470. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1471. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1472. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1473. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1474. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1475. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  1476. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  1477. );
  1478. Var
  1479. shiftCount: int8;
  1480. Begin
  1481. shiftCount := 0;
  1482. if ( a < $10000 ) then
  1483. Begin
  1484. shiftCount := shiftcount + 16;
  1485. a := a shl 16;
  1486. End;
  1487. if ( a < $1000000 ) then
  1488. Begin
  1489. shiftCount := shiftcount + 8;
  1490. a := a shl 8;
  1491. end;
  1492. shiftCount := shiftcount + countLeadingZerosHigh[ a shr 24 ];
  1493. countLeadingZeros32:= shiftCount;
  1494. End;
  1495. {*----------------------------------------------------------------------------
  1496. | Returns the number of leading 0 bits before the most-significant 1 bit of
  1497. | `a'. If `a' is zero, 64 is returned.
  1498. *----------------------------------------------------------------------------*}
  1499. function countLeadingZeros64( a : bits64): int8;
  1500. var
  1501. shiftcount : int8;
  1502. Begin
  1503. shiftCount := 0;
  1504. if ( a < bits64(bits64(1) shl 32 )) then
  1505. shiftCount := shiftcount + 32
  1506. else
  1507. a := a shr 32;
  1508. shiftCount := shiftCount + countLeadingZeros32( a );
  1509. countLeadingZeros64:= shiftCount;
  1510. End;
  1511. {*
  1512. -------------------------------------------------------------------------------
  1513. Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
  1514. equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
  1515. returns 0.
  1516. -------------------------------------------------------------------------------
  1517. *}
  1518. Function eq64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
  1519. Begin
  1520. eq64 := flag( a0 = b0 ) and flag( a1 = b1 );
  1521. End;
  1522. {*
  1523. -------------------------------------------------------------------------------
  1524. Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
  1525. than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
  1526. Otherwise, returns 0.
  1527. -------------------------------------------------------------------------------
  1528. *}
  1529. Function le64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
  1530. Begin
  1531. le64:= flag( a0 < b0 ) or flag( ( a0 = b0 ) and ( a1 <= b1 ) );
  1532. End;
  1533. {*
  1534. -------------------------------------------------------------------------------
  1535. Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less
  1536. than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
  1537. returns 0.
  1538. -------------------------------------------------------------------------------
  1539. *}
  1540. Function lt64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
  1541. Begin
  1542. lt64 := flag( a0 < b0 ) or flag( ( a0 = b0 ) and ( a1 < b1 ) );
  1543. End;
  1544. {*
  1545. -------------------------------------------------------------------------------
  1546. Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not
  1547. equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
  1548. returns 0.
  1549. -------------------------------------------------------------------------------
  1550. *}
  1551. Function ne64( a0: bits32; a1:bits32 ;b0:bits32; b1:bits32 ): flag;
  1552. Begin
  1553. ne64:= flag( a0 <> b0 ) or flag( a1 <> b1 );
  1554. End;
  1555. const
  1556. float128_default_nan_high = qword($FFFFFFFFFFFFFFFF);
  1557. float128_default_nan_low = qword($FFFFFFFFFFFFFFFF);
  1558. (*****************************************************************************)
  1559. (* End Low-Level arithmetic *)
  1560. (*****************************************************************************)
  1561. {*
  1562. -------------------------------------------------------------------------------
  1563. Functions and definitions to determine: (1) whether tininess for underflow
  1564. is detected before or after rounding by default, (2) what (if anything)
  1565. happens when exceptions are raised, (3) how signaling NaNs are distinguished
  1566. from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
  1567. are propagated from function inputs to output. These details are ENDIAN
  1568. specific
  1569. -------------------------------------------------------------------------------
  1570. *}
  1571. {$IFDEF ENDIAN_LITTLE}
  1572. {*
  1573. -------------------------------------------------------------------------------
  1574. Internal canonical NaN format.
  1575. -------------------------------------------------------------------------------
  1576. *}
  1577. TYPE
  1578. commonNaNT = packed record
  1579. sign: flag;
  1580. high, low : bits32;
  1581. end;
  1582. {*
  1583. -------------------------------------------------------------------------------
  1584. The pattern for a default generated single-precision NaN.
  1585. -------------------------------------------------------------------------------
  1586. *}
  1587. const float32_default_nan = $FFC00000;
  1588. {*
  1589. -------------------------------------------------------------------------------
  1590. Returns 1 if the single-precision floating-point value `a' is a NaN;
  1591. otherwise returns 0.
  1592. -------------------------------------------------------------------------------
  1593. *}
  1594. Function float32_is_nan( a : float32 ): flag;
  1595. Begin
  1596. float32_is_nan:= flag( $FF000000 < bits32 ( a shl 1 ) );
  1597. End;
  1598. {*
  1599. -------------------------------------------------------------------------------
  1600. Returns 1 if the single-precision floating-point value `a' is a signaling
  1601. NaN; otherwise returns 0.
  1602. -------------------------------------------------------------------------------
  1603. *}
  1604. Function float32_is_signaling_nan( a : float32 ): flag;
  1605. Begin
  1606. float32_is_signaling_nan := flag
  1607. ( ( ( a shr 22 ) and $1FF ) = $1FE ) and( a and $003FFFFF );
  1608. End;
  1609. {*
  1610. -------------------------------------------------------------------------------
  1611. Returns the result of converting the single-precision floating-point NaN
  1612. `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  1613. exception is raised.
  1614. -------------------------------------------------------------------------------
  1615. *}
  1616. Procedure float32ToCommonNaN( a: float32; VAR c:commonNaNT );
  1617. var
  1618. z : commonNaNT ;
  1619. Begin
  1620. if ( float32_is_signaling_nan( a ) <> 0) then
  1621. float_raise( float_flag_invalid );
  1622. z.sign := a shr 31;
  1623. z.low := 0;
  1624. z.high := a shl 9;
  1625. c := z;
  1626. End;
  1627. {*
  1628. -------------------------------------------------------------------------------
  1629. Returns the result of converting the canonical NaN `a' to the single-
  1630. precision floating-point format.
  1631. -------------------------------------------------------------------------------
  1632. *}
  1633. Function commonNaNToFloat32( a : commonNaNT ): float32;
  1634. Begin
  1635. commonNaNToFloat32 := ( ( bits32 (a.sign) ) shl 31 ) or $7FC00000 or ( a.high shr 9 );
  1636. End;
  1637. {*
  1638. -------------------------------------------------------------------------------
  1639. Takes two single-precision floating-point values `a' and `b', one of which
  1640. is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
  1641. signaling NaN, the invalid exception is raised.
  1642. -------------------------------------------------------------------------------
  1643. *}
  1644. Function propagateFloat32NaN( a : float32 ; b: float32 ): float32;
  1645. Var
  1646. aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
  1647. label returnLargerSignificand;
  1648. Begin
  1649. aIsNaN := float32_is_nan( a );
  1650. aIsSignalingNaN := float32_is_signaling_nan( a );
  1651. bIsNaN := float32_is_nan( b );
  1652. bIsSignalingNaN := float32_is_signaling_nan( b );
  1653. a := a or $00400000;
  1654. b := b or $00400000;
  1655. if ( aIsSignalingNaN or bIsSignalingNaN ) <> 0 then
  1656. float_raise( float_flag_invalid );
  1657. if ( aIsSignalingNaN )<> 0 then
  1658. Begin
  1659. if ( bIsSignalingNaN ) <> 0 then
  1660. goto returnLargerSignificand;
  1661. if bIsNan <> 0 then
  1662. propagateFloat32NaN := b
  1663. else
  1664. propagateFloat32NaN := a;
  1665. exit;
  1666. End
  1667. else if ( aIsNaN <> 0) then
  1668. Begin
  1669. if ( bIsSignalingNaN or not bIsNaN )<> 0 then
  1670. Begin
  1671. propagateFloat32NaN := a;
  1672. exit;
  1673. End;
  1674. returnLargerSignificand:
  1675. if ( bits32 ( a shl 1 ) < bits32 ( b shl 1 ) ) then
  1676. Begin
  1677. propagateFloat32NaN := b;
  1678. exit;
  1679. End;
  1680. if ( bits32 ( b shl 1 ) < bits32 ( a shl 1 ) ) then
  1681. Begin
  1682. propagateFloat32NaN := a;
  1683. End;
  1684. if a < b then
  1685. propagateFloat32NaN := a
  1686. else
  1687. propagateFloat32NaN := b;
  1688. exit;
  1689. End
  1690. else
  1691. Begin
  1692. propagateFloat32NaN := b;
  1693. exit;
  1694. End;
  1695. End;
  1696. {*
  1697. -------------------------------------------------------------------------------
  1698. The pattern for a default generated double-precision NaN. The `high' and
  1699. `low' values hold the most- and least-significant bits, respectively.
  1700. -------------------------------------------------------------------------------
  1701. *}
  1702. const
  1703. float64_default_nan_high = $FFF80000;
  1704. float64_default_nan_low = $00000000;
  1705. {*
  1706. -------------------------------------------------------------------------------
  1707. Returns 1 if the double-precision floating-point value `a' is a NaN;
  1708. otherwise returns 0.
  1709. -------------------------------------------------------------------------------
  1710. *}
  1711. Function float64_is_nan( a : float64 ) : flag;
  1712. Begin
  1713. float64_is_nan :=
  1714. flag( $FFE00000 <= bits32 ( a.high shl 1 ) )
  1715. and ( a.low or ( a.high and $000FFFFF ) );
  1716. End;
  1717. {*
  1718. -------------------------------------------------------------------------------
  1719. Returns 1 if the double-precision floating-point value `a' is a signaling
  1720. NaN; otherwise returns 0.
  1721. -------------------------------------------------------------------------------
  1722. *}
  1723. Function float64_is_signaling_nan( a : float64 ): flag;
  1724. Begin
  1725. float64_is_signaling_nan :=
  1726. flag( ( ( a.high shr 19 ) and $FFF ) = $FFE )
  1727. and ( a.low or ( a.high and $0007FFFF ) );
  1728. End;
  1729. {*
  1730. -------------------------------------------------------------------------------
  1731. Returns the result of converting the double-precision floating-point NaN
  1732. `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  1733. exception is raised.
  1734. -------------------------------------------------------------------------------
  1735. *}
  1736. Procedure float64ToCommonNaN( a : float64; VAR c:commonNaNT );
  1737. Var
  1738. z : commonNaNT;
  1739. Begin
  1740. if ( float64_is_signaling_nan( a )<>0 ) then
  1741. float_raise( float_flag_invalid );
  1742. z.sign := a.high shr 31;
  1743. shortShift64Left( a.high, a.low, 12, z.high, z.low );
  1744. c := z;
  1745. End;
  1746. function float64ToCommonNaN( a : float64 ) : commonNaNT;
  1747. Var
  1748. z : commonNaNT;
  1749. Begin
  1750. if ( float64_is_signaling_nan( a )<>0 ) then
  1751. float_raise( float_flag_invalid );
  1752. z.sign := a.high shr 31;
  1753. shortShift64Left( a.high, a.low, 12, z.high, z.low );
  1754. result := z;
  1755. End;
  1756. {*
  1757. -------------------------------------------------------------------------------
  1758. Returns the result of converting the canonical NaN `a' to the double-
  1759. precision floating-point format.
  1760. -------------------------------------------------------------------------------
  1761. *}
  1762. Procedure commonNaNToFloat64( a : commonNaNT; VAR c: float64 );
  1763. Var
  1764. z: float64;
  1765. Begin
  1766. shift64Right( a.high, a.low, 12, z.high, z.low );
  1767. z.high := z.high or ( ( bits32 (a.sign) ) shl 31 ) or $7FF80000;
  1768. c := z;
  1769. End;
  1770. {*
  1771. -------------------------------------------------------------------------------
  1772. Takes two double-precision floating-point values `a' and `b', one of which
  1773. is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
  1774. signaling NaN, the invalid exception is raised.
  1775. -------------------------------------------------------------------------------
  1776. *}
  1777. Procedure propagateFloat64NaN( a: float64; b: float64 ; VAR c: float64 );
  1778. Var
  1779. aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
  1780. label returnLargerSignificand;
  1781. Begin
  1782. aIsNaN := float64_is_nan( a );
  1783. aIsSignalingNaN := float64_is_signaling_nan( a );
  1784. bIsNaN := float64_is_nan( b );
  1785. bIsSignalingNaN := float64_is_signaling_nan( b );
  1786. a.high := a.high or $00080000;
  1787. b.high := b.high or $00080000;
  1788. if ( aIsSignalingNaN or bIsSignalingNaN )<> 0 then
  1789. float_raise( float_flag_invalid );
  1790. if ( aIsSignalingNaN )<>0 then
  1791. Begin
  1792. if ( bIsSignalingNaN )<>0 then
  1793. goto returnLargerSignificand;
  1794. if bIsNan <> 0 then
  1795. c := b
  1796. else
  1797. c := a;
  1798. exit;
  1799. End
  1800. else if ( aIsNaN )<> 0 then
  1801. Begin
  1802. if ( bIsSignalingNaN or not bIsNaN ) <> 0 then
  1803. Begin
  1804. c := a;
  1805. exit;
  1806. End;
  1807. returnLargerSignificand:
  1808. if ( lt64( a.high shl 1, a.low, b.high shl 1, b.low ) ) <> 0 then
  1809. Begin
  1810. c := b;
  1811. exit;
  1812. End;
  1813. if ( lt64( b.high shl 1, b.low, a.high shl 1, a.low ) ) <> 0 then
  1814. Begin
  1815. c := a;
  1816. exit;
  1817. End;
  1818. if a.high < b.high then
  1819. c := a
  1820. else
  1821. c := b;
  1822. exit;
  1823. End
  1824. else
  1825. Begin
  1826. c := b;
  1827. exit;
  1828. End;
  1829. End;
  1830. {*----------------------------------------------------------------------------
  1831. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  1832. | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
  1833. | returns 0.
  1834. *----------------------------------------------------------------------------*}
  1835. function lt128(a0: bits64; a1: bits64; b0: bits64; b1 : bits64): flag;inline;
  1836. begin
  1837. result := ord(( a0 < b0 ) or ( ( a0 = b0 ) and ( a1 < b1 ) ));
  1838. end;
  1839. {*----------------------------------------------------------------------------
  1840. | Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
  1841. | otherwise returns 0.
  1842. *----------------------------------------------------------------------------*}
  1843. function float128_is_nan( a : float128): flag;
  1844. begin
  1845. result:= ord(( int64( $FFFE000000000000 ) <= bits64( a.high shl 1 ) )
  1846. and ( (a.low<>0) or (( a.high and int64( $0000FFFFFFFFFFFF ) )<>0 ) ));
  1847. end;
  1848. {*----------------------------------------------------------------------------
  1849. | Returns 1 if the quadruple-precision floating-point value `a' is a
  1850. | signaling NaN; otherwise returns 0.
  1851. *----------------------------------------------------------------------------*}
  1852. function float128_is_signaling_nan( a : float128): flag;
  1853. begin
  1854. result:=ord(( ( ( a.high shr 47 ) and $FFFF ) = $FFFE ) and
  1855. ( (a.low<>0) or (( a.high and int64( $00007FFFFFFFFFFF ) )<>0) ));
  1856. end;
  1857. {*----------------------------------------------------------------------------
  1858. | Returns the result of converting the quadruple-precision floating-point NaN
  1859. | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  1860. | exception is raised.
  1861. *----------------------------------------------------------------------------*}
  1862. function float128ToCommonNaN( a : float128): commonNaNT;
  1863. var
  1864. z: commonNaNT;
  1865. qhigh,qlow : qword;
  1866. begin
  1867. if ( float128_is_signaling_nan( a )<>0) then
  1868. float_raise( float_flag_invalid );
  1869. z.sign := a.high shr 63;
  1870. shortShift128Left( a.high, a.low, 16, qhigh, qlow );
  1871. z.high:=qhigh shr 32;
  1872. z.low:=qhigh and $ffffffff;
  1873. result:=z;
  1874. end;
  1875. {*----------------------------------------------------------------------------
  1876. | Returns the result of converting the canonical NaN `a' to the quadruple-
  1877. | precision floating-point format.
  1878. *----------------------------------------------------------------------------*}
  1879. function commonNaNToFloat128( a : commonNaNT): float128;
  1880. var
  1881. z: float128;
  1882. begin
  1883. shift128Right( a.high, a.low, 16, z.high, z.low );
  1884. z.high := z.high or ( ( bits64(a.sign) ) shl 63 ) or int64( $7FFF800000000000 );
  1885. result:=z;
  1886. end;
  1887. {*----------------------------------------------------------------------------
  1888. | Takes two quadruple-precision floating-point values `a' and `b', one of
  1889. | which is a NaN, and returns the appropriate NaN result. If either `a' or
  1890. | `b' is a signaling NaN, the invalid exception is raised.
  1891. *----------------------------------------------------------------------------*}
  1892. function propagateFloat128NaN( a: float128; b : float128): float128;
  1893. var
  1894. aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
  1895. label
  1896. returnLargerSignificand;
  1897. begin
  1898. aIsNaN := float128_is_nan( a );
  1899. aIsSignalingNaN := float128_is_signaling_nan( a );
  1900. bIsNaN := float128_is_nan( b );
  1901. bIsSignalingNaN := float128_is_signaling_nan( b );
  1902. a.high := a.high or int64( $0000800000000000 );
  1903. b.high := b.high or int64( $0000800000000000 );
  1904. if ( aIsSignalingNaN or bIsSignalingNaN )<>0 then
  1905. float_raise( float_flag_invalid );
  1906. if ( aIsSignalingNaN )<>0 then
  1907. begin
  1908. if ( bIsSignalingNaN )<>0 then
  1909. goto returnLargerSignificand;
  1910. if bIsNaN<>0 then
  1911. result := b
  1912. else
  1913. result := a;
  1914. exit;
  1915. end
  1916. else if ( aIsNaN )<>0 then
  1917. begin
  1918. if ( bIsSignalingNaN or not( bIsNaN) )<>0 then
  1919. begin
  1920. result := a;
  1921. exit;
  1922. end;
  1923. returnLargerSignificand:
  1924. if ( lt128( a.high shl 1, a.low, b.high shl 1, b.low ) )<>0 then
  1925. begin
  1926. result := b;
  1927. exit;
  1928. end;
  1929. if ( lt128( b.high shl 1, b.low, a.high shl 1, a.low ) )<>0 then
  1930. begin
  1931. result := a;
  1932. exit
  1933. end;
  1934. if ( a.high < b.high ) then
  1935. result := a
  1936. else
  1937. result := b;
  1938. exit;
  1939. end
  1940. else
  1941. result:=b;
  1942. end;
  1943. {$ELSE}
  1944. { Big endian code }
  1945. (*----------------------------------------------------------------------------
  1946. | Internal canonical NaN format.
  1947. *----------------------------------------------------------------------------*)
  1948. type
  1949. commonNANT = packed record
  1950. sign : flag;
  1951. high, low : bits32;
  1952. end;
  1953. (*----------------------------------------------------------------------------
  1954. | The pattern for a default generated single-precision NaN.
  1955. *----------------------------------------------------------------------------*)
  1956. const float32_default_nan = $7FFFFFFF;
  1957. (*----------------------------------------------------------------------------
  1958. | Returns 1 if the single-precision floating-point value `a' is a NaN;
  1959. | otherwise returns 0.
  1960. *----------------------------------------------------------------------------*)
  1961. function float32_is_nan(a: float32): flag;
  1962. begin
  1963. float32_is_nan := flag( $FF000000 < bits32( a shl 1 ) );
  1964. end;
  1965. (*----------------------------------------------------------------------------
  1966. | Returns 1 if the single-precision floating-point value `a' is a signaling
  1967. | NaN; otherwise returns 0.
  1968. *----------------------------------------------------------------------------*)
  1969. function float32_is_signaling_nan(a: float32):flag;
  1970. begin
  1971. float32_is_signaling_nan := flag( ( ( a shr 22 ) and $1FF ) = $1FE ) and flag( boolean((a and $003FFFFF)<>0) );
  1972. end;
  1973. (*----------------------------------------------------------------------------
  1974. | Returns the result of converting the single-precision floating-point NaN
  1975. | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  1976. | exception is raised.
  1977. *----------------------------------------------------------------------------*)
  1978. Procedure float32ToCommonNaN( a: float32; VAR c:commonNaNT );
  1979. var
  1980. z: commonNANT;
  1981. begin
  1982. if float32_is_signaling_nan(a)<>0 then
  1983. float_raise(float_flag_invalid);
  1984. z.sign := a shr 31;
  1985. z.low := 0;
  1986. z.high := a shl 9;
  1987. c:=z;
  1988. end;
  1989. (*----------------------------------------------------------------------------
  1990. | Returns the result of converting the canonical NaN `a' to the single-
  1991. | precision floating-point format.
  1992. *----------------------------------------------------------------------------*)
  1993. function CommonNanToFloat32(a : CommonNaNT): float32;
  1994. begin
  1995. CommonNanToFloat32:= ( ( bits32( a.sign )) shl 31 ) OR $7FC00000 OR ( a.high shr 9 );
  1996. end;
  1997. (*----------------------------------------------------------------------------
  1998. | Takes two single-precision floating-point values `a' and `b', one of which
  1999. | is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
  2000. | signaling NaN, the invalid exception is raised.
  2001. *----------------------------------------------------------------------------*)
  2002. function propagateFloat32NaN( a: float32 ; b: float32): float32;
  2003. var
  2004. aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN: flag;
  2005. begin
  2006. aIsNaN := float32_is_nan( a );
  2007. aIsSignalingNaN := float32_is_signaling_nan( a );
  2008. bIsNaN := float32_is_nan( b );
  2009. bIsSignalingNaN := float32_is_signaling_nan( b );
  2010. a := a or $00400000;
  2011. b := b or $00400000;
  2012. if ( aIsSignalingNaN or bIsSignalingNaN )<>0 then
  2013. float_raise( float_flag_invalid );
  2014. if bIsSignalingNaN<>0 then
  2015. propagateFloat32Nan := b
  2016. else if aIsSignalingNan<>0 then
  2017. propagateFloat32Nan := a
  2018. else if bIsNan<>0 then
  2019. propagateFloat32Nan := b
  2020. else
  2021. propagateFloat32Nan := a;
  2022. end;
  2023. (*----------------------------------------------------------------------------
  2024. | The pattern for a default generated double-precision NaN. The `high' and
  2025. | `low' values hold the most- and least-significant bits, respectively.
  2026. *----------------------------------------------------------------------------*)
  2027. const
  2028. float64_default_nan_high = $7FFFFFFF;
  2029. float64_default_nan_low = $FFFFFFFF;
  2030. (*----------------------------------------------------------------------------
  2031. | Returns 1 if the double-precision floating-point value `a' is a NaN;
  2032. | otherwise returns 0.
  2033. *----------------------------------------------------------------------------*)
  2034. function float64_is_nan(a: float64): flag;
  2035. begin
  2036. float64_is_nan := flag (
  2037. ( $FFE00000 <= bits32 ( a.high shl 1 ) )
  2038. and ( (a.low<>0) or (( a.high and $000FFFFF )<>0) ));
  2039. end;
  2040. (*----------------------------------------------------------------------------
  2041. | Returns 1 if the double-precision floating-point value `a' is a signaling
  2042. | NaN; otherwise returns 0.
  2043. *----------------------------------------------------------------------------*)
  2044. function float64_is_signaling_nan( a:float64): flag;
  2045. begin
  2046. float64_is_signaling_nan := flag(
  2047. ( ( ( a.high shr 19 ) and $FFF ) = $FFE )
  2048. and ( (a.low<>0) or ( ( a.high and $0007FFFF )<>0) ));
  2049. end;
  2050. (*----------------------------------------------------------------------------
  2051. | Returns the result of converting the double-precision floating-point NaN
  2052. | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  2053. | exception is raised.
  2054. *----------------------------------------------------------------------------*)
  2055. Procedure float64ToCommonNaN( a : float64; VAR c:commonNaNT );
  2056. var
  2057. z : commonNaNT;
  2058. begin
  2059. if ( float64_is_signaling_nan( a )<>0 ) then
  2060. float_raise( float_flag_invalid );
  2061. z.sign := a.high shr 31;
  2062. shortShift64Left( a.high, a.low, 12, z.high, z.low );
  2063. c:=z;
  2064. end;
  2065. (*----------------------------------------------------------------------------
  2066. | Returns the result of converting the canonical NaN `a' to the double-
  2067. | precision floating-point format.
  2068. *----------------------------------------------------------------------------*)
  2069. Procedure commonNaNToFloat64( a : commonNaNT; VAR c: float64 );
  2070. var
  2071. z: float64;
  2072. begin
  2073. shift64Right( a.high, a.low, 12, z.high, z.low );
  2074. z.high := z.high or ( ( bits32 (a.sign) ) shl 31 ) or $7FF80000;
  2075. c:=z;
  2076. end;
  2077. (*----------------------------------------------------------------------------
  2078. | Takes two double-precision floating-point values `a' and `b', one of which
  2079. | is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
  2080. | signaling NaN, the invalid exception is raised.
  2081. *----------------------------------------------------------------------------*)
  2082. Procedure propagateFloat64NaN( a: float64; b: float64 ; VAR c: float64 );
  2083. var
  2084. aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN : flag;
  2085. begin
  2086. aIsNaN := float64_is_nan( a );
  2087. aIsSignalingNaN := float64_is_signaling_nan( a );
  2088. bIsNaN := float64_is_nan( b );
  2089. bIsSignalingNaN := float64_is_signaling_nan( b );
  2090. a.high := a.high or $00080000;
  2091. b.high := b.high or $00080000;
  2092. if ( (aIsSignalingNaN<>0) or (bIsSignalingNaN<>0) ) then
  2093. float_raise( float_flag_invalid );
  2094. if bIsSignalingNaN<>0 then
  2095. c := b
  2096. else if aIsSignalingNan<>0 then
  2097. c := a
  2098. else if bIsNan<>0 then
  2099. c := b
  2100. else
  2101. c := a;
  2102. end;
  2103. {$ENDIF}
  2104. (****************************************************************************)
  2105. (* END ENDIAN SPECIFIC CODE *)
  2106. (****************************************************************************)
  2107. {*
  2108. -------------------------------------------------------------------------------
  2109. Returns the fraction bits of the single-precision floating-point value `a'.
  2110. -------------------------------------------------------------------------------
  2111. *}
  2112. Function ExtractFloat32Frac(a : Float32) : Bits32;
  2113. Begin
  2114. ExtractFloat32Frac := A AND $007FFFFF;
  2115. End;
  2116. {*
  2117. -------------------------------------------------------------------------------
  2118. Returns the exponent bits of the single-precision floating-point value `a'.
  2119. -------------------------------------------------------------------------------
  2120. *}
  2121. Function extractFloat32Exp( a: float32 ): Int16;
  2122. Begin
  2123. extractFloat32Exp := (a shr 23) AND $FF;
  2124. End;
  2125. {*
  2126. -------------------------------------------------------------------------------
  2127. Returns the sign bit of the single-precision floating-point value `a'.
  2128. -------------------------------------------------------------------------------
  2129. *}
  2130. Function extractFloat32Sign( a: float32 ): Flag;
  2131. Begin
  2132. extractFloat32Sign := a shr 31;
  2133. End;
  2134. {*
  2135. -------------------------------------------------------------------------------
  2136. Normalizes the subnormal single-precision floating-point value represented
  2137. by the denormalized significand `aSig'. The normalized exponent and
  2138. significand are stored at the locations pointed to by `zExpPtr' and
  2139. `zSigPtr', respectively.
  2140. -------------------------------------------------------------------------------
  2141. *}
  2142. Procedure normalizeFloat32Subnormal( aSig : bits32; VAR zExpPtr: Int16; VAR zSigPtr :bits32);
  2143. Var
  2144. ShiftCount : BYTE;
  2145. Begin
  2146. shiftCount := countLeadingZeros32( aSig ) - 8;
  2147. zSigPtr := aSig shl shiftCount;
  2148. zExpPtr := 1 - shiftCount;
  2149. End;
  2150. {*
  2151. -------------------------------------------------------------------------------
  2152. Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  2153. single-precision floating-point value, returning the result. After being
  2154. shifted into the proper positions, the three fields are simply added
  2155. together to form the result. This means that any integer portion of `zSig'
  2156. will be added into the exponent. Since a properly normalized significand
  2157. will have an integer portion equal to 1, the `zExp' input should be 1 less
  2158. than the desired result exponent whenever `zSig' is a complete, normalized
  2159. significand.
  2160. -------------------------------------------------------------------------------
  2161. *}
  2162. Function packFloat32( zSign: Flag; zExp : Int16; zSig: Bits32 ): Float32;
  2163. Begin
  2164. packFloat32 := ( ( bits32( zSign) ) shl 31 ) + ( ( bits32 (zExp) ) shl 23 )
  2165. + zSig;
  2166. End;
  2167. {*
  2168. -------------------------------------------------------------------------------
  2169. Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  2170. and significand `zSig', and returns the proper single-precision floating-
  2171. point value corresponding to the abstract input. Ordinarily, the abstract
  2172. value is simply rounded and packed into the single-precision format, with
  2173. the inexact exception raised if the abstract input cannot be represented
  2174. exactly. However, if the abstract value is too large, the overflow and
  2175. inexact exceptions are raised and an infinity or maximal finite value is
  2176. returned. If the abstract value is too small, the input value is rounded to
  2177. a subnormal number, and the underflow and inexact exceptions are raised if
  2178. the abstract input cannot be represented exactly as a subnormal single-
  2179. precision floating-point number.
  2180. The input significand `zSig' has its binary point between bits 30
  2181. and 29, which is 7 bits to the left of the usual location. This shifted
  2182. significand must be normalized or smaller. If `zSig' is not normalized,
  2183. `zExp' must be 0; in that case, the result returned is a subnormal number,
  2184. and it must not require rounding. In the usual case that `zSig' is
  2185. normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
  2186. The handling of underflow and overflow follows the IEC/IEEE Standard for
  2187. Binary Floating-Point Arithmetic.
  2188. -------------------------------------------------------------------------------
  2189. *}
  2190. Function roundAndPackFloat32( zSign : Flag; zExp : Int16; zSig : Bits32 ) : float32;
  2191. Var
  2192. roundingMode : BYTE;
  2193. roundNearestEven : Flag;
  2194. roundIncrement, roundBits : BYTE;
  2195. IsTiny : Flag;
  2196. Begin
  2197. roundingMode := softfloat_rounding_mode;
  2198. if (roundingMode = float_round_nearest_even) then
  2199. Begin
  2200. roundNearestEven := Flag(TRUE);
  2201. end
  2202. else
  2203. roundNearestEven := Flag(FALSE);
  2204. roundIncrement := $40;
  2205. if ( Boolean(roundNearestEven) = FALSE) then
  2206. Begin
  2207. if ( roundingMode = float_round_to_zero ) Then
  2208. Begin
  2209. roundIncrement := 0;
  2210. End
  2211. else
  2212. Begin
  2213. roundIncrement := $7F;
  2214. if ( zSign <> 0 ) then
  2215. Begin
  2216. if roundingMode = float_round_up then roundIncrement := 0;
  2217. End
  2218. else
  2219. Begin
  2220. if roundingMode = float_round_down then roundIncrement := 0;
  2221. End;
  2222. End
  2223. End;
  2224. roundBits := zSig AND $7F;
  2225. if ($FD <= bits16 (zExp) ) then
  2226. Begin
  2227. if (( $FD < zExp ) OR ( zExp = $FD ) AND ( sbits32 ( zSig + roundIncrement ) < 0 ) ) then
  2228. Begin
  2229. float_raise( float_flag_overflow OR float_flag_inexact );
  2230. roundAndPackFloat32:=packFloat32( zSign, $FF, 0 ) - Flag( roundIncrement = 0 );
  2231. exit;
  2232. End;
  2233. if ( zExp < 0 ) then
  2234. Begin
  2235. isTiny :=
  2236. flag(( float_detect_tininess = float_tininess_before_rounding )
  2237. OR ( zExp < -1 )
  2238. OR ( (zSig + roundIncrement) < $80000000 ));
  2239. shift32RightJamming( zSig, - zExp, zSig );
  2240. zExp := 0;
  2241. roundBits := zSig AND $7F;
  2242. if ( (isTiny = flag(TRUE)) and (roundBits<>0) ) then
  2243. float_raise( float_flag_underflow );
  2244. End;
  2245. End;
  2246. if ( roundBits )<> 0 then
  2247. softfloat_exception_flags := float_flag_inexact OR softfloat_exception_flags;
  2248. zSig := ( zSig + roundIncrement ) shr 7;
  2249. zSig := zSig AND not bits32( bits32( ( roundBits XOR $40 ) = 0 ) and roundNearestEven );
  2250. if ( zSig = 0 ) then zExp := 0;
  2251. roundAndPackFloat32 := packFloat32( zSign, zExp, zSig );
  2252. exit;
  2253. End;
  2254. {*
  2255. -------------------------------------------------------------------------------
  2256. Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  2257. and significand `zSig', and returns the proper single-precision floating-
  2258. point value corresponding to the abstract input. This routine is just like
  2259. `roundAndPackFloat32' except that `zSig' does not have to be normalized.
  2260. Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
  2261. floating-point exponent.
  2262. -------------------------------------------------------------------------------
  2263. *}
  2264. Function normalizeRoundAndPackFloat32( zSign: flag; zExp: int16; zSig:bits32 ): float32;
  2265. Var
  2266. ShiftCount : int8;
  2267. Begin
  2268. shiftCount := countLeadingZeros32( zSig ) - 1;
  2269. normalizeRoundAndPackFloat32 := roundAndPackFloat32( zSign, zExp - shiftCount, zSig shl shiftCount );
  2270. End;
  2271. {*
  2272. -------------------------------------------------------------------------------
  2273. Returns the most-significant 20 fraction bits of the double-precision
  2274. floating-point value `a'.
  2275. -------------------------------------------------------------------------------
  2276. *}
  2277. Function extractFloat64Frac0(a: float64): bits32;
  2278. Begin
  2279. extractFloat64Frac0 := a.high and $000FFFFF;
  2280. End;
  2281. {*
  2282. -------------------------------------------------------------------------------
  2283. Returns the least-significant 32 fraction bits of the double-precision
  2284. floating-point value `a'.
  2285. -------------------------------------------------------------------------------
  2286. *}
  2287. Function extractFloat64Frac1(a: float64): bits32;
  2288. Begin
  2289. extractFloat64Frac1 := a.low;
  2290. End;
  2291. {$define FPC_SYSTEM_HAS_extractFloat64Frac}
  2292. Function extractFloat64Frac(a: float64): bits64;
  2293. Begin
  2294. extractFloat64Frac := bits64(a) and $000FFFFFFFFFFFFF;
  2295. End;
  2296. {*
  2297. -------------------------------------------------------------------------------
  2298. Returns the exponent bits of the double-precision floating-point value `a'.
  2299. -------------------------------------------------------------------------------
  2300. *}
  2301. Function extractFloat64Exp(a: float64): int16;
  2302. Begin
  2303. extractFloat64Exp:= ( a.high shr 20 ) AND $7FF;
  2304. End;
  2305. {*
  2306. -------------------------------------------------------------------------------
  2307. Returns the sign bit of the double-precision floating-point value `a'.
  2308. -------------------------------------------------------------------------------
  2309. *}
  2310. Function extractFloat64Sign(a: float64) : flag;
  2311. Begin
  2312. extractFloat64Sign := a.high shr 31;
  2313. End;
  2314. {*
  2315. -------------------------------------------------------------------------------
  2316. Normalizes the subnormal double-precision floating-point value represented
  2317. by the denormalized significand formed by the concatenation of `aSig0' and
  2318. `aSig1'. The normalized exponent is stored at the location pointed to by
  2319. `zExpPtr'. The most significant 21 bits of the normalized significand are
  2320. stored at the location pointed to by `zSig0Ptr', and the least significant
  2321. 32 bits of the normalized significand are stored at the location pointed to
  2322. by `zSig1Ptr'.
  2323. -------------------------------------------------------------------------------
  2324. *}
  2325. Procedure normalizeFloat64Subnormal(
  2326. aSig0: bits32;
  2327. aSig1: bits32;
  2328. VAR zExpPtr : Int16;
  2329. VAR zSig0Ptr : Bits32;
  2330. VAR zSig1Ptr : Bits32
  2331. );
  2332. Var
  2333. ShiftCount : Int8;
  2334. Begin
  2335. if ( aSig0 = 0 ) then
  2336. Begin
  2337. shiftCount := countLeadingZeros32( aSig1 ) - 11;
  2338. if ( shiftCount < 0 ) then
  2339. Begin
  2340. zSig0Ptr := aSig1 shr ( - shiftCount );
  2341. zSig1Ptr := aSig1 shl ( shiftCount AND 31 );
  2342. End
  2343. else
  2344. Begin
  2345. zSig0Ptr := aSig1 shl shiftCount;
  2346. zSig1Ptr := 0;
  2347. End;
  2348. zExpPtr := - shiftCount - 31;
  2349. End
  2350. else
  2351. Begin
  2352. shiftCount := countLeadingZeros32( aSig0 ) - 11;
  2353. shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
  2354. zExpPtr := 1 - shiftCount;
  2355. End;
  2356. End;
  2357. procedure normalizeFloat64Subnormal(aSig : bits64;var zExpPtr : int16; var zSigPtr : bits64);
  2358. var
  2359. shiftCount : int8;
  2360. begin
  2361. shiftCount := countLeadingZeros64( aSig ) - 11;
  2362. zSigPtr := aSig shl shiftCount;
  2363. zExpPtr := 1 - shiftCount;
  2364. end;
  2365. {*
  2366. -------------------------------------------------------------------------------
  2367. Packs the sign `zSign', the exponent `zExp', and the significand formed by
  2368. the concatenation of `zSig0' and `zSig1' into a double-precision floating-
  2369. point value, returning the result. After being shifted into the proper
  2370. positions, the three fields `zSign', `zExp', and `zSig0' are simply added
  2371. together to form the most significant 32 bits of the result. This means
  2372. that any integer portion of `zSig0' will be added into the exponent. Since
  2373. a properly normalized significand will have an integer portion equal to 1,
  2374. the `zExp' input should be 1 less than the desired result exponent whenever
  2375. `zSig0' and `zSig1' concatenated form a complete, normalized significand.
  2376. -------------------------------------------------------------------------------
  2377. *}
  2378. Procedure
  2379. packFloat64( zSign: Flag; zExp: Int16; zSig0: Bits32; zSig1 : Bits32; VAR c : float64);
  2380. var
  2381. z: Float64;
  2382. Begin
  2383. z.low := zSig1;
  2384. z.high := ( ( bits32 (zSign) ) shl 31 ) + ( ( bits32 (zExp) ) shl 20 ) + zSig0;
  2385. c := z;
  2386. End;
  2387. {*----------------------------------------------------------------------------
  2388. | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  2389. | double-precision floating-point value, returning the result. After being
  2390. | shifted into the proper positions, the three fields are simply added
  2391. | together to form the result. This means that any integer portion of `zSig'
  2392. | will be added into the exponent. Since a properly normalized significand
  2393. | will have an integer portion equal to 1, the `zExp' input should be 1 less
  2394. | than the desired result exponent whenever `zSig' is a complete, normalized
  2395. | significand.
  2396. *----------------------------------------------------------------------------*}
  2397. function packFloat64( zSign: flag; zExp: int16; zSig : bits64): float64;inline;
  2398. begin
  2399. result := float64(( ( bits64(zSign) ) shl 63 ) + ( ( bits64(zExp) ) shl 52 ) + zSig);
  2400. end;
  2401. {*
  2402. -------------------------------------------------------------------------------
  2403. Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  2404. and extended significand formed by the concatenation of `zSig0', `zSig1',
  2405. and `zSig2', and returns the proper double-precision floating-point value
  2406. corresponding to the abstract input. Ordinarily, the abstract value is
  2407. simply rounded and packed into the double-precision format, with the inexact
  2408. exception raised if the abstract input cannot be represented exactly.
  2409. However, if the abstract value is too large, the overflow and inexact
  2410. exceptions are raised and an infinity or maximal finite value is returned.
  2411. If the abstract value is too small, the input value is rounded to a
  2412. subnormal number, and the underflow and inexact exceptions are raised if the
  2413. abstract input cannot be represented exactly as a subnormal double-precision
  2414. floating-point number.
  2415. The input significand must be normalized or smaller. If the input
  2416. significand is not normalized, `zExp' must be 0; in that case, the result
  2417. returned is a subnormal number, and it must not require rounding. In the
  2418. usual case that the input significand is normalized, `zExp' must be 1 less
  2419. than the ``true'' floating-point exponent. The handling of underflow and
  2420. overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  2421. -------------------------------------------------------------------------------
  2422. *}
  2423. Procedure
  2424. roundAndPackFloat64(
  2425. zSign: Flag; zExp: Int16; zSig0: Bits32; zSig1: Bits32; zSig2: Bits32; Var c: Float64 );
  2426. Var
  2427. roundingMode : Int8;
  2428. roundNearestEven, increment, isTiny : Flag;
  2429. Begin
  2430. roundingMode := softfloat_rounding_mode;
  2431. roundNearestEven := flag( roundingMode = float_round_nearest_even );
  2432. increment := flag( sbits32 (zSig2) < 0 );
  2433. if ( roundNearestEven = flag(FALSE) ) then
  2434. Begin
  2435. if ( roundingMode = float_round_to_zero ) then
  2436. increment := 0
  2437. else
  2438. Begin
  2439. if ( zSign )<> 0 then
  2440. Begin
  2441. increment := flag( roundingMode = float_round_down ) and zSig2;
  2442. End
  2443. else
  2444. Begin
  2445. increment := flag( roundingMode = float_round_up ) and zSig2;
  2446. End
  2447. End
  2448. End;
  2449. if ( $7FD <= bits16 (zExp) ) then
  2450. Begin
  2451. if (( $7FD < zExp )
  2452. or (( zExp = $7FD )
  2453. and (eq64( $001FFFFF, $FFFFFFFF, zSig0, zSig1 )<>0)
  2454. and (increment<>0)
  2455. )
  2456. ) then
  2457. Begin
  2458. float_raise( float_flag_overflow OR float_flag_inexact );
  2459. if (( roundingMode = float_round_to_zero )
  2460. or ( (zSign<>0) and ( roundingMode = float_round_up ) )
  2461. or ( (zSign = 0) and ( roundingMode = float_round_down ) )
  2462. ) then
  2463. Begin
  2464. packFloat64( zSign, $7FE, $000FFFFF, $FFFFFFFF, c );
  2465. exit;
  2466. End;
  2467. packFloat64( zSign, $7FF, 0, 0, c );
  2468. exit;
  2469. End;
  2470. if ( zExp < 0 ) then
  2471. Begin
  2472. isTiny :=
  2473. flag( float_detect_tininess = float_tininess_before_rounding )
  2474. or flag( zExp < -1 )
  2475. or flag(increment = 0)
  2476. or flag(lt64( zSig0, zSig1, $001FFFFF, $FFFFFFFF)<>0);
  2477. shift64ExtraRightJamming(
  2478. zSig0, zSig1, zSig2, - zExp, zSig0, zSig1, zSig2 );
  2479. zExp := 0;
  2480. if ( isTiny<>0) and (zSig2<>0 ) then float_raise( float_flag_underflow );
  2481. if ( roundNearestEven )<>0 then
  2482. Begin
  2483. increment := flag( sbits32 (zSig2) < 0 );
  2484. End
  2485. else
  2486. Begin
  2487. if ( zSign )<>0 then
  2488. Begin
  2489. increment := flag( roundingMode = float_round_down ) and zSig2;
  2490. End
  2491. else
  2492. Begin
  2493. increment := flag( roundingMode = float_round_up ) and zSig2;
  2494. End
  2495. End;
  2496. End;
  2497. End;
  2498. if ( zSig2 )<>0 then
  2499. softfloat_exception_flags := softfloat_exception_flags OR float_flag_inexact;
  2500. if ( increment )<>0 then
  2501. Begin
  2502. add64( zSig0, zSig1, 0, 1, zSig0, zSig1 );
  2503. zSig1 := zSig1 and not ( bits32(flag( zSig2 + zSig2 = 0 )) and roundNearestEven );
  2504. End
  2505. else
  2506. Begin
  2507. if ( ( zSig0 or zSig1 ) = 0 ) then zExp := 0;
  2508. End;
  2509. packFloat64( zSign, zExp, zSig0, zSig1, c );
  2510. End;
  2511. {*----------------------------------------------------------------------------
  2512. | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  2513. | and significand `zSig', and returns the proper double-precision floating-
  2514. | point value corresponding to the abstract input. Ordinarily, the abstract
  2515. | value is simply rounded and packed into the double-precision format, with
  2516. | the inexact exception raised if the abstract input cannot be represented
  2517. | exactly. However, if the abstract value is too large, the overflow and
  2518. | inexact exceptions are raised and an infinity or maximal finite value is
  2519. | returned. If the abstract value is too small, the input value is rounded
  2520. | to a subnormal number, and the underflow and inexact exceptions are raised
  2521. | if the abstract input cannot be represented exactly as a subnormal double-
  2522. | precision floating-point number.
  2523. | The input significand `zSig' has its binary point between bits 62
  2524. | and 61, which is 10 bits to the left of the usual location. This shifted
  2525. | significand must be normalized or smaller. If `zSig' is not normalized,
  2526. | `zExp' must be 0; in that case, the result returned is a subnormal number,
  2527. | and it must not require rounding. In the usual case that `zSig' is
  2528. | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
  2529. | The handling of underflow and overflow follows the IEC/IEEE Standard for
  2530. | Binary Floating-Point Arithmetic.
  2531. *----------------------------------------------------------------------------*}
  2532. function roundAndPackFloat64( zSign: flag; zExp: int16; zSig : bits64): float64;
  2533. var
  2534. roundingMode: int8;
  2535. roundNearestEven: flag;
  2536. roundIncrement, roundBits: int16;
  2537. isTiny: flag;
  2538. begin
  2539. roundingMode := softfloat_rounding_mode;
  2540. roundNearestEven := ord( roundingMode = float_round_nearest_even );
  2541. roundIncrement := $200;
  2542. if ( roundNearestEven=0 ) then
  2543. begin
  2544. if ( roundingMode = float_round_to_zero ) then
  2545. begin
  2546. roundIncrement := 0;
  2547. end
  2548. else begin
  2549. roundIncrement := $3FF;
  2550. if ( zSign<>0 ) then
  2551. begin
  2552. if ( roundingMode = float_round_up ) then
  2553. roundIncrement := 0;
  2554. end
  2555. else begin
  2556. if ( roundingMode = float_round_down ) then
  2557. roundIncrement := 0;
  2558. end
  2559. end
  2560. end;
  2561. roundBits := zSig and $3FF;
  2562. if ( $7FD <= bits16(zExp) ) then
  2563. begin
  2564. if ( ( $7FD < zExp )
  2565. or ( ( zExp = $7FD )
  2566. and ( sbits64( zSig + roundIncrement ) < 0 ) )
  2567. ) then
  2568. begin
  2569. float_raise( float_flag_overflow or float_flag_inexact );
  2570. result := float64(qword(packFloat64( zSign, $7FF, 0 )) - ord( roundIncrement = 0 ));
  2571. exit;
  2572. end;
  2573. if ( zExp < 0 ) then
  2574. begin
  2575. isTiny := ord(
  2576. ( float_detect_tininess = float_tininess_before_rounding )
  2577. or ( zExp < -1 )
  2578. or ( (zSig + roundIncrement) < int64( $8000000000000000 ) ) );
  2579. shift64RightJamming( zSig, - zExp, zSig );
  2580. zExp := 0;
  2581. roundBits := zSig and $3FF;
  2582. if ( isTiny and roundBits )<>0 then
  2583. float_raise( float_flag_underflow );
  2584. end
  2585. end;
  2586. if ( roundBits<>0 ) then
  2587. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  2588. zSig := ( zSig + roundIncrement ) shr 10;
  2589. zSig := zSig and not( ord( ( roundBits xor $200 ) = 0 ) and roundNearestEven );
  2590. if ( zSig = 0 ) then
  2591. zExp := 0;
  2592. result:=packFloat64( zSign, zExp, zSig );
  2593. end;
  2594. {*
  2595. -------------------------------------------------------------------------------
  2596. Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  2597. and significand formed by the concatenation of `zSig0' and `zSig1', and
  2598. returns the proper double-precision floating-point value corresponding
  2599. to the abstract input. This routine is just like `roundAndPackFloat64'
  2600. except that the input significand has fewer bits and does not have to be
  2601. normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
  2602. point exponent.
  2603. -------------------------------------------------------------------------------
  2604. *}
  2605. Procedure
  2606. normalizeRoundAndPackFloat64(
  2607. zSign:flag; zExp:int16; zSig0:bits32; zSig1:bits32; VAR c: float64 );
  2608. Var
  2609. shiftCount : int8;
  2610. zSig2 : bits32;
  2611. Begin
  2612. if ( zSig0 = 0 ) then
  2613. Begin
  2614. zSig0 := zSig1;
  2615. zSig1 := 0;
  2616. zExp := zExp -32;
  2617. End;
  2618. shiftCount := countLeadingZeros32( zSig0 ) - 11;
  2619. if ( 0 <= shiftCount ) then
  2620. Begin
  2621. zSig2 := 0;
  2622. shortShift64Left( zSig0, zSig1, shiftCount, zSig0, zSig1 );
  2623. End
  2624. else
  2625. Begin
  2626. shift64ExtraRightJamming
  2627. (zSig0, zSig1, 0, - shiftCount, zSig0, zSig1, zSig2 );
  2628. End;
  2629. zExp := zExp - shiftCount;
  2630. roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, c );
  2631. End;
  2632. {*
  2633. -------------------------------------------------------------------------------
  2634. Returns the result of converting the 32-bit two's complement integer `a' to
  2635. the single-precision floating-point format. The conversion is performed
  2636. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  2637. -------------------------------------------------------------------------------
  2638. *}
  2639. Function int32_to_float32( a: int32): float32rec; compilerproc;
  2640. Var
  2641. zSign : Flag;
  2642. Begin
  2643. if ( a = 0 ) then
  2644. Begin
  2645. int32_to_float32.float32 := 0;
  2646. exit;
  2647. End;
  2648. if ( a = sbits32 ($80000000) ) then
  2649. Begin
  2650. int32_to_float32.float32 := packFloat32( 1, $9E, 0 );
  2651. exit;
  2652. end;
  2653. zSign := flag( a < 0 );
  2654. If zSign<>0 then
  2655. a := -a;
  2656. int32_to_float32.float32:=
  2657. normalizeRoundAndPackFloat32( zSign, $9C, a );
  2658. End;
  2659. {*
  2660. -------------------------------------------------------------------------------
  2661. Returns the result of converting the 32-bit two's complement integer `a' to
  2662. the double-precision floating-point format. The conversion is performed
  2663. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  2664. -------------------------------------------------------------------------------
  2665. *}
  2666. Function int32_to_float64( a: int32) : float64;{$ifdef fpc} [public,Alias:'INT32_TO_FLOAT64'];compilerproc;{$endif}
  2667. var
  2668. zSign : flag;
  2669. absA : bits32;
  2670. shiftCount : int8;
  2671. zSig0, zSig1 : bits32;
  2672. Begin
  2673. if ( a = 0 ) then
  2674. Begin
  2675. packFloat64( 0, 0, 0, 0, result );
  2676. exit;
  2677. end;
  2678. zSign := flag( a < 0 );
  2679. if ZSign<>0 then
  2680. AbsA := -a
  2681. else
  2682. AbsA := a;
  2683. shiftCount := countLeadingZeros32( absA ) - 11;
  2684. if ( 0 <= shiftCount ) then
  2685. Begin
  2686. zSig0 := absA shl shiftCount;
  2687. zSig1 := 0;
  2688. End
  2689. else
  2690. Begin
  2691. shift64Right( absA, 0, - shiftCount, zSig0, zSig1 );
  2692. End;
  2693. packFloat64( zSign, $412 - shiftCount, zSig0, zSig1, result );
  2694. End;
  2695. {*
  2696. -------------------------------------------------------------------------------
  2697. Returns the result of converting the single-precision floating-point value
  2698. `a' to the 32-bit two's complement integer format. The conversion is
  2699. performed according to the IEC/IEEE Standard for Binary Floating-Point
  2700. Arithmetic---which means in particular that the conversion is rounded
  2701. according to the current rounding mode. If `a' is a NaN, the largest
  2702. positive integer is returned. Otherwise, if the conversion overflows, the
  2703. largest integer with the same sign as `a' is returned.
  2704. -------------------------------------------------------------------------------
  2705. *}
  2706. Function float32_to_int32( a : float32rec) : int32;compilerproc;
  2707. Var
  2708. aSign: flag;
  2709. aExp, shiftCount: int16;
  2710. aSig, aSigExtra: bits32;
  2711. z: int32;
  2712. roundingMode: int8;
  2713. Begin
  2714. aSig := extractFloat32Frac( a.float32 );
  2715. aExp := extractFloat32Exp( a.float32 );
  2716. aSign := extractFloat32Sign( a.float32 );
  2717. shiftCount := aExp - $96;
  2718. if ( 0 <= shiftCount ) then
  2719. Begin
  2720. if ( $9E <= aExp ) then
  2721. Begin
  2722. if ( a.float32 <> $CF000000 ) then
  2723. Begin
  2724. float_raise( float_flag_invalid );
  2725. if ( (aSign=0) or ( ( aExp = $FF ) and (aSig<>0) ) ) then
  2726. Begin
  2727. float32_to_int32 := $7FFFFFFF;
  2728. exit;
  2729. End;
  2730. End;
  2731. float32_to_int32 := sbits32 ($80000000);
  2732. exit;
  2733. End;
  2734. z := ( aSig or $00800000 ) shl shiftCount;
  2735. if ( aSign<>0 ) then z := - z;
  2736. End
  2737. else
  2738. Begin
  2739. if ( aExp < $7E ) then
  2740. Begin
  2741. aSigExtra := aExp OR aSig;
  2742. z := 0;
  2743. End
  2744. else
  2745. Begin
  2746. aSig := aSig OR $00800000;
  2747. aSigExtra := aSig shl ( shiftCount and 31 );
  2748. z := aSig shr ( - shiftCount );
  2749. End;
  2750. if ( aSigExtra<>0 ) then
  2751. softfloat_exception_flags := softfloat_exception_flags
  2752. or float_flag_inexact;
  2753. roundingMode := softfloat_rounding_mode;
  2754. if ( roundingMode = float_round_nearest_even ) then
  2755. Begin
  2756. if ( sbits32 (aSigExtra) < 0 ) then
  2757. Begin
  2758. Inc(z);
  2759. if ( bits32 ( aSigExtra shl 1 ) = 0 ) then
  2760. z := z and not 1;
  2761. End;
  2762. if ( aSign<>0 ) then
  2763. z := - z;
  2764. End
  2765. else
  2766. Begin
  2767. aSigExtra := flag( aSigExtra <> 0 );
  2768. if ( aSign<>0 ) then
  2769. Begin
  2770. z := z + (flag( roundingMode = float_round_down ) and aSigExtra);
  2771. z := - z;
  2772. End
  2773. else
  2774. Begin
  2775. z := z + (flag( roundingMode = float_round_up ) and aSigExtra);
  2776. End
  2777. End;
  2778. End;
  2779. float32_to_int32 := z;
  2780. End;
  2781. {*
  2782. -------------------------------------------------------------------------------
  2783. Returns the result of converting the single-precision floating-point value
  2784. `a' to the 32-bit two's complement integer format. The conversion is
  2785. performed according to the IEC/IEEE Standard for Binary Floating-Point
  2786. Arithmetic, except that the conversion is always rounded toward zero.
  2787. If `a' is a NaN, the largest positive integer is returned. Otherwise, if
  2788. the conversion overflows, the largest integer with the same sign as `a' is
  2789. returned.
  2790. -------------------------------------------------------------------------------
  2791. *}
  2792. Function float32_to_int32_round_to_zero( a: Float32rec ): int32;compilerproc;
  2793. Var
  2794. aSign : flag;
  2795. aExp, shiftCount : int16;
  2796. aSig : bits32;
  2797. z : int32;
  2798. Begin
  2799. aSig := extractFloat32Frac( a.float32 );
  2800. aExp := extractFloat32Exp( a.float32 );
  2801. aSign := extractFloat32Sign( a.float32 );
  2802. shiftCount := aExp - $9E;
  2803. if ( 0 <= shiftCount ) then
  2804. Begin
  2805. if ( a.float32 <> $CF000000 ) then
  2806. Begin
  2807. float_raise( float_flag_invalid );
  2808. if ( (aSign=0) or ( ( aExp = $FF ) and (aSig<>0) ) ) then
  2809. Begin
  2810. float32_to_int32_round_to_zero := $7FFFFFFF;
  2811. exit;
  2812. end;
  2813. End;
  2814. float32_to_int32_round_to_zero:= sbits32 ($80000000);
  2815. exit;
  2816. End
  2817. else
  2818. if ( aExp <= $7E ) then
  2819. Begin
  2820. if ( aExp or aSig )<>0 then
  2821. softfloat_exception_flags :=
  2822. softfloat_exception_flags or float_flag_inexact;
  2823. float32_to_int32_round_to_zero := 0;
  2824. exit;
  2825. End;
  2826. aSig := ( aSig or $00800000 ) shl 8;
  2827. z := aSig shr ( - shiftCount );
  2828. if ( bits32 ( aSig shl ( shiftCount and 31 ) )<> 0 ) then
  2829. Begin
  2830. softfloat_exception_flags :=
  2831. softfloat_exception_flags or float_flag_inexact;
  2832. End;
  2833. if ( aSign<>0 ) then z := - z;
  2834. float32_to_int32_round_to_zero := z;
  2835. End;
  2836. {*
  2837. -------------------------------------------------------------------------------
  2838. Returns the result of converting the single-precision floating-point value
  2839. `a' to the double-precision floating-point format. The conversion is
  2840. performed according to the IEC/IEEE Standard for Binary Floating-Point
  2841. Arithmetic.
  2842. -------------------------------------------------------------------------------
  2843. *}
  2844. Function float32_to_float64( a : float32rec) : Float64;compilerproc;
  2845. Var
  2846. aSign : flag;
  2847. aExp : int16;
  2848. aSig, zSig0, zSig1: bits32;
  2849. tmp : CommonNanT;
  2850. Begin
  2851. aSig := extractFloat32Frac( a.float32 );
  2852. aExp := extractFloat32Exp( a.float32 );
  2853. aSign := extractFloat32Sign( a.float32 );
  2854. if ( aExp = $FF ) then
  2855. Begin
  2856. if ( aSig<>0 ) then
  2857. Begin
  2858. float32ToCommonNaN(a.float32, tmp);
  2859. commonNaNToFloat64(tmp , result);
  2860. exit;
  2861. End;
  2862. packFloat64( aSign, $7FF, 0, 0, result);
  2863. exit;
  2864. End;
  2865. if ( aExp = 0 ) then
  2866. Begin
  2867. if ( aSig = 0 ) then
  2868. Begin
  2869. packFloat64( aSign, 0, 0, 0, result );
  2870. exit;
  2871. end;
  2872. normalizeFloat32Subnormal( aSig, aExp, aSig );
  2873. Dec(aExp);
  2874. End;
  2875. shift64Right( aSig, 0, 3, zSig0, zSig1 );
  2876. packFloat64( aSign, aExp + $380, zSig0, zSig1, result );
  2877. End;
  2878. {*
  2879. -------------------------------------------------------------------------------
  2880. Rounds the single-precision floating-point value `a' to an integer,
  2881. and returns the result as a single-precision floating-point value. The
  2882. operation is performed according to the IEC/IEEE Standard for Binary
  2883. Floating-Point Arithmetic.
  2884. -------------------------------------------------------------------------------
  2885. *}
  2886. Function float32_round_to_int( a: float32rec): float32rec;compilerproc;
  2887. Var
  2888. aSign: flag;
  2889. aExp: int16;
  2890. lastBitMask, roundBitsMask: bits32;
  2891. roundingMode: int8;
  2892. z: float32;
  2893. Begin
  2894. aExp := extractFloat32Exp( a.float32 );
  2895. if ( $96 <= aExp ) then
  2896. Begin
  2897. if ( ( aExp = $FF ) and (extractFloat32Frac( a.float32 )<>0) ) then
  2898. Begin
  2899. float32_round_to_int.float32 := propagateFloat32NaN( a.float32, a.float32 );
  2900. exit;
  2901. End;
  2902. float32_round_to_int:=a;
  2903. exit;
  2904. End;
  2905. if ( aExp <= $7E ) then
  2906. Begin
  2907. if ( bits32 ( a.float32 shl 1 ) = 0 ) then
  2908. Begin
  2909. float32_round_to_int:=a;
  2910. exit;
  2911. end;
  2912. softfloat_exception_flags
  2913. := softfloat_exception_flags OR float_flag_inexact;
  2914. aSign := extractFloat32Sign( a.float32 );
  2915. case ( softfloat_rounding_mode ) of
  2916. float_round_nearest_even:
  2917. Begin
  2918. if ( ( aExp = $7E ) and (extractFloat32Frac( a.float32 )<>0) ) then
  2919. Begin
  2920. float32_round_to_int.float32 := packFloat32( aSign, $7F, 0 );
  2921. exit;
  2922. End;
  2923. End;
  2924. float_round_down:
  2925. Begin
  2926. if aSign <> 0 then
  2927. float32_round_to_int.float32 := $BF800000
  2928. else
  2929. float32_round_to_int.float32 := 0;
  2930. exit;
  2931. End;
  2932. float_round_up:
  2933. Begin
  2934. if aSign <> 0 then
  2935. float32_round_to_int.float32 := $80000000
  2936. else
  2937. float32_round_to_int.float32 := $3F800000;
  2938. exit;
  2939. End;
  2940. end;
  2941. float32_round_to_int.float32 := packFloat32( aSign, 0, 0 );
  2942. End;
  2943. lastBitMask := 1;
  2944. {_____________________________!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!}
  2945. lastBitMask := lastBitMask shl ($96 - aExp);
  2946. roundBitsMask := lastBitMask - 1;
  2947. z := a.float32;
  2948. roundingMode := softfloat_rounding_mode;
  2949. if ( roundingMode = float_round_nearest_even ) then
  2950. Begin
  2951. z := z + (lastBitMask shr 1);
  2952. if ( ( z and roundBitsMask ) = 0 ) then
  2953. z := z and not lastBitMask;
  2954. End
  2955. else if ( roundingMode <> float_round_to_zero ) then
  2956. Begin
  2957. if ( (extractFloat32Sign( z ) xor flag(roundingMode = float_round_up ))<>0 ) then
  2958. Begin
  2959. z := z + roundBitsMask;
  2960. End;
  2961. End;
  2962. z := z and not roundBitsMask;
  2963. if ( z <> a.float32 ) then
  2964. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  2965. float32_round_to_int.float32 := z;
  2966. End;
  2967. {*
  2968. -------------------------------------------------------------------------------
  2969. Returns the result of adding the absolute values of the single-precision
  2970. floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
  2971. before being returned. `zSign' is ignored if the result is a NaN.
  2972. The addition is performed according to the IEC/IEEE Standard for Binary
  2973. Floating-Point Arithmetic.
  2974. -------------------------------------------------------------------------------
  2975. *}
  2976. Function addFloat32Sigs( a:float32; b: float32; zSign:flag ): float32;
  2977. Var
  2978. aExp, bExp, zExp: int16;
  2979. aSig, bSig, zSig: bits32;
  2980. expDiff: int16;
  2981. label roundAndPack;
  2982. Begin
  2983. aSig:=extractFloat32Frac( a );
  2984. aExp:=extractFloat32Exp( a );
  2985. bSig:=extractFloat32Frac( b );
  2986. bExp := extractFloat32Exp( b );
  2987. expDiff := aExp - bExp;
  2988. aSig := aSig shl 6;
  2989. bSig := bSig shl 6;
  2990. if ( 0 < expDiff ) then
  2991. Begin
  2992. if ( aExp = $FF ) then
  2993. Begin
  2994. if ( aSig <> 0) then
  2995. Begin
  2996. addFloat32Sigs := propagateFloat32NaN( a, b );
  2997. exit;
  2998. End;
  2999. addFloat32Sigs := a;
  3000. exit;
  3001. End;
  3002. if ( bExp = 0 ) then
  3003. Begin
  3004. Dec(expDiff);
  3005. End
  3006. else
  3007. Begin
  3008. bSig := bSig or $20000000;
  3009. End;
  3010. shift32RightJamming( bSig, expDiff, bSig );
  3011. zExp := aExp;
  3012. End
  3013. else
  3014. If ( expDiff < 0 ) then
  3015. Begin
  3016. if ( bExp = $FF ) then
  3017. Begin
  3018. if ( bSig<>0 ) then
  3019. Begin
  3020. addFloat32Sigs := propagateFloat32NaN( a, b );
  3021. exit;
  3022. end;
  3023. addFloat32Sigs := packFloat32( zSign, $FF, 0 );
  3024. exit;
  3025. End;
  3026. if ( aExp = 0 ) then
  3027. Begin
  3028. Inc(expDiff);
  3029. End
  3030. else
  3031. Begin
  3032. aSig := aSig OR $20000000;
  3033. End;
  3034. shift32RightJamming( aSig, - expDiff, aSig );
  3035. zExp := bExp;
  3036. End
  3037. else
  3038. Begin
  3039. if ( aExp = $FF ) then
  3040. Begin
  3041. if ( aSig OR bSig )<> 0 then
  3042. Begin
  3043. addFloat32Sigs := propagateFloat32NaN( a, b );
  3044. exit;
  3045. end;
  3046. addFloat32Sigs := a;
  3047. exit;
  3048. End;
  3049. if ( aExp = 0 ) then
  3050. Begin
  3051. addFloat32Sigs := packFloat32( zSign, 0, ( aSig + bSig ) shr 6 );
  3052. exit;
  3053. end;
  3054. zSig := $40000000 + aSig + bSig;
  3055. zExp := aExp;
  3056. goto roundAndPack;
  3057. End;
  3058. aSig := aSig OR $20000000;
  3059. zSig := ( aSig + bSig ) shl 1;
  3060. Dec(zExp);
  3061. if ( sbits32 (zSig) < 0 ) then
  3062. Begin
  3063. zSig := aSig + bSig;
  3064. Inc(zExp);
  3065. End;
  3066. roundAndPack:
  3067. addFloat32Sigs := roundAndPackFloat32( zSign, zExp, zSig );
  3068. End;
  3069. {*
  3070. -------------------------------------------------------------------------------
  3071. Returns the result of subtracting the absolute values of the single-
  3072. precision floating-point values `a' and `b'. If `zSign' is 1, the
  3073. difference is negated before being returned. `zSign' is ignored if the
  3074. result is a NaN. The subtraction is performed according to the IEC/IEEE
  3075. Standard for Binary Floating-Point Arithmetic.
  3076. -------------------------------------------------------------------------------
  3077. *}
  3078. Function subFloat32Sigs( a:float32; b:float32; zSign:flag ): float32;
  3079. Var
  3080. aExp, bExp, zExp: int16;
  3081. aSig, bSig, zSig: bits32;
  3082. expDiff : int16;
  3083. label aExpBigger;
  3084. label bExpBigger;
  3085. label aBigger;
  3086. label bBigger;
  3087. label normalizeRoundAndPack;
  3088. Begin
  3089. aSig := extractFloat32Frac( a );
  3090. aExp := extractFloat32Exp( a );
  3091. bSig := extractFloat32Frac( b );
  3092. bExp := extractFloat32Exp( b );
  3093. expDiff := aExp - bExp;
  3094. aSig := aSig shl 7;
  3095. bSig := bSig shl 7;
  3096. if ( 0 < expDiff ) then goto aExpBigger;
  3097. if ( expDiff < 0 ) then goto bExpBigger;
  3098. if ( aExp = $FF ) then
  3099. Begin
  3100. if ( aSig OR bSig )<> 0 then
  3101. Begin
  3102. subFloat32Sigs := propagateFloat32NaN( a, b );
  3103. exit;
  3104. End;
  3105. float_raise( float_flag_invalid );
  3106. subFloat32Sigs := float32_default_nan;
  3107. exit;
  3108. End;
  3109. if ( aExp = 0 ) then
  3110. Begin
  3111. aExp := 1;
  3112. bExp := 1;
  3113. End;
  3114. if ( bSig < aSig ) Then goto aBigger;
  3115. if ( aSig < bSig ) Then goto bBigger;
  3116. subFloat32Sigs := packFloat32( flag(softfloat_rounding_mode = float_round_down), 0, 0 );
  3117. exit;
  3118. bExpBigger:
  3119. if ( bExp = $FF ) then
  3120. Begin
  3121. if ( bSig<>0 ) then
  3122. Begin
  3123. subFloat32Sigs := propagateFloat32NaN( a, b );
  3124. exit;
  3125. End;
  3126. subFloat32Sigs := packFloat32( zSign XOR 1, $FF, 0 );
  3127. exit;
  3128. End;
  3129. if ( aExp = 0 ) then
  3130. Begin
  3131. Inc(expDiff);
  3132. End
  3133. else
  3134. Begin
  3135. aSig := aSig OR $40000000;
  3136. End;
  3137. shift32RightJamming( aSig, - expDiff, aSig );
  3138. bSig := bSig OR $40000000;
  3139. bBigger:
  3140. zSig := bSig - aSig;
  3141. zExp := bExp;
  3142. zSign := zSign xor 1;
  3143. goto normalizeRoundAndPack;
  3144. aExpBigger:
  3145. if ( aExp = $FF ) then
  3146. Begin
  3147. if ( aSig <> 0) then
  3148. Begin
  3149. subFloat32Sigs := propagateFloat32NaN( a, b );
  3150. exit;
  3151. End;
  3152. subFloat32Sigs := a;
  3153. exit;
  3154. End;
  3155. if ( bExp = 0 ) then
  3156. Begin
  3157. Dec(expDiff);
  3158. End
  3159. else
  3160. Begin
  3161. bSig := bSig OR $40000000;
  3162. End;
  3163. shift32RightJamming( bSig, expDiff, bSig );
  3164. aSig := aSig OR $40000000;
  3165. aBigger:
  3166. zSig := aSig - bSig;
  3167. zExp := aExp;
  3168. normalizeRoundAndPack:
  3169. Dec(zExp);
  3170. subFloat32Sigs := normalizeRoundAndPackFloat32( zSign, zExp, zSig );
  3171. End;
  3172. {*
  3173. -------------------------------------------------------------------------------
  3174. Returns the result of adding the single-precision floating-point values `a'
  3175. and `b'. The operation is performed according to the IEC/IEEE Standard for
  3176. Binary Floating-Point Arithmetic.
  3177. -------------------------------------------------------------------------------
  3178. *}
  3179. Function float32_add( a: float32rec; b:float32rec ): float32rec; compilerproc;
  3180. Var
  3181. aSign, bSign: Flag;
  3182. Begin
  3183. aSign := extractFloat32Sign( a.float32 );
  3184. bSign := extractFloat32Sign( b.float32 );
  3185. if ( aSign = bSign ) then
  3186. Begin
  3187. float32_add.float32 := addFloat32Sigs( a.float32, b.float32, aSign );
  3188. End
  3189. else
  3190. Begin
  3191. float32_add.float32 := subFloat32Sigs( a.float32, b.float32, aSign );
  3192. End;
  3193. End;
  3194. {*
  3195. -------------------------------------------------------------------------------
  3196. Returns the result of subtracting the single-precision floating-point values
  3197. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  3198. for Binary Floating-Point Arithmetic.
  3199. -------------------------------------------------------------------------------
  3200. *}
  3201. Function float32_sub( a: float32rec ; b:float32rec ): float32rec;compilerproc;
  3202. Var
  3203. aSign, bSign: flag;
  3204. Begin
  3205. aSign := extractFloat32Sign( a.float32 );
  3206. bSign := extractFloat32Sign( b.float32 );
  3207. if ( aSign = bSign ) then
  3208. Begin
  3209. float32_sub.float32 := subFloat32Sigs( a.float32, b.float32, aSign );
  3210. End
  3211. else
  3212. Begin
  3213. float32_sub.float32 := addFloat32Sigs( a.float32, b.float32, aSign );
  3214. End;
  3215. End;
  3216. {*
  3217. -------------------------------------------------------------------------------
  3218. Returns the result of multiplying the single-precision floating-point values
  3219. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  3220. for Binary Floating-Point Arithmetic.
  3221. -------------------------------------------------------------------------------
  3222. *}
  3223. Function float32_mul(a: float32rec; b: float32rec ) : float32rec; compilerproc;
  3224. Var
  3225. aSign, bSign, zSign: flag;
  3226. aExp, bExp, zExp : int16;
  3227. aSig, bSig, zSig0, zSig1: bits32;
  3228. Begin
  3229. aSig := extractFloat32Frac( a.float32 );
  3230. aExp := extractFloat32Exp( a.float32 );
  3231. aSign := extractFloat32Sign( a.float32 );
  3232. bSig := extractFloat32Frac( b.float32 );
  3233. bExp := extractFloat32Exp( b.float32 );
  3234. bSign := extractFloat32Sign( b.float32 );
  3235. zSign := aSign xor bSign;
  3236. if ( aExp = $FF ) then
  3237. Begin
  3238. if ( (aSig<>0) OR ( ( bExp = $FF ) AND (bSig<>0) ) ) then
  3239. Begin
  3240. float32_mul.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3241. End;
  3242. if ( ( bExp OR bSig ) = 0 ) then
  3243. Begin
  3244. float_raise( float_flag_invalid );
  3245. float32_mul.float32 := float32_default_nan;
  3246. exit;
  3247. End;
  3248. float32_mul.float32 := packFloat32( zSign, $FF, 0 );
  3249. exit;
  3250. End;
  3251. if ( bExp = $FF ) then
  3252. Begin
  3253. if ( bSig <> 0 ) then
  3254. Begin
  3255. float32_mul.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3256. exit;
  3257. End;
  3258. if ( ( aExp OR aSig ) = 0 ) then
  3259. Begin
  3260. float_raise( float_flag_invalid );
  3261. float32_mul.float32 := float32_default_nan;
  3262. exit;
  3263. End;
  3264. float32_mul.float32 := packFloat32( zSign, $FF, 0 );
  3265. exit;
  3266. End;
  3267. if ( aExp = 0 ) then
  3268. Begin
  3269. if ( aSig = 0 ) then
  3270. Begin
  3271. float32_mul.float32 := packFloat32( zSign, 0, 0 );
  3272. exit;
  3273. End;
  3274. normalizeFloat32Subnormal( aSig, aExp, aSig );
  3275. End;
  3276. if ( bExp = 0 ) then
  3277. Begin
  3278. if ( bSig = 0 ) then
  3279. Begin
  3280. float32_mul.float32 := packFloat32( zSign, 0, 0 );
  3281. exit;
  3282. End;
  3283. normalizeFloat32Subnormal( bSig, bExp, bSig );
  3284. End;
  3285. zExp := aExp + bExp - $7F;
  3286. aSig := ( aSig OR $00800000 ) shl 7;
  3287. bSig := ( bSig OR $00800000 ) shl 8;
  3288. mul32To64( aSig, bSig, zSig0, zSig1 );
  3289. zSig0 := zSig0 OR bits32( zSig1 <> 0 );
  3290. if ( 0 <= sbits32 ( zSig0 shl 1 ) ) then
  3291. Begin
  3292. zSig0 := zSig0 shl 1;
  3293. Dec(zExp);
  3294. End;
  3295. float32_mul.float32 := roundAndPackFloat32( zSign, zExp, zSig0 );
  3296. End;
  3297. {*
  3298. -------------------------------------------------------------------------------
  3299. Returns the result of dividing the single-precision floating-point value `a'
  3300. by the corresponding value `b'. The operation is performed according to the
  3301. IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  3302. -------------------------------------------------------------------------------
  3303. *}
  3304. Function float32_div(a: float32rec;b: float32rec ): float32rec; compilerproc;
  3305. Var
  3306. aSign, bSign, zSign: flag;
  3307. aExp, bExp, zExp: int16;
  3308. aSig, bSig, zSig, rem0, rem1, term0, term1: bits32;
  3309. Begin
  3310. aSig := extractFloat32Frac( a.float32 );
  3311. aExp := extractFloat32Exp( a.float32 );
  3312. aSign := extractFloat32Sign( a.float32 );
  3313. bSig := extractFloat32Frac( b.float32 );
  3314. bExp := extractFloat32Exp( b.float32 );
  3315. bSign := extractFloat32Sign( b.float32 );
  3316. zSign := aSign xor bSign;
  3317. if ( aExp = $FF ) then
  3318. Begin
  3319. if ( aSig <> 0 ) then
  3320. Begin
  3321. float32_div.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3322. exit;
  3323. End;
  3324. if ( bExp = $FF ) then
  3325. Begin
  3326. if ( bSig <> 0) then
  3327. Begin
  3328. float32_div.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3329. End;
  3330. float_raise( float_flag_invalid );
  3331. float32_div.float32 := float32_default_nan;
  3332. exit;
  3333. End;
  3334. float32_div.float32 := packFloat32( zSign, $FF, 0 );
  3335. exit;
  3336. End;
  3337. if ( bExp = $FF ) then
  3338. Begin
  3339. if ( bSig <> 0) then
  3340. Begin
  3341. float32_div.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3342. exit;
  3343. End;
  3344. float32_div.float32 := packFloat32( zSign, 0, 0 );
  3345. exit;
  3346. End;
  3347. if ( bExp = 0 ) Then
  3348. Begin
  3349. if ( bSig = 0 ) Then
  3350. Begin
  3351. if ( ( aExp OR aSig ) = 0 ) then
  3352. Begin
  3353. float_raise( float_flag_invalid );
  3354. float32_div.float32 := float32_default_nan;
  3355. exit;
  3356. End;
  3357. float_raise( float_flag_divbyzero );
  3358. float32_div.float32 := packFloat32( zSign, $FF, 0 );
  3359. exit;
  3360. End;
  3361. normalizeFloat32Subnormal( bSig, bExp, bSig );
  3362. End;
  3363. if ( aExp = 0 ) Then
  3364. Begin
  3365. if ( aSig = 0 ) Then
  3366. Begin
  3367. float32_div.float32 := packFloat32( zSign, 0, 0 );
  3368. exit;
  3369. End;
  3370. normalizeFloat32Subnormal( aSig, aExp, aSig );
  3371. End;
  3372. zExp := aExp - bExp + $7D;
  3373. aSig := ( aSig OR $00800000 ) shl 7;
  3374. bSig := ( bSig OR $00800000 ) shl 8;
  3375. if ( bSig <= ( aSig + aSig ) ) then
  3376. Begin
  3377. aSig := aSig shr 1;
  3378. Inc(zExp);
  3379. End;
  3380. zSig := estimateDiv64To32( aSig, 0, bSig );
  3381. if ( ( zSig and $3F ) <= 2 ) then
  3382. Begin
  3383. mul32To64( bSig, zSig, term0, term1 );
  3384. sub64( aSig, 0, term0, term1, rem0, rem1 );
  3385. while ( sbits32 (rem0) < 0 ) do
  3386. Begin
  3387. Dec(zSig);
  3388. add64( rem0, rem1, 0, bSig, rem0, rem1 );
  3389. End;
  3390. zSig := zSig or bits32( rem1 <> 0 );
  3391. End;
  3392. float32_div.float32 := roundAndPackFloat32( zSign, zExp, zSig );
  3393. End;
  3394. {*
  3395. -------------------------------------------------------------------------------
  3396. Returns the remainder of the single-precision floating-point value `a'
  3397. with respect to the corresponding value `b'. The operation is performed
  3398. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  3399. -------------------------------------------------------------------------------
  3400. *}
  3401. Function float32_rem(a: float32rec; b: float32rec ):float32rec; compilerproc;
  3402. Var
  3403. aSign, bSign, zSign: flag;
  3404. aExp, bExp, expDiff: int16;
  3405. aSig, bSig, q, allZero, alternateASig: bits32;
  3406. sigMean: sbits32;
  3407. Begin
  3408. aSig := extractFloat32Frac( a.float32 );
  3409. aExp := extractFloat32Exp( a.float32 );
  3410. aSign := extractFloat32Sign( a.float32 );
  3411. bSig := extractFloat32Frac( b.float32 );
  3412. bExp := extractFloat32Exp( b.float32 );
  3413. bSign := extractFloat32Sign( b.float32 );
  3414. if ( aExp = $FF ) then
  3415. Begin
  3416. if ( (aSig<>0) OR ( ( bExp = $FF ) AND (bSig <>0)) ) then
  3417. Begin
  3418. float32_rem.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3419. exit;
  3420. End;
  3421. float_raise( float_flag_invalid );
  3422. float32_rem.float32 := float32_default_nan;
  3423. exit;
  3424. End;
  3425. if ( bExp = $FF ) then
  3426. Begin
  3427. if ( bSig <> 0 ) then
  3428. Begin
  3429. float32_rem.float32 := propagateFloat32NaN( a.float32, b.float32 );
  3430. exit;
  3431. End;
  3432. float32_rem := a;
  3433. exit;
  3434. End;
  3435. if ( bExp = 0 ) then
  3436. Begin
  3437. if ( bSig = 0 ) then
  3438. Begin
  3439. float_raise( float_flag_invalid );
  3440. float32_rem.float32 := float32_default_nan;
  3441. exit;
  3442. End;
  3443. normalizeFloat32Subnormal( bSig, bExp, bSig );
  3444. End;
  3445. if ( aExp = 0 ) then
  3446. Begin
  3447. if ( aSig = 0 ) then
  3448. Begin
  3449. float32_rem := a;
  3450. exit;
  3451. End;
  3452. normalizeFloat32Subnormal( aSig, aExp, aSig );
  3453. End;
  3454. expDiff := aExp - bExp;
  3455. aSig := ( aSig OR $00800000 ) shl 8;
  3456. bSig := ( bSig OR $00800000 ) shl 8;
  3457. if ( expDiff < 0 ) then
  3458. Begin
  3459. if ( expDiff < -1 ) then
  3460. Begin
  3461. float32_rem := a;
  3462. exit;
  3463. End;
  3464. aSig := aSig shr 1;
  3465. End;
  3466. q := bits32( bSig <= aSig );
  3467. if ( q <> 0) then
  3468. aSig := aSig - bSig;
  3469. expDiff := expDiff - 32;
  3470. while ( 0 < expDiff ) do
  3471. Begin
  3472. q := estimateDiv64To32( aSig, 0, bSig );
  3473. if (2 < q) then
  3474. q := q - 2
  3475. else
  3476. q := 0;
  3477. aSig := - ( ( bSig shr 2 ) * q );
  3478. expDiff := expDiff - 30;
  3479. End;
  3480. expDiff := expDiff + 32;
  3481. if ( 0 < expDiff ) then
  3482. Begin
  3483. q := estimateDiv64To32( aSig, 0, bSig );
  3484. if (2 < q) then
  3485. q := q - 2
  3486. else
  3487. q := 0;
  3488. q := q shr (32 - expDiff);
  3489. bSig := bSig shr 2;
  3490. aSig := ( ( aSig shr 1 ) shl ( expDiff - 1 ) ) - bSig * q;
  3491. End
  3492. else
  3493. Begin
  3494. aSig := aSig shr 2;
  3495. bSig := bSig shr 2;
  3496. End;
  3497. Repeat
  3498. alternateASig := aSig;
  3499. Inc(q);
  3500. aSig := aSig - bSig;
  3501. Until not ( 0 <= sbits32 (aSig) );
  3502. sigMean := aSig + alternateASig;
  3503. if ( ( sigMean < 0 ) OR ( ( sigMean = 0 ) AND (( q and 1 )<>0) ) ) then
  3504. Begin
  3505. aSig := alternateASig;
  3506. End;
  3507. zSign := flag( sbits32 (aSig) < 0 );
  3508. if ( zSign<>0 ) then
  3509. aSig := - aSig;
  3510. float32_rem.float32 := normalizeRoundAndPackFloat32( aSign xor zSign, bExp, aSig );
  3511. End;
  3512. {*
  3513. -------------------------------------------------------------------------------
  3514. Returns the square root of the single-precision floating-point value `a'.
  3515. The operation is performed according to the IEC/IEEE Standard for Binary
  3516. Floating-Point Arithmetic.
  3517. -------------------------------------------------------------------------------
  3518. *}
  3519. Function float32_sqrt(a: float32rec ): float32rec;compilerproc;
  3520. Var
  3521. aSign : flag;
  3522. aExp, zExp : int16;
  3523. aSig, zSig, rem0, rem1, term0, term1: bits32;
  3524. label roundAndPack;
  3525. Begin
  3526. aSig := extractFloat32Frac( a.float32 );
  3527. aExp := extractFloat32Exp( a.float32 );
  3528. aSign := extractFloat32Sign( a.float32 );
  3529. if ( aExp = $FF ) then
  3530. Begin
  3531. if ( aSig <> 0) then
  3532. Begin
  3533. float32_sqrt.float32 := propagateFloat32NaN( a.float32, 0 );
  3534. exit;
  3535. End;
  3536. if ( aSign = 0) then
  3537. Begin
  3538. float32_sqrt := a;
  3539. exit;
  3540. End;
  3541. float_raise( float_flag_invalid );
  3542. float32_sqrt.float32 := float32_default_nan;
  3543. exit;
  3544. End;
  3545. if ( aSign <> 0) then
  3546. Begin
  3547. if ( ( aExp OR aSig ) = 0 ) then
  3548. Begin
  3549. float32_sqrt := a;
  3550. exit;
  3551. End;
  3552. float_raise( float_flag_invalid );
  3553. float32_sqrt.float32 := float32_default_nan;
  3554. exit;
  3555. End;
  3556. if ( aExp = 0 ) then
  3557. Begin
  3558. if ( aSig = 0 ) then
  3559. Begin
  3560. float32_sqrt.float32 := 0;
  3561. exit;
  3562. End;
  3563. normalizeFloat32Subnormal( aSig, aExp, aSig );
  3564. End;
  3565. zExp := ( ( aExp - $7F ) shr 1 ) + $7E;
  3566. aSig := ( aSig OR $00800000 ) shl 8;
  3567. zSig := estimateSqrt32( aExp, aSig ) + 2;
  3568. if ( ( zSig and $7F ) <= 5 ) then
  3569. Begin
  3570. if ( zSig < 2 ) then
  3571. Begin
  3572. zSig := $7FFFFFFF;
  3573. goto roundAndPack;
  3574. End
  3575. else
  3576. Begin
  3577. aSig := aSig shr (aExp and 1);
  3578. mul32To64( zSig, zSig, term0, term1 );
  3579. sub64( aSig, 0, term0, term1, rem0, rem1 );
  3580. while ( sbits32 (rem0) < 0 ) do
  3581. Begin
  3582. Dec(zSig);
  3583. shortShift64Left( 0, zSig, 1, term0, term1 );
  3584. term1 := term1 or 1;
  3585. add64( rem0, rem1, term0, term1, rem0, rem1 );
  3586. End;
  3587. zSig := zSig OR bits32( ( rem0 OR rem1 ) <> 0 );
  3588. End;
  3589. End;
  3590. shift32RightJamming( zSig, 1, zSig );
  3591. roundAndPack:
  3592. float32_sqrt.float32 := roundAndPackFloat32( 0, zExp, zSig );
  3593. End;
  3594. {*
  3595. -------------------------------------------------------------------------------
  3596. Returns 1 if the single-precision floating-point value `a' is equal to
  3597. the corresponding value `b', and 0 otherwise. The comparison is performed
  3598. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  3599. -------------------------------------------------------------------------------
  3600. *}
  3601. Function float32_eq( a:float32rec; b:float32rec): flag; compilerproc;
  3602. Begin
  3603. if ((( extractFloat32Exp( a.float32 ) = $FF ) AND (extractFloat32Frac( a.float32 )<>0))
  3604. OR ( ( extractFloat32Exp( b.float32 ) = $FF ) AND (extractFloat32Frac( b.float32 )<>0) )
  3605. ) then
  3606. Begin
  3607. if ( (float32_is_signaling_nan( a.float32 )<>0) OR (float32_is_signaling_nan( b.float32 )<>0) ) then
  3608. Begin
  3609. float_raise( float_flag_invalid );
  3610. End;
  3611. float32_eq := 0;
  3612. exit;
  3613. End;
  3614. float32_eq := flag( a.float32 = b.float32 ) OR flag( bits32 ( ( a.float32 OR b.float32 ) shl 1 ) = 0 );
  3615. End;
  3616. {*
  3617. -------------------------------------------------------------------------------
  3618. Returns 1 if the single-precision floating-point value `a' is less than
  3619. or equal to the corresponding value `b', and 0 otherwise. The comparison
  3620. is performed according to the IEC/IEEE Standard for Binary Floating-Point
  3621. Arithmetic.
  3622. -------------------------------------------------------------------------------
  3623. *}
  3624. Function float32_le( a: float32rec; b : float32rec ):flag;compilerproc;
  3625. var
  3626. aSign, bSign: flag;
  3627. Begin
  3628. if ( ( ( extractFloat32Exp( a.float32 ) = $FF ) AND (extractFloat32Frac( a.float32 )<>0) )
  3629. OR ( ( extractFloat32Exp( b.float32 ) = $FF ) AND (extractFloat32Frac( b.float32 )<>0) )
  3630. ) then
  3631. Begin
  3632. float_raise( float_flag_invalid );
  3633. float32_le := 0;
  3634. exit;
  3635. End;
  3636. aSign := extractFloat32Sign( a.float32 );
  3637. bSign := extractFloat32Sign( b.float32 );
  3638. if ( aSign <> bSign ) then
  3639. Begin
  3640. float32_le := aSign OR flag( bits32 ( ( a.float32 OR b.float32 ) shl 1 ) = 0 );
  3641. exit;
  3642. End;
  3643. float32_le := flag(flag( a.float32 = b.float32 ) OR flag( aSign xor flag( a.float32 < b.float32 ) ));
  3644. End;
  3645. {*
  3646. -------------------------------------------------------------------------------
  3647. Returns 1 if the single-precision floating-point value `a' is less than
  3648. the corresponding value `b', and 0 otherwise. The comparison is performed
  3649. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  3650. -------------------------------------------------------------------------------
  3651. *}
  3652. Function float32_lt( a:float32rec ; b : float32rec): flag; compilerproc;
  3653. var
  3654. aSign, bSign: flag;
  3655. Begin
  3656. if ( ( ( extractFloat32Exp( a.float32 ) = $FF ) AND (extractFloat32Frac( a.float32 ) <>0))
  3657. OR ( ( extractFloat32Exp( b.float32 ) = $FF ) AND (extractFloat32Frac( b.float32 ) <>0) )
  3658. ) then
  3659. Begin
  3660. float_raise( float_flag_invalid );
  3661. float32_lt :=0;
  3662. exit;
  3663. End;
  3664. aSign := extractFloat32Sign( a.float32 );
  3665. bSign := extractFloat32Sign( b.float32 );
  3666. if ( aSign <> bSign ) then
  3667. Begin
  3668. float32_lt := aSign AND flag( bits32 ( ( a.float32 OR b.float32 ) shl 1 ) <> 0 );
  3669. exit;
  3670. End;
  3671. float32_lt := flag(flag( a.float32 <> b.float32 ) AND flag( aSign xor flag( a.float32 < b.float32 ) ));
  3672. End;
  3673. {*
  3674. -------------------------------------------------------------------------------
  3675. Returns 1 if the single-precision floating-point value `a' is equal to
  3676. the corresponding value `b', and 0 otherwise. The invalid exception is
  3677. raised if either operand is a NaN. Otherwise, the comparison is performed
  3678. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  3679. -------------------------------------------------------------------------------
  3680. *}
  3681. Function float32_eq_signaling( a: float32; b: float32) : flag;
  3682. Begin
  3683. if ( ( ( extractFloat32Exp( a ) = $FF ) AND (extractFloat32Frac( a ) <> 0))
  3684. OR ( ( extractFloat32Exp( b ) = $FF ) AND (extractFloat32Frac( b ) <> 0))
  3685. ) then
  3686. Begin
  3687. float_raise( float_flag_invalid );
  3688. float32_eq_signaling := 0;
  3689. exit;
  3690. End;
  3691. float32_eq_signaling := (flag( a = b ) OR flag( bits32 ( ( a OR b ) shl 1 ) = 0 ));
  3692. End;
  3693. {*
  3694. -------------------------------------------------------------------------------
  3695. Returns 1 if the single-precision floating-point value `a' is less than or
  3696. equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
  3697. cause an exception. Otherwise, the comparison is performed according to the
  3698. IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  3699. -------------------------------------------------------------------------------
  3700. *}
  3701. Function float32_le_quiet( a: float32 ; b : float32 ): flag;
  3702. Var
  3703. aSign, bSign: flag;
  3704. aExp, bExp: int16;
  3705. Begin
  3706. if ( ( ( extractFloat32Exp( a ) = $FF ) AND (extractFloat32Frac( a )<>0) )
  3707. OR ( ( extractFloat32Exp( b ) = $FF ) AND (extractFloat32Frac( b )<>0) )
  3708. ) then
  3709. Begin
  3710. if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
  3711. Begin
  3712. float_raise( float_flag_invalid );
  3713. End;
  3714. float32_le_quiet := 0;
  3715. exit;
  3716. End;
  3717. aSign := extractFloat32Sign( a );
  3718. bSign := extractFloat32Sign( b );
  3719. if ( aSign <> bSign ) then
  3720. Begin
  3721. float32_le_quiet := aSign OR flag( bits32 ( ( a OR b ) shl 1 ) = 0 );
  3722. exit;
  3723. End;
  3724. float32_le_quiet := flag(flag( a = b ) OR flag( aSign xor flag( a < b ) ));
  3725. End;
  3726. {*
  3727. -------------------------------------------------------------------------------
  3728. Returns 1 if the single-precision floating-point value `a' is less than
  3729. the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
  3730. exception. Otherwise, the comparison is performed according to the IEC/IEEE
  3731. Standard for Binary Floating-Point Arithmetic.
  3732. -------------------------------------------------------------------------------
  3733. *}
  3734. Function float32_lt_quiet( a: float32 ; b: float32 ): flag;
  3735. Var
  3736. aSign, bSign: flag;
  3737. Begin
  3738. if ( ( ( extractFloat32Exp( a ) = $FF ) AND (extractFloat32Frac( a )<>0) )
  3739. OR ( ( extractFloat32Exp( b ) = $FF ) AND (extractFloat32Frac( b )<>0) )
  3740. ) then
  3741. Begin
  3742. if ( (float32_is_signaling_nan( a )<>0) OR (float32_is_signaling_nan( b )<>0) ) then
  3743. Begin
  3744. float_raise( float_flag_invalid );
  3745. End;
  3746. float32_lt_quiet := 0;
  3747. exit;
  3748. End;
  3749. aSign := extractFloat32Sign( a );
  3750. bSign := extractFloat32Sign( b );
  3751. if ( aSign <> bSign ) then
  3752. Begin
  3753. float32_lt_quiet := aSign AND flag( bits32 ( ( a OR b ) shl 1 ) <> 0 );
  3754. exit;
  3755. End;
  3756. float32_lt_quiet := flag(flag( a <> b ) AND ( aSign xor flag( a < b ) ));
  3757. End;
  3758. {*
  3759. -------------------------------------------------------------------------------
  3760. Returns the result of converting the double-precision floating-point value
  3761. `a' to the 32-bit two's complement integer format. The conversion is
  3762. performed according to the IEC/IEEE Standard for Binary Floating-Point
  3763. Arithmetic---which means in particular that the conversion is rounded
  3764. according to the current rounding mode. If `a' is a NaN, the largest
  3765. positive integer is returned. Otherwise, if the conversion overflows, the
  3766. largest integer with the same sign as `a' is returned.
  3767. -------------------------------------------------------------------------------
  3768. *}
  3769. Function float64_to_int32(a: float64): int32;{$ifdef fpc} [public,Alias:'FLOAT64_TO_INT32'];compilerproc;{$endif}
  3770. var
  3771. aSign: flag;
  3772. aExp, shiftCount: int16;
  3773. aSig0, aSig1, absZ, aSigExtra: bits32;
  3774. z: int32;
  3775. roundingMode: int8;
  3776. label invalid;
  3777. Begin
  3778. aSig1 := extractFloat64Frac1( a );
  3779. aSig0 := extractFloat64Frac0( a );
  3780. aExp := extractFloat64Exp( a );
  3781. aSign := extractFloat64Sign( a );
  3782. shiftCount := aExp - $413;
  3783. if ( 0 <= shiftCount ) then
  3784. Begin
  3785. if ( $41E < aExp ) then
  3786. Begin
  3787. if ( ( aExp = $7FF ) AND (( aSig0 OR aSig1 )<>0) ) then
  3788. aSign := 0;
  3789. goto invalid;
  3790. End;
  3791. shortShift64Left(
  3792. aSig0 OR $00100000, aSig1, shiftCount, absZ, aSigExtra );
  3793. if ( $80000000 < absZ ) then
  3794. goto invalid;
  3795. End
  3796. else
  3797. Begin
  3798. aSig1 := flag( aSig1 <> 0 );
  3799. if ( aExp < $3FE ) then
  3800. Begin
  3801. aSigExtra := aExp OR aSig0 OR aSig1;
  3802. absZ := 0;
  3803. End
  3804. else
  3805. Begin
  3806. aSig0 := aSig0 OR $00100000;
  3807. aSigExtra := ( aSig0 shl ( shiftCount and 31 ) ) OR aSig1;
  3808. absZ := aSig0 shr ( - shiftCount );
  3809. End;
  3810. End;
  3811. roundingMode := softfloat_rounding_mode;
  3812. if ( roundingMode = float_round_nearest_even ) then
  3813. Begin
  3814. if ( sbits32(aSigExtra) < 0 ) then
  3815. Begin
  3816. Inc(absZ);
  3817. if ( bits32 ( aSigExtra shl 1 ) = 0 ) then
  3818. absZ := absZ and not 1;
  3819. End;
  3820. if aSign <> 0 then
  3821. z := - absZ
  3822. else
  3823. z := absZ;
  3824. End
  3825. else
  3826. Begin
  3827. aSigExtra := bits32( aSigExtra <> 0 );
  3828. if ( aSign <> 0) then
  3829. Begin
  3830. z := - ( absZ
  3831. + ( int32( roundingMode = float_round_down ) and aSigExtra ) );
  3832. End
  3833. else
  3834. Begin
  3835. z := absZ + ( int32( roundingMode = float_round_up ) and aSigExtra );
  3836. End
  3837. End;
  3838. if ( (( aSign xor flag( z < 0 ) )<>0) AND (z<>0) ) then
  3839. Begin
  3840. invalid:
  3841. float_raise( float_flag_invalid );
  3842. if (aSign <> 0 ) then
  3843. float64_to_int32 := sbits32 ($80000000)
  3844. else
  3845. float64_to_int32 := $7FFFFFFF;
  3846. exit;
  3847. End;
  3848. if ( aSigExtra <> 0) then
  3849. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  3850. float64_to_int32 := z;
  3851. End;
  3852. {*
  3853. -------------------------------------------------------------------------------
  3854. Returns the result of converting the double-precision floating-point value
  3855. `a' to the 32-bit two's complement integer format. The conversion is
  3856. performed according to the IEC/IEEE Standard for Binary Floating-Point
  3857. Arithmetic, except that the conversion is always rounded toward zero.
  3858. If `a' is a NaN, the largest positive integer is returned. Otherwise, if
  3859. the conversion overflows, the largest integer with the same sign as `a' is
  3860. returned.
  3861. -------------------------------------------------------------------------------
  3862. *}
  3863. Function float64_to_int32_round_to_zero(a: float64 ): int32;
  3864. {$ifdef fpc} [public,Alias:'FLOAT64_TO_INT32_ROUND_TO_ZERO'];compilerproc;{$endif}
  3865. Var
  3866. aSign: flag;
  3867. aExp, shiftCount: int16;
  3868. aSig0, aSig1, absZ, aSigExtra: bits32;
  3869. z: int32;
  3870. label invalid;
  3871. Begin
  3872. aSig1 := extractFloat64Frac1( a );
  3873. aSig0 := extractFloat64Frac0( a );
  3874. aExp := extractFloat64Exp( a );
  3875. aSign := extractFloat64Sign( a );
  3876. shiftCount := aExp - $413;
  3877. if ( 0 <= shiftCount ) then
  3878. Begin
  3879. if ( $41E < aExp ) then
  3880. Begin
  3881. if ( ( aExp = $7FF ) AND (( aSig0 OR aSig1 )<>0) ) then
  3882. aSign := 0;
  3883. goto invalid;
  3884. End;
  3885. shortShift64Left(
  3886. aSig0 OR $00100000, aSig1, shiftCount, absZ, aSigExtra );
  3887. End
  3888. else
  3889. Begin
  3890. if ( aExp < $3FF ) then
  3891. Begin
  3892. if ( aExp OR aSig0 OR aSig1 )<>0 then
  3893. Begin
  3894. softfloat_exception_flags :=
  3895. softfloat_exception_flags or float_flag_inexact;
  3896. End;
  3897. float64_to_int32_round_to_zero := 0;
  3898. exit;
  3899. End;
  3900. aSig0 := aSig0 or $00100000;
  3901. aSigExtra := ( aSig0 shl ( shiftCount and 31 ) ) OR aSig1;
  3902. absZ := aSig0 shr ( - shiftCount );
  3903. End;
  3904. if aSign <> 0 then
  3905. z := - absZ
  3906. else
  3907. z := absZ;
  3908. if ( (( aSign xor flag( z < 0 )) <> 0) AND (z<>0) ) then
  3909. Begin
  3910. invalid:
  3911. float_raise( float_flag_invalid );
  3912. if (aSign <> 0) then
  3913. float64_to_int32_round_to_zero := sbits32 ($80000000)
  3914. else
  3915. float64_to_int32_round_to_zero := $7FFFFFFF;
  3916. exit;
  3917. End;
  3918. if ( aSigExtra <> 0) then
  3919. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  3920. float64_to_int32_round_to_zero := z;
  3921. End;
  3922. {*
  3923. -------------------------------------------------------------------------------
  3924. Returns the result of converting the double-precision floating-point value
  3925. `a' to the single-precision floating-point format. The conversion is
  3926. performed according to the IEC/IEEE Standard for Binary Floating-Point
  3927. Arithmetic.
  3928. -------------------------------------------------------------------------------
  3929. *}
  3930. Function float64_to_float32(a: float64 ): float32rec;compilerproc;
  3931. Var
  3932. aSign: flag;
  3933. aExp: int16;
  3934. aSig0, aSig1, zSig: bits32;
  3935. allZero: bits32;
  3936. tmp : CommonNanT;
  3937. Begin
  3938. aSig1 := extractFloat64Frac1( a );
  3939. aSig0 := extractFloat64Frac0( a );
  3940. aExp := extractFloat64Exp( a );
  3941. aSign := extractFloat64Sign( a );
  3942. if ( aExp = $7FF ) then
  3943. Begin
  3944. if ( aSig0 OR aSig1 ) <> 0 then
  3945. Begin
  3946. float64ToCommonNaN( a, tmp );
  3947. float64_to_float32.float32 := commonNaNToFloat32( tmp );
  3948. exit;
  3949. End;
  3950. float64_to_float32.float32 := packFloat32( aSign, $FF, 0 );
  3951. exit;
  3952. End;
  3953. shift64RightJamming( aSig0, aSig1, 22, allZero, zSig );
  3954. if ( aExp <> 0) then
  3955. zSig := zSig OR $40000000;
  3956. float64_to_float32.float32 := roundAndPackFloat32( aSign, aExp - $381, zSig );
  3957. End;
  3958. {*
  3959. -------------------------------------------------------------------------------
  3960. Rounds the double-precision floating-point value `a' to an integer,
  3961. and returns the result as a double-precision floating-point value. The
  3962. operation is performed according to the IEC/IEEE Standard for Binary
  3963. Floating-Point Arithmetic.
  3964. -------------------------------------------------------------------------------
  3965. *}
  3966. function float64_round_to_int(a: float64) : Float64;{$ifdef fpc} [public,Alias:'FLOAT64_ROUND_TO_INT'];compilerproc;{$endif}
  3967. Var
  3968. aSign: flag;
  3969. aExp: int16;
  3970. lastBitMask, roundBitsMask: bits32;
  3971. roundingMode: int8;
  3972. z: float64;
  3973. Begin
  3974. aExp := extractFloat64Exp( a );
  3975. if ( $413 <= aExp ) then
  3976. Begin
  3977. if ( $433 <= aExp ) then
  3978. Begin
  3979. if ( ( aExp = $7FF )
  3980. AND
  3981. (
  3982. ( extractFloat64Frac0( a ) OR extractFloat64Frac1( a )
  3983. ) <>0)
  3984. ) then
  3985. Begin
  3986. propagateFloat64NaN( a, a, result );
  3987. exit;
  3988. End;
  3989. result := a;
  3990. exit;
  3991. End;
  3992. lastBitMask := 1;
  3993. lastBitMask := ( lastBitMask shl ( $432 - aExp ) ) shl 1;
  3994. roundBitsMask := lastBitMask - 1;
  3995. z := a;
  3996. roundingMode := softfloat_rounding_mode;
  3997. if ( roundingMode = float_round_nearest_even ) then
  3998. Begin
  3999. if ( lastBitMask <> 0) then
  4000. Begin
  4001. add64( z.high, z.low, 0, lastBitMask shr 1, z.high, z.low );
  4002. if ( ( z.low and roundBitsMask ) = 0 ) then
  4003. z.low := z.low and not lastBitMask;
  4004. End
  4005. else
  4006. Begin
  4007. if ( sbits32 (z.low) < 0 ) then
  4008. Begin
  4009. Inc(z.high);
  4010. if ( bits32 ( z.low shl 1 ) = 0 ) then
  4011. z.high := z.high and not 1;
  4012. End;
  4013. End;
  4014. End
  4015. else if ( roundingMode <> float_round_to_zero ) then
  4016. Begin
  4017. if ( extractFloat64Sign( z )
  4018. xor flag( roundingMode = float_round_up ) )<> 0 then
  4019. Begin
  4020. add64( z.high, z.low, 0, roundBitsMask, z.high, z.low );
  4021. End;
  4022. End;
  4023. z.low := z.low and not roundBitsMask;
  4024. End
  4025. else
  4026. Begin
  4027. if ( aExp <= $3FE ) then
  4028. Begin
  4029. if ( ( ( bits32 ( a.high shl 1 ) ) OR a.low ) = 0 ) then
  4030. Begin
  4031. result := a;
  4032. exit;
  4033. End;
  4034. softfloat_exception_flags := softfloat_exception_flags or
  4035. float_flag_inexact;
  4036. aSign := extractFloat64Sign( a );
  4037. case ( softfloat_rounding_mode ) of
  4038. float_round_nearest_even:
  4039. Begin
  4040. if ( ( aExp = $3FE )
  4041. AND ( (extractFloat64Frac0( a ) OR extractFloat64Frac1( a ) )<>0)
  4042. ) then
  4043. Begin
  4044. packFloat64( aSign, $3FF, 0, 0, result );
  4045. exit;
  4046. End;
  4047. End;
  4048. float_round_down:
  4049. Begin
  4050. if aSign<>0 then
  4051. packFloat64( 1, $3FF, 0, 0, result )
  4052. else
  4053. packFloat64( 0, 0, 0, 0, result );
  4054. exit;
  4055. End;
  4056. float_round_up:
  4057. Begin
  4058. if aSign <> 0 then
  4059. packFloat64( 1, 0, 0, 0, result )
  4060. else
  4061. packFloat64( 0, $3FF, 0, 0, result );
  4062. exit;
  4063. End;
  4064. end;
  4065. packFloat64( aSign, 0, 0, 0, result );
  4066. exit;
  4067. End;
  4068. lastBitMask := 1;
  4069. lastBitMask := lastBitMask shl ($413 - aExp);
  4070. roundBitsMask := lastBitMask - 1;
  4071. z.low := 0;
  4072. z.high := a.high;
  4073. roundingMode := softfloat_rounding_mode;
  4074. if ( roundingMode = float_round_nearest_even ) then
  4075. Begin
  4076. z.high := z.high + lastBitMask shr 1;
  4077. if ( ( ( z.high and roundBitsMask ) OR a.low ) = 0 ) then
  4078. Begin
  4079. z.high := z.high and not lastBitMask;
  4080. End;
  4081. End
  4082. else if ( roundingMode <> float_round_to_zero ) then
  4083. Begin
  4084. if ( extractFloat64Sign( z )
  4085. xor flag( roundingMode = float_round_up ) )<> 0 then
  4086. Begin
  4087. z.high := z.high or bits32( a.low <> 0 );
  4088. z.high := z.high + roundBitsMask;
  4089. End;
  4090. End;
  4091. z.high := z.high and not roundBitsMask;
  4092. End;
  4093. if ( ( z.low <> a.low ) OR ( z.high <> a.high ) ) then
  4094. Begin
  4095. softfloat_exception_flags :=
  4096. softfloat_exception_flags or float_flag_inexact;
  4097. End;
  4098. result := z;
  4099. End;
  4100. {*
  4101. -------------------------------------------------------------------------------
  4102. Returns the result of adding the absolute values of the double-precision
  4103. floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
  4104. before being returned. `zSign' is ignored if the result is a NaN.
  4105. The addition is performed according to the IEC/IEEE Standard for Binary
  4106. Floating-Point Arithmetic.
  4107. -------------------------------------------------------------------------------
  4108. *}
  4109. Procedure addFloat64Sigs( a:float64 ; b: float64 ; zSign:flag; Var out: float64 );
  4110. Var
  4111. aExp, bExp, zExp: int16;
  4112. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits32;
  4113. expDiff: int16;
  4114. label shiftRight1;
  4115. label roundAndPack;
  4116. Begin
  4117. aSig1 := extractFloat64Frac1( a );
  4118. aSig0 := extractFloat64Frac0( a );
  4119. aExp := extractFloat64Exp( a );
  4120. bSig1 := extractFloat64Frac1( b );
  4121. bSig0 := extractFloat64Frac0( b );
  4122. bExp := extractFloat64Exp( b );
  4123. expDiff := aExp - bExp;
  4124. if ( 0 < expDiff ) then
  4125. Begin
  4126. if ( aExp = $7FF ) then
  4127. Begin
  4128. if ( aSig0 OR aSig1 ) <> 0 then
  4129. Begin
  4130. propagateFloat64NaN( a, b, out );
  4131. exit;
  4132. end;
  4133. out := a;
  4134. exit;
  4135. End;
  4136. if ( bExp = 0 ) then
  4137. Begin
  4138. Dec(expDiff);
  4139. End
  4140. else
  4141. Begin
  4142. bSig0 := bSig0 or $00100000;
  4143. End;
  4144. shift64ExtraRightJamming(
  4145. bSig0, bSig1, 0, expDiff, bSig0, bSig1, zSig2 );
  4146. zExp := aExp;
  4147. End
  4148. else if ( expDiff < 0 ) then
  4149. Begin
  4150. if ( bExp = $7FF ) then
  4151. Begin
  4152. if ( bSig0 OR bSig1 ) <> 0 then
  4153. Begin
  4154. propagateFloat64NaN( a, b, out );
  4155. exit;
  4156. End;
  4157. packFloat64( zSign, $7FF, 0, 0, out );
  4158. End;
  4159. if ( aExp = 0 ) then
  4160. Begin
  4161. Inc(expDiff);
  4162. End
  4163. else
  4164. Begin
  4165. aSig0 := aSig0 or $00100000;
  4166. End;
  4167. shift64ExtraRightJamming(
  4168. aSig0, aSig1, 0, - expDiff, aSig0, aSig1, zSig2 );
  4169. zExp := bExp;
  4170. End
  4171. else
  4172. Begin
  4173. if ( aExp = $7FF ) then
  4174. Begin
  4175. if ( aSig0 OR aSig1 OR bSig0 OR bSig1 ) <> 0 then
  4176. Begin
  4177. propagateFloat64NaN( a, b, out );
  4178. exit;
  4179. End;
  4180. out := a;
  4181. exit;
  4182. End;
  4183. add64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
  4184. if ( aExp = 0 ) then
  4185. Begin
  4186. packFloat64( zSign, 0, zSig0, zSig1, out );
  4187. exit;
  4188. End;
  4189. zSig2 := 0;
  4190. zSig0 := zSig0 or $00200000;
  4191. zExp := aExp;
  4192. goto shiftRight1;
  4193. End;
  4194. aSig0 := aSig0 or $00100000;
  4195. add64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
  4196. Dec(zExp);
  4197. if ( zSig0 < $00200000 ) then
  4198. goto roundAndPack;
  4199. Inc(zExp);
  4200. shiftRight1:
  4201. shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
  4202. roundAndPack:
  4203. roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, out );
  4204. End;
  4205. {*
  4206. -------------------------------------------------------------------------------
  4207. Returns the result of subtracting the absolute values of the double-
  4208. precision floating-point values `a' and `b'. If `zSign' is 1, the
  4209. difference is negated before being returned. `zSign' is ignored if the
  4210. result is a NaN. The subtraction is performed according to the IEC/IEEE
  4211. Standard for Binary Floating-Point Arithmetic.
  4212. -------------------------------------------------------------------------------
  4213. *}
  4214. Procedure subFloat64Sigs( a:float64; b: float64 ; zSign:flag; Var out: float64 );
  4215. Var
  4216. aExp, bExp, zExp: int16;
  4217. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1: bits32;
  4218. expDiff: int16;
  4219. z: float64;
  4220. label aExpBigger;
  4221. label bExpBigger;
  4222. label aBigger;
  4223. label bBigger;
  4224. label normalizeRoundAndPack;
  4225. Begin
  4226. aSig1 := extractFloat64Frac1( a );
  4227. aSig0 := extractFloat64Frac0( a );
  4228. aExp := extractFloat64Exp( a );
  4229. bSig1 := extractFloat64Frac1( b );
  4230. bSig0 := extractFloat64Frac0( b );
  4231. bExp := extractFloat64Exp( b );
  4232. expDiff := aExp - bExp;
  4233. shortShift64Left( aSig0, aSig1, 10, aSig0, aSig1 );
  4234. shortShift64Left( bSig0, bSig1, 10, bSig0, bSig1 );
  4235. if ( 0 < expDiff ) then goto aExpBigger;
  4236. if ( expDiff < 0 ) then goto bExpBigger;
  4237. if ( aExp = $7FF ) then
  4238. Begin
  4239. if ( aSig0 OR aSig1 OR bSig0 OR bSig1 ) <> 0 then
  4240. Begin
  4241. propagateFloat64NaN( a, b, out );
  4242. exit;
  4243. End;
  4244. float_raise( float_flag_invalid );
  4245. z.low := float64_default_nan_low;
  4246. z.high := float64_default_nan_high;
  4247. out := z;
  4248. exit;
  4249. End;
  4250. if ( aExp = 0 ) then
  4251. Begin
  4252. aExp := 1;
  4253. bExp := 1;
  4254. End;
  4255. if ( bSig0 < aSig0 ) then goto aBigger;
  4256. if ( aSig0 < bSig0 ) then goto bBigger;
  4257. if ( bSig1 < aSig1 ) then goto aBigger;
  4258. if ( aSig1 < bSig1 ) then goto bBigger;
  4259. packFloat64( flag(softfloat_rounding_mode = float_round_down), 0, 0, 0 , out);
  4260. exit;
  4261. bExpBigger:
  4262. if ( bExp = $7FF ) then
  4263. Begin
  4264. if ( bSig0 OR bSig1 ) <> 0 then
  4265. Begin
  4266. propagateFloat64NaN( a, b, out );
  4267. exit;
  4268. End;
  4269. packFloat64( zSign xor 1, $7FF, 0, 0, out );
  4270. exit;
  4271. End;
  4272. if ( aExp = 0 ) then
  4273. Begin
  4274. Inc(expDiff);
  4275. End
  4276. else
  4277. Begin
  4278. aSig0 := aSig0 or $40000000;
  4279. End;
  4280. shift64RightJamming( aSig0, aSig1, - expDiff, aSig0, aSig1 );
  4281. bSig0 := bSig0 or $40000000;
  4282. bBigger:
  4283. sub64( bSig0, bSig1, aSig0, aSig1, zSig0, zSig1 );
  4284. zExp := bExp;
  4285. zSign := zSign xor 1;
  4286. goto normalizeRoundAndPack;
  4287. aExpBigger:
  4288. if ( aExp = $7FF ) then
  4289. Begin
  4290. if ( aSig0 OR aSig1 ) <> 0 then
  4291. Begin
  4292. propagateFloat64NaN( a, b, out );
  4293. exit;
  4294. End;
  4295. out := a;
  4296. exit;
  4297. End;
  4298. if ( bExp = 0 ) then
  4299. Begin
  4300. Dec(expDiff);
  4301. End
  4302. else
  4303. Begin
  4304. bSig0 := bSig0 or $40000000;
  4305. End;
  4306. shift64RightJamming( bSig0, bSig1, expDiff, bSig0, bSig1 );
  4307. aSig0 := aSig0 or $40000000;
  4308. aBigger:
  4309. sub64( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
  4310. zExp := aExp;
  4311. normalizeRoundAndPack:
  4312. Dec(zExp);
  4313. normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1, out );
  4314. End;
  4315. {*
  4316. -------------------------------------------------------------------------------
  4317. Returns the result of adding the double-precision floating-point values `a'
  4318. and `b'. The operation is performed according to the IEC/IEEE Standard for
  4319. Binary Floating-Point Arithmetic.
  4320. -------------------------------------------------------------------------------
  4321. *}
  4322. Function float64_add( a: float64; b : float64) : Float64;
  4323. {$ifdef fpc}[public,Alias:'FLOAT64_ADD'];compilerproc;{$endif}
  4324. Var
  4325. aSign, bSign: flag;
  4326. Begin
  4327. aSign := extractFloat64Sign( a );
  4328. bSign := extractFloat64Sign( b );
  4329. if ( aSign = bSign ) then
  4330. Begin
  4331. addFloat64Sigs( a, b, aSign, result );
  4332. End
  4333. else
  4334. Begin
  4335. subFloat64Sigs( a, b, aSign, result );
  4336. End;
  4337. End;
  4338. {*
  4339. -------------------------------------------------------------------------------
  4340. Returns the result of subtracting the double-precision floating-point values
  4341. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  4342. for Binary Floating-Point Arithmetic.
  4343. -------------------------------------------------------------------------------
  4344. *}
  4345. Function float64_sub(a: float64; b : float64) : Float64;
  4346. {$ifdef fpc}[public,Alias:'FLOAT64_SUB'];compilerproc;{$endif}
  4347. Var
  4348. aSign, bSign: flag;
  4349. Begin
  4350. aSign := extractFloat64Sign( a );
  4351. bSign := extractFloat64Sign( b );
  4352. if ( aSign = bSign ) then
  4353. Begin
  4354. subFloat64Sigs( a, b, aSign, result );
  4355. End
  4356. else
  4357. Begin
  4358. addFloat64Sigs( a, b, aSign, result );
  4359. End;
  4360. End;
  4361. {*
  4362. -------------------------------------------------------------------------------
  4363. Returns the result of multiplying the double-precision floating-point values
  4364. `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  4365. for Binary Floating-Point Arithmetic.
  4366. -------------------------------------------------------------------------------
  4367. *}
  4368. Function float64_mul( a: float64; b:float64) : Float64;
  4369. {$ifdef fpc}[public,Alias:'FLOAT64_MUL'];compilerproc;{$endif}
  4370. Var
  4371. aSign, bSign, zSign: flag;
  4372. aExp, bExp, zExp: int16;
  4373. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3: bits32;
  4374. z: float64;
  4375. label invalid;
  4376. Begin
  4377. aSig1 := extractFloat64Frac1( a );
  4378. aSig0 := extractFloat64Frac0( a );
  4379. aExp := extractFloat64Exp( a );
  4380. aSign := extractFloat64Sign( a );
  4381. bSig1 := extractFloat64Frac1( b );
  4382. bSig0 := extractFloat64Frac0( b );
  4383. bExp := extractFloat64Exp( b );
  4384. bSign := extractFloat64Sign( b );
  4385. zSign := aSign xor bSign;
  4386. if ( aExp = $7FF ) then
  4387. Begin
  4388. if ( (( aSig0 OR aSig1 ) <>0)
  4389. OR ( ( bExp = $7FF ) AND (( bSig0 OR bSig1 )<>0) ) ) then
  4390. Begin
  4391. propagateFloat64NaN( a, b, result );
  4392. exit;
  4393. End;
  4394. if ( ( bExp OR bSig0 OR bSig1 ) = 0 ) then goto invalid;
  4395. packFloat64( zSign, $7FF, 0, 0, result );
  4396. exit;
  4397. End;
  4398. if ( bExp = $7FF ) then
  4399. Begin
  4400. if ( bSig0 OR bSig1 )<> 0 then
  4401. Begin
  4402. propagateFloat64NaN( a, b, result );
  4403. exit;
  4404. End;
  4405. if ( ( aExp OR aSig0 OR aSig1 ) = 0 ) then
  4406. Begin
  4407. invalid:
  4408. float_raise( float_flag_invalid );
  4409. z.low := float64_default_nan_low;
  4410. z.high := float64_default_nan_high;
  4411. result := z;
  4412. exit;
  4413. End;
  4414. packFloat64( zSign, $7FF, 0, 0, result );
  4415. exit;
  4416. End;
  4417. if ( aExp = 0 ) then
  4418. Begin
  4419. if ( ( aSig0 OR aSig1 ) = 0 ) then
  4420. Begin
  4421. packFloat64( zSign, 0, 0, 0, result );
  4422. exit;
  4423. End;
  4424. normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  4425. End;
  4426. if ( bExp = 0 ) then
  4427. Begin
  4428. if ( ( bSig0 OR bSig1 ) = 0 ) then
  4429. Begin
  4430. packFloat64( zSign, 0, 0, 0, result );
  4431. exit;
  4432. End;
  4433. normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
  4434. End;
  4435. zExp := aExp + bExp - $400;
  4436. aSig0 := aSig0 or $00100000;
  4437. shortShift64Left( bSig0, bSig1, 12, bSig0, bSig1 );
  4438. mul64To128( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3 );
  4439. add64( zSig0, zSig1, aSig0, aSig1, zSig0, zSig1 );
  4440. zSig2 := zSig2 or flag( zSig3 <> 0 );
  4441. if ( $00200000 <= zSig0 ) then
  4442. Begin
  4443. shift64ExtraRightJamming(
  4444. zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
  4445. Inc(zExp);
  4446. End;
  4447. roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, result );
  4448. End;
  4449. {*
  4450. -------------------------------------------------------------------------------
  4451. Returns the result of dividing the double-precision floating-point value `a'
  4452. by the corresponding value `b'. The operation is performed according to the
  4453. IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  4454. -------------------------------------------------------------------------------
  4455. *}
  4456. Function float64_div(a: float64; b : float64) : Float64;
  4457. {$ifdef fpc}[public,Alias:'FLOAT64_DIV'];compilerproc;{$endif}
  4458. Var
  4459. aSign, bSign, zSign: flag;
  4460. aExp, bExp, zExp: int16;
  4461. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits32;
  4462. rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
  4463. z: float64;
  4464. label invalid;
  4465. Begin
  4466. aSig1 := extractFloat64Frac1( a );
  4467. aSig0 := extractFloat64Frac0( a );
  4468. aExp := extractFloat64Exp( a );
  4469. aSign := extractFloat64Sign( a );
  4470. bSig1 := extractFloat64Frac1( b );
  4471. bSig0 := extractFloat64Frac0( b );
  4472. bExp := extractFloat64Exp( b );
  4473. bSign := extractFloat64Sign( b );
  4474. zSign := aSign xor bSign;
  4475. if ( aExp = $7FF ) then
  4476. Begin
  4477. if ( aSig0 OR aSig1 )<> 0 then
  4478. Begin
  4479. propagateFloat64NaN( a, b, result );
  4480. exit;
  4481. end;
  4482. if ( bExp = $7FF ) then
  4483. Begin
  4484. if ( bSig0 OR bSig1 )<>0 then
  4485. Begin
  4486. propagateFloat64NaN( a, b, result );
  4487. exit;
  4488. End;
  4489. goto invalid;
  4490. End;
  4491. packFloat64( zSign, $7FF, 0, 0, result );
  4492. exit;
  4493. End;
  4494. if ( bExp = $7FF ) then
  4495. Begin
  4496. if ( bSig0 OR bSig1 )<> 0 then
  4497. Begin
  4498. propagateFloat64NaN( a, b, result );
  4499. exit;
  4500. End;
  4501. packFloat64( zSign, 0, 0, 0, result );
  4502. exit;
  4503. End;
  4504. if ( bExp = 0 ) then
  4505. Begin
  4506. if ( ( bSig0 OR bSig1 ) = 0 ) then
  4507. Begin
  4508. if ( ( aExp OR aSig0 OR aSig1 ) = 0 ) then
  4509. Begin
  4510. invalid:
  4511. float_raise( float_flag_invalid );
  4512. z.low := float64_default_nan_low;
  4513. z.high := float64_default_nan_high;
  4514. result := z;
  4515. exit;
  4516. End;
  4517. float_raise( float_flag_divbyzero );
  4518. packFloat64( zSign, $7FF, 0, 0, result );
  4519. exit;
  4520. End;
  4521. normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
  4522. End;
  4523. if ( aExp = 0 ) then
  4524. Begin
  4525. if ( ( aSig0 OR aSig1 ) = 0 ) then
  4526. Begin
  4527. packFloat64( zSign, 0, 0, 0, result );
  4528. exit;
  4529. End;
  4530. normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  4531. End;
  4532. zExp := aExp - bExp + $3FD;
  4533. shortShift64Left( aSig0 OR $00100000, aSig1, 11, aSig0, aSig1 );
  4534. shortShift64Left( bSig0 OR $00100000, bSig1, 11, bSig0, bSig1 );
  4535. if ( le64( bSig0, bSig1, aSig0, aSig1 )<>0 ) then
  4536. Begin
  4537. shift64Right( aSig0, aSig1, 1, aSig0, aSig1 );
  4538. Inc(zExp);
  4539. End;
  4540. zSig0 := estimateDiv64To32( aSig0, aSig1, bSig0 );
  4541. mul64By32To96( bSig0, bSig1, zSig0, term0, term1, term2 );
  4542. sub96( aSig0, aSig1, 0, term0, term1, term2, rem0, rem1, rem2 );
  4543. while ( sbits32 (rem0) < 0 ) do
  4544. Begin
  4545. Dec(zSig0);
  4546. add96( rem0, rem1, rem2, 0, bSig0, bSig1, rem0, rem1, rem2 );
  4547. End;
  4548. zSig1 := estimateDiv64To32( rem1, rem2, bSig0 );
  4549. if ( ( zSig1 and $3FF ) <= 4 ) then
  4550. Begin
  4551. mul64By32To96( bSig0, bSig1, zSig1, term1, term2, term3 );
  4552. sub96( rem1, rem2, 0, term1, term2, term3, rem1, rem2, rem3 );
  4553. while ( sbits32 (rem1) < 0 ) do
  4554. Begin
  4555. Dec(zSig1);
  4556. add96( rem1, rem2, rem3, 0, bSig0, bSig1, rem1, rem2, rem3 );
  4557. End;
  4558. zSig1 := zSig1 or flag( ( rem1 OR rem2 OR rem3 ) <> 0 );
  4559. End;
  4560. shift64ExtraRightJamming( zSig0, zSig1, 0, 11, zSig0, zSig1, zSig2 );
  4561. roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2, result );
  4562. End;
  4563. {*
  4564. -------------------------------------------------------------------------------
  4565. Returns the remainder of the double-precision floating-point value `a'
  4566. with respect to the corresponding value `b'. The operation is performed
  4567. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  4568. -------------------------------------------------------------------------------
  4569. *}
  4570. Function float64_rem(a: float64; b : float64) : float64;
  4571. {$ifdef fpc}[public,Alias:'FLOAT64_REM'];compilerproc;{$endif}
  4572. Var
  4573. aSign, bSign, zSign: flag;
  4574. aExp, bExp, expDiff: int16;
  4575. aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2: bits32;
  4576. allZero, alternateASig0, alternateASig1, sigMean1: bits32;
  4577. sigMean0: sbits32;
  4578. z: float64;
  4579. label invalid;
  4580. Begin
  4581. aSig1 := extractFloat64Frac1( a );
  4582. aSig0 := extractFloat64Frac0( a );
  4583. aExp := extractFloat64Exp( a );
  4584. aSign := extractFloat64Sign( a );
  4585. bSig1 := extractFloat64Frac1( b );
  4586. bSig0 := extractFloat64Frac0( b );
  4587. bExp := extractFloat64Exp( b );
  4588. bSign := extractFloat64Sign( b );
  4589. if ( aExp = $7FF ) then
  4590. Begin
  4591. if ((( aSig0 OR aSig1 )<>0)
  4592. OR ( ( bExp = $7FF ) AND (( bSig0 OR bSig1 )<>0) ) ) then
  4593. Begin
  4594. propagateFloat64NaN( a, b, result );
  4595. exit;
  4596. End;
  4597. goto invalid;
  4598. End;
  4599. if ( bExp = $7FF ) then
  4600. Begin
  4601. if ( bSig0 OR bSig1 ) <> 0 then
  4602. Begin
  4603. propagateFloat64NaN( a, b, result );
  4604. exit;
  4605. End;
  4606. result := a;
  4607. exit;
  4608. End;
  4609. if ( bExp = 0 ) then
  4610. Begin
  4611. if ( ( bSig0 OR bSig1 ) = 0 ) then
  4612. Begin
  4613. invalid:
  4614. float_raise( float_flag_invalid );
  4615. z.low := float64_default_nan_low;
  4616. z.high := float64_default_nan_high;
  4617. result := z;
  4618. exit;
  4619. End;
  4620. normalizeFloat64Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
  4621. End;
  4622. if ( aExp = 0 ) then
  4623. Begin
  4624. if ( ( aSig0 OR aSig1 ) = 0 ) then
  4625. Begin
  4626. result := a;
  4627. exit;
  4628. End;
  4629. normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  4630. End;
  4631. expDiff := aExp - bExp;
  4632. if ( expDiff < -1 ) then
  4633. Begin
  4634. result := a;
  4635. exit;
  4636. End;
  4637. shortShift64Left(
  4638. aSig0 OR $00100000, aSig1, 11 - flag( expDiff < 0 ), aSig0, aSig1 );
  4639. shortShift64Left( bSig0 OR $00100000, bSig1, 11, bSig0, bSig1 );
  4640. q := le64( bSig0, bSig1, aSig0, aSig1 );
  4641. if ( q )<>0 then
  4642. sub64( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
  4643. expDiff := expDiff - 32;
  4644. while ( 0 < expDiff ) do
  4645. Begin
  4646. q := estimateDiv64To32( aSig0, aSig1, bSig0 );
  4647. if 4 < q then
  4648. q:= q - 4
  4649. else
  4650. q := 0;
  4651. mul64By32To96( bSig0, bSig1, q, term0, term1, term2 );
  4652. shortShift96Left( term0, term1, term2, 29, term1, term2, allZero );
  4653. shortShift64Left( aSig0, aSig1, 29, aSig0, allZero );
  4654. sub64( aSig0, 0, term1, term2, aSig0, aSig1 );
  4655. expDiff := expDiff - 29;
  4656. End;
  4657. if ( -32 < expDiff ) then
  4658. Begin
  4659. q := estimateDiv64To32( aSig0, aSig1, bSig0 );
  4660. if 4 < q then
  4661. q := q - 4
  4662. else
  4663. q := 0;
  4664. q := q shr (- expDiff);
  4665. shift64Right( bSig0, bSig1, 8, bSig0, bSig1 );
  4666. expDiff := expDiff + 24;
  4667. if ( expDiff < 0 ) then
  4668. Begin
  4669. shift64Right( aSig0, aSig1, - expDiff, aSig0, aSig1 );
  4670. End
  4671. else
  4672. Begin
  4673. shortShift64Left( aSig0, aSig1, expDiff, aSig0, aSig1 );
  4674. End;
  4675. mul64By32To96( bSig0, bSig1, q, term0, term1, term2 );
  4676. sub64( aSig0, aSig1, term1, term2, aSig0, aSig1 );
  4677. End
  4678. else
  4679. Begin
  4680. shift64Right( aSig0, aSig1, 8, aSig0, aSig1 );
  4681. shift64Right( bSig0, bSig1, 8, bSig0, bSig1 );
  4682. End;
  4683. Repeat
  4684. alternateASig0 := aSig0;
  4685. alternateASig1 := aSig1;
  4686. Inc(q);
  4687. sub64( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
  4688. Until not ( 0 <= sbits32 (aSig0) );
  4689. add64(
  4690. aSig0, aSig1, alternateASig0, alternateASig1, bits32(sigMean0), sigMean1 );
  4691. if ( ( sigMean0 < 0 )
  4692. OR ( ( ( sigMean0 OR sigMean1 ) = 0 ) AND (( q AND 1 )<>0) ) ) then
  4693. Begin
  4694. aSig0 := alternateASig0;
  4695. aSig1 := alternateASig1;
  4696. End;
  4697. zSign := flag( sbits32 (aSig0) < 0 );
  4698. if ( zSign <> 0 ) then
  4699. sub64( 0, 0, aSig0, aSig1, aSig0, aSig1 );
  4700. normalizeRoundAndPackFloat64( aSign xor zSign, bExp - 4, aSig0, aSig1, result );
  4701. End;
  4702. {*
  4703. -------------------------------------------------------------------------------
  4704. Returns the square root of the double-precision floating-point value `a'.
  4705. The operation is performed according to the IEC/IEEE Standard for Binary
  4706. Floating-Point Arithmetic.
  4707. -------------------------------------------------------------------------------
  4708. *}
  4709. Procedure float64_sqrt( a: float64; var out: float64 );
  4710. {$ifdef fpc}[public,Alias:'FLOAT64_SQRT'];compilerproc;{$endif}
  4711. Var
  4712. aSign: flag;
  4713. aExp, zExp: int16;
  4714. aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0: bits32;
  4715. rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
  4716. z: float64;
  4717. label invalid;
  4718. Begin
  4719. aSig1 := extractFloat64Frac1( a );
  4720. aSig0 := extractFloat64Frac0( a );
  4721. aExp := extractFloat64Exp( a );
  4722. aSign := extractFloat64Sign( a );
  4723. if ( aExp = $7FF ) then
  4724. Begin
  4725. if ( aSig0 OR aSig1 ) <> 0 then
  4726. Begin
  4727. propagateFloat64NaN( a, a, out );
  4728. exit;
  4729. End;
  4730. if ( aSign = 0) then
  4731. Begin
  4732. out := a;
  4733. exit;
  4734. End;
  4735. goto invalid;
  4736. End;
  4737. if ( aSign <> 0 ) then
  4738. Begin
  4739. if ( ( aExp OR aSig0 OR aSig1 ) = 0 ) then
  4740. Begin
  4741. out := a;
  4742. exit;
  4743. End;
  4744. invalid:
  4745. float_raise( float_flag_invalid );
  4746. z.low := float64_default_nan_low;
  4747. z.high := float64_default_nan_high;
  4748. out := z;
  4749. exit;
  4750. End;
  4751. if ( aExp = 0 ) then
  4752. Begin
  4753. if ( ( aSig0 OR aSig1 ) = 0 ) then
  4754. Begin
  4755. packFloat64( 0, 0, 0, 0, out );
  4756. exit;
  4757. End;
  4758. normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  4759. End;
  4760. zExp := ( ( aExp - $3FF ) shr 1 ) + $3FE;
  4761. aSig0 := aSig0 or $00100000;
  4762. shortShift64Left( aSig0, aSig1, 11, term0, term1 );
  4763. zSig0 := ( estimateSqrt32( aExp, term0 ) shr 1 ) + 1;
  4764. if ( zSig0 = 0 ) then
  4765. zSig0 := $7FFFFFFF;
  4766. doubleZSig0 := zSig0 + zSig0;
  4767. shortShift64Left( aSig0, aSig1, 9 - ( aExp and 1 ), aSig0, aSig1 );
  4768. mul32To64( zSig0, zSig0, term0, term1 );
  4769. sub64( aSig0, aSig1, term0, term1, rem0, rem1 );
  4770. while ( sbits32 (rem0) < 0 ) do
  4771. Begin
  4772. Dec(zSig0);
  4773. doubleZSig0 := doubleZSig0 - 2;
  4774. add64( rem0, rem1, 0, doubleZSig0 OR 1, rem0, rem1 );
  4775. End;
  4776. zSig1 := estimateDiv64To32( rem1, 0, doubleZSig0 );
  4777. if ( ( zSig1 and $1FF ) <= 5 ) then
  4778. Begin
  4779. if ( zSig1 = 0 ) then
  4780. zSig1 := 1;
  4781. mul32To64( doubleZSig0, zSig1, term1, term2 );
  4782. sub64( rem1, 0, term1, term2, rem1, rem2 );
  4783. mul32To64( zSig1, zSig1, term2, term3 );
  4784. sub96( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
  4785. while ( sbits32 (rem1) < 0 ) do
  4786. Begin
  4787. Dec(zSig1);
  4788. shortShift64Left( 0, zSig1, 1, term2, term3 );
  4789. term3 := term3 or 1;
  4790. term2 := term2 or doubleZSig0;
  4791. add96( rem1, rem2, rem3, 0, term2, term3, rem1, rem2, rem3 );
  4792. End;
  4793. zSig1 := zSig1 or bits32( ( rem1 OR rem2 OR rem3 ) <> 0 );
  4794. End;
  4795. shift64ExtraRightJamming( zSig0, zSig1, 0, 10, zSig0, zSig1, zSig2 );
  4796. roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2, out );
  4797. End;
  4798. {*
  4799. -------------------------------------------------------------------------------
  4800. Returns 1 if the double-precision floating-point value `a' is equal to
  4801. the corresponding value `b', and 0 otherwise. The comparison is performed
  4802. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  4803. -------------------------------------------------------------------------------
  4804. *}
  4805. Function float64_eq(a: float64; b: float64): flag;
  4806. {$ifdef fpc}[public,Alias:'FLOAT64_EQ'];compilerproc;{$endif}
  4807. Begin
  4808. if
  4809. (
  4810. ( extractFloat64Exp( a ) = $7FF )
  4811. AND
  4812. (
  4813. (extractFloat64Frac0( a ) OR extractFloat64Frac1( a )) <>0
  4814. )
  4815. )
  4816. OR (
  4817. ( extractFloat64Exp( b ) = $7FF )
  4818. AND (
  4819. (extractFloat64Frac0( b ) OR (extractFloat64Frac1( b )) <> 0
  4820. )
  4821. )
  4822. ) then
  4823. Begin
  4824. if ( (float64_is_signaling_nan( a )<>0) OR (float64_is_signaling_nan( b )<>0) ) then
  4825. float_raise( float_flag_invalid );
  4826. float64_eq := 0;
  4827. exit;
  4828. End;
  4829. float64_eq := flag(
  4830. ( a.low = b.low )
  4831. AND ( ( a.high = b.high )
  4832. OR ( ( a.low = 0 )
  4833. AND ( bits32 ( ( a.high OR b.high ) shl 1 ) = 0 ) )
  4834. ));
  4835. End;
  4836. {*
  4837. -------------------------------------------------------------------------------
  4838. Returns 1 if the double-precision floating-point value `a' is less than
  4839. or equal to the corresponding value `b', and 0 otherwise. The comparison
  4840. is performed according to the IEC/IEEE Standard for Binary Floating-Point
  4841. Arithmetic.
  4842. -------------------------------------------------------------------------------
  4843. *}
  4844. Function float64_le(a: float64;b: float64): flag;
  4845. {$ifdef fpc}[public,Alias:'FLOAT64_LE'];compilerproc;{$endif}
  4846. Var
  4847. aSign, bSign: flag;
  4848. Begin
  4849. if
  4850. (
  4851. ( extractFloat64Exp( a ) = $7FF )
  4852. AND
  4853. (
  4854. (extractFloat64Frac0( a ) OR extractFloat64Frac1( a )) <>0
  4855. )
  4856. )
  4857. OR (
  4858. ( extractFloat64Exp( b ) = $7FF )
  4859. AND (
  4860. (extractFloat64Frac0( b ) OR (extractFloat64Frac1( b )) <> 0
  4861. )
  4862. )
  4863. ) then
  4864. Begin
  4865. float_raise( float_flag_invalid );
  4866. float64_le := 0;
  4867. exit;
  4868. End;
  4869. aSign := extractFloat64Sign( a );
  4870. bSign := extractFloat64Sign( b );
  4871. if ( aSign <> bSign ) then
  4872. Begin
  4873. float64_le := flag(
  4874. (aSign <> 0)
  4875. OR ( ( ( bits32 ( ( a.high OR b.high ) shl 1 ) ) OR a.low OR b.low )
  4876. = 0 ));
  4877. exit;
  4878. End;
  4879. if aSign <> 0 then
  4880. float64_le := le64( b.high, b.low, a.high, a.low )
  4881. else
  4882. float64_le := le64( a.high, a.low, b.high, b.low );
  4883. End;
  4884. {*
  4885. -------------------------------------------------------------------------------
  4886. Returns 1 if the double-precision floating-point value `a' is less than
  4887. the corresponding value `b', and 0 otherwise. The comparison is performed
  4888. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  4889. -------------------------------------------------------------------------------
  4890. *}
  4891. Function float64_lt(a: float64;b: float64): flag;
  4892. {$ifdef fpc}[public,Alias:'FLOAT64_LT'];compilerproc;{$endif}
  4893. Var
  4894. aSign, bSign: flag;
  4895. Begin
  4896. if
  4897. (
  4898. ( extractFloat64Exp( a ) = $7FF )
  4899. AND
  4900. (
  4901. (extractFloat64Frac0( a ) OR extractFloat64Frac1( a )) <>0
  4902. )
  4903. )
  4904. OR (
  4905. ( extractFloat64Exp( b ) = $7FF )
  4906. AND (
  4907. (extractFloat64Frac0( b ) OR (extractFloat64Frac1( b )) <> 0
  4908. )
  4909. )
  4910. ) then
  4911. Begin
  4912. float_raise( float_flag_invalid );
  4913. float64_lt := 0;
  4914. exit;
  4915. End;
  4916. aSign := extractFloat64Sign( a );
  4917. bSign := extractFloat64Sign( b );
  4918. if ( aSign <> bSign ) then
  4919. Begin
  4920. float64_lt := flag(
  4921. (aSign <> 0)
  4922. AND ( ( ( bits32 ( ( a.high OR b.high ) shl 1 ) ) OR a.low OR b.low )
  4923. <> 0 ));
  4924. exit;
  4925. End;
  4926. if aSign <> 0 then
  4927. float64_lt := lt64( b.high, b.low, a.high, a.low )
  4928. else
  4929. float64_lt := lt64( a.high, a.low, b.high, b.low );
  4930. End;
  4931. {*
  4932. -------------------------------------------------------------------------------
  4933. Returns 1 if the double-precision floating-point value `a' is equal to
  4934. the corresponding value `b', and 0 otherwise. The invalid exception is
  4935. raised if either operand is a NaN. Otherwise, the comparison is performed
  4936. according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  4937. -------------------------------------------------------------------------------
  4938. *}
  4939. Function float64_eq_signaling( a: float64; b: float64): flag;
  4940. Begin
  4941. if
  4942. (
  4943. ( extractFloat64Exp( a ) = $7FF )
  4944. AND
  4945. (
  4946. (extractFloat64Frac0( a ) OR extractFloat64Frac1( a )) <>0
  4947. )
  4948. )
  4949. OR (
  4950. ( extractFloat64Exp( b ) = $7FF )
  4951. AND (
  4952. (extractFloat64Frac0( b ) OR (extractFloat64Frac1( b )) <> 0
  4953. )
  4954. )
  4955. ) then
  4956. Begin
  4957. float_raise( float_flag_invalid );
  4958. float64_eq_signaling := 0;
  4959. exit;
  4960. End;
  4961. float64_eq_signaling := flag(
  4962. ( a.low = b.low )
  4963. AND ( ( a.high = b.high )
  4964. OR ( ( a.low = 0 )
  4965. AND ( bits32 ( ( a.high OR b.high ) shl 1 ) = 0 ) )
  4966. ));
  4967. End;
  4968. {*
  4969. -------------------------------------------------------------------------------
  4970. Returns 1 if the double-precision floating-point value `a' is less than or
  4971. equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
  4972. cause an exception. Otherwise, the comparison is performed according to the
  4973. IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  4974. -------------------------------------------------------------------------------
  4975. *}
  4976. Function float64_le_quiet(a: float64 ; b: float64 ): flag;
  4977. Var
  4978. aSign, bSign : flag;
  4979. Begin
  4980. if
  4981. (
  4982. ( extractFloat64Exp( a ) = $7FF )
  4983. AND
  4984. (
  4985. (extractFloat64Frac0( a ) OR extractFloat64Frac1( a )) <>0
  4986. )
  4987. )
  4988. OR (
  4989. ( extractFloat64Exp( b ) = $7FF )
  4990. AND (
  4991. (extractFloat64Frac0( b ) OR (extractFloat64Frac1( b )) <> 0
  4992. )
  4993. )
  4994. ) then
  4995. Begin
  4996. if ( (float64_is_signaling_nan( a )<>0) OR (float64_is_signaling_nan( b )<>0) ) then
  4997. float_raise( float_flag_invalid );
  4998. float64_le_quiet := 0;
  4999. exit;
  5000. End;
  5001. aSign := extractFloat64Sign( a );
  5002. bSign := extractFloat64Sign( b );
  5003. if ( aSign <> bSign ) then
  5004. Begin
  5005. float64_le_quiet := flag
  5006. ((aSign <> 0)
  5007. OR ( ( ( bits32 ( ( a.high OR b.high ) shl 1 ) ) OR a.low OR b.low )
  5008. = 0 ));
  5009. exit;
  5010. End;
  5011. if aSign <> 0 then
  5012. float64_le_quiet := le64( b.high, b.low, a.high, a.low )
  5013. else
  5014. float64_le_quiet := le64( a.high, a.low, b.high, b.low );
  5015. End;
  5016. {*
  5017. -------------------------------------------------------------------------------
  5018. Returns 1 if the double-precision floating-point value `a' is less than
  5019. the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
  5020. exception. Otherwise, the comparison is performed according to the IEC/IEEE
  5021. Standard for Binary Floating-Point Arithmetic.
  5022. -------------------------------------------------------------------------------
  5023. *}
  5024. Function float64_lt_quiet(a: float64; b: float64 ): Flag;
  5025. Var
  5026. aSign, bSign: flag;
  5027. Begin
  5028. if
  5029. (
  5030. ( extractFloat64Exp( a ) = $7FF )
  5031. AND
  5032. (
  5033. (extractFloat64Frac0( a ) OR extractFloat64Frac1( a )) <>0
  5034. )
  5035. )
  5036. OR (
  5037. ( extractFloat64Exp( b ) = $7FF )
  5038. AND (
  5039. (extractFloat64Frac0( b ) OR (extractFloat64Frac1( b )) <> 0
  5040. )
  5041. )
  5042. ) then
  5043. Begin
  5044. if ( (float64_is_signaling_nan( a )<>0) OR (float64_is_signaling_nan( b )<>0) ) then
  5045. float_raise( float_flag_invalid );
  5046. float64_lt_quiet := 0;
  5047. exit;
  5048. End;
  5049. aSign := extractFloat64Sign( a );
  5050. bSign := extractFloat64Sign( b );
  5051. if ( aSign <> bSign ) then
  5052. Begin
  5053. float64_lt_quiet := flag(
  5054. (aSign<>0)
  5055. AND ( ( ( bits32 ( ( a.high OR b.high ) shl 1 ) ) OR a.low OR b.low )
  5056. <> 0 ));
  5057. exit;
  5058. End;
  5059. If aSign <> 0 then
  5060. float64_lt_quiet := lt64( b.high, b.low, a.high, a.low )
  5061. else
  5062. float64_lt_quiet := lt64( a.high, a.low, b.high, b.low );
  5063. End;
  5064. {*----------------------------------------------------------------------------
  5065. | Returns the result of converting the 64-bit two's complement integer `a'
  5066. | to the single-precision floating-point format. The conversion is performed
  5067. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  5068. *----------------------------------------------------------------------------*}
  5069. function int64_to_float32( a: int64 ): float32rec; compilerproc;
  5070. var
  5071. zSign : flag;
  5072. absA : uint64;
  5073. shiftCount: int8;
  5074. zSig : bits32;
  5075. intval : int64rec;
  5076. Begin
  5077. if ( a = 0 ) then
  5078. begin
  5079. int64_to_float32.float32 := 0;
  5080. exit;
  5081. end;
  5082. if a < 0 then
  5083. zSign := flag(TRUE)
  5084. else
  5085. zSign := flag(FALSE);
  5086. if zSign<>0 then
  5087. absA := -a
  5088. else
  5089. absA := a;
  5090. shiftCount := countLeadingZeros64( absA ) - 40;
  5091. if ( 0 <= shiftCount ) then
  5092. begin
  5093. int64_to_float32.float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
  5094. end
  5095. else
  5096. begin
  5097. shiftCount := shiftCount + 7;
  5098. if ( shiftCount < 0 ) then
  5099. begin
  5100. intval.low := int64rec(AbsA).low;
  5101. intval.high := int64rec(AbsA).high;
  5102. shift64RightJamming( intval.low, intval.high, - shiftCount,
  5103. intval.low, intval.high);
  5104. int64rec(absA).low := intval.low;
  5105. int64rec(absA).high := intval.high;
  5106. end
  5107. else
  5108. absA := absA shl shiftCount;
  5109. int64_to_float32.float32:=roundAndPackFloat32( zSign, $9C - shiftCount, absA );
  5110. end;
  5111. End;
  5112. {*----------------------------------------------------------------------------
  5113. | Returns the result of converting the 64-bit two's complement integer `a'
  5114. | to the single-precision floating-point format. The conversion is performed
  5115. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  5116. | Unisgned version.
  5117. *----------------------------------------------------------------------------*}
  5118. function qword_to_float32( a: qword ): float32rec; compilerproc;
  5119. var
  5120. zSign : flag;
  5121. absA : uint64;
  5122. shiftCount: int8;
  5123. zSig : bits32;
  5124. intval : int64rec;
  5125. Begin
  5126. if ( a = 0 ) then
  5127. begin
  5128. qword_to_float32.float32 := 0;
  5129. exit;
  5130. end;
  5131. zSign := flag(FALSE);
  5132. absA := a;
  5133. shiftCount := countLeadingZeros64( absA ) - 40;
  5134. if ( 0 <= shiftCount ) then
  5135. begin
  5136. qword_to_float32.float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
  5137. end
  5138. else
  5139. begin
  5140. shiftCount := shiftCount + 7;
  5141. if ( shiftCount < 0 ) then
  5142. begin
  5143. intval.low := int64rec(AbsA).low;
  5144. intval.high := int64rec(AbsA).high;
  5145. shift64RightJamming( intval.low, intval.high, - shiftCount,
  5146. intval.low, intval.high);
  5147. int64rec(absA).low := intval.low;
  5148. int64rec(absA).high := intval.high;
  5149. end
  5150. else
  5151. absA := absA shl shiftCount;
  5152. qword_to_float32.float32:=roundAndPackFloat32( zSign, $9C - shiftCount, absA );
  5153. end;
  5154. End;
  5155. {*----------------------------------------------------------------------------
  5156. | Returns the result of converting the 64-bit two's complement integer `a'
  5157. | to the double-precision floating-point format. The conversion is performed
  5158. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  5159. *----------------------------------------------------------------------------*}
  5160. function qword_to_float64( a: qword ): float64;
  5161. {$ifdef fpc}[public,Alias:'QWORD_TO_FLOAT64'];compilerproc;{$endif}
  5162. var
  5163. zSign : flag;
  5164. float_result : float64;
  5165. intval : int64rec;
  5166. AbsA : bits64;
  5167. shiftcount : int8;
  5168. zSig0, zSig1 : bits32;
  5169. Begin
  5170. if ( a = 0 ) then
  5171. Begin
  5172. packFloat64( 0, 0, 0, 0, result );
  5173. exit;
  5174. end;
  5175. zSign := flag(FALSE);
  5176. AbsA := a;
  5177. shiftCount := countLeadingZeros64( absA ) - 11;
  5178. if ( 0 <= shiftCount ) then
  5179. Begin
  5180. absA := absA shl shiftcount;
  5181. zSig0:=int64rec(absA).high;
  5182. zSig1:=int64rec(absA).low;
  5183. End
  5184. else
  5185. Begin
  5186. shift64Right( int64rec(absA).high, int64rec(absA).low,
  5187. - shiftCount, zSig0, zSig1 );
  5188. End;
  5189. packFloat64( zSign, $432 - shiftCount, zSig0, zSig1, float_result );
  5190. qword_to_float64:= float_result;
  5191. End;
  5192. {*----------------------------------------------------------------------------
  5193. | Returns the result of converting the 64-bit two's complement integer `a'
  5194. | to the double-precision floating-point format. The conversion is performed
  5195. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  5196. *----------------------------------------------------------------------------*}
  5197. function int64_to_float64( a: int64 ): float64;
  5198. {$ifdef fpc}[public,Alias:'INT64_TO_FLOAT64'];compilerproc;{$endif}
  5199. var
  5200. zSign : flag;
  5201. float_result : float64;
  5202. intval : int64rec;
  5203. AbsA : bits64;
  5204. shiftcount : int8;
  5205. zSig0, zSig1 : bits32;
  5206. Begin
  5207. if ( a = 0 ) then
  5208. Begin
  5209. packFloat64( 0, 0, 0, 0, result );
  5210. exit;
  5211. end;
  5212. zSign := flag( a < 0 );
  5213. if ZSign<>0 then
  5214. AbsA := -a
  5215. else
  5216. AbsA := a;
  5217. shiftCount := countLeadingZeros64( absA ) - 11;
  5218. if ( 0 <= shiftCount ) then
  5219. Begin
  5220. absA := absA shl shiftcount;
  5221. zSig0:=int64rec(absA).high;
  5222. zSig1:=int64rec(absA).low;
  5223. End
  5224. else
  5225. Begin
  5226. shift64Right( int64rec(absA).high, int64rec(absA).low,
  5227. - shiftCount, zSig0, zSig1 );
  5228. End;
  5229. packFloat64( zSign, $432 - shiftCount, zSig0, zSig1, float_result );
  5230. int64_to_float64:= float_result;
  5231. End;
  5232. {*----------------------------------------------------------------------------
  5233. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
  5234. | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
  5235. | Otherwise, returns 0.
  5236. *----------------------------------------------------------------------------*}
  5237. function eq128( a0: bits64; a1: bits64; b0: bits64; b1 : bits64): flag;inline;
  5238. begin
  5239. result := ord(( a0 = b0 ) and ( a1 = b1 ));
  5240. end;
  5241. {*----------------------------------------------------------------------------
  5242. | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  5243. | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
  5244. | Otherwise, returns 0.
  5245. *----------------------------------------------------------------------------*}
  5246. function le128( a0: bits64; a1: bits64; b0: bits64; b1 : bits64): flag;inline;
  5247. begin
  5248. result:=ord(( a0 < b0 ) or ( ( a0 = b0 ) and ( a1 <= b1 ) ));
  5249. end;
  5250. {*----------------------------------------------------------------------------
  5251. | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
  5252. | by 64 _plus_ the number of bits given in `count'. The shifted result is
  5253. | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
  5254. | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
  5255. | off form a third 64-bit result as follows: The _last_ bit shifted off is
  5256. | the most-significant bit of the extra result, and the other 63 bits of the
  5257. | extra result are all zero if and only if _all_but_the_last_ bits shifted off
  5258. | were all zero. This extra result is stored in the location pointed to by
  5259. | `z2Ptr'. The value of `count' can be arbitrarily large.
  5260. | (This routine makes more sense if `a0', `a1', and `a2' are considered
  5261. | to form a fixed-point value with binary point between `a1' and `a2'. This
  5262. | fixed-point value is shifted right by the number of bits given in `count',
  5263. | and the integer part of the result is returned at the locations pointed to
  5264. | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
  5265. | corrupted as described above, and is returned at the location pointed to by
  5266. | `z2Ptr'.)
  5267. *----------------------------------------------------------------------------*}
  5268. procedure shift128ExtraRightJamming(
  5269. a0: bits64;
  5270. a1: bits64;
  5271. a2: bits64;
  5272. count: int16;
  5273. var z0Ptr: bits64;
  5274. var z1Ptr: bits64;
  5275. var z2Ptr: bits64);
  5276. var
  5277. z0, z1, z2: bits64;
  5278. negCount: int8;
  5279. begin
  5280. negCount := ( - count ) and 63;
  5281. if ( count = 0 ) then
  5282. begin
  5283. z2 := a2;
  5284. z1 := a1;
  5285. z0 := a0;
  5286. end
  5287. else begin
  5288. if ( count < 64 ) then
  5289. begin
  5290. z2 := a1 shr negCount;
  5291. z1 := ( a0 shl negCount ) or ( a1 shr count );
  5292. z0 := a0 shr count;
  5293. end
  5294. else begin
  5295. if ( count = 64 ) then
  5296. begin
  5297. z2 := a1;
  5298. z1 := a0;
  5299. end
  5300. else begin
  5301. a2 := a2 or a1;
  5302. if ( count < 128 ) then
  5303. begin
  5304. z2 := a0 shl negCount;
  5305. z1 := a0 shr ( count and 63 );
  5306. end
  5307. else begin
  5308. if ( count = 128 ) then
  5309. z2 := a0
  5310. else
  5311. z2 := ord( a0 <> 0 );
  5312. z1 := 0;
  5313. end;
  5314. end;
  5315. z0 := 0;
  5316. end;
  5317. z2 := z2 or ord( a2 <> 0 );
  5318. end;
  5319. z2Ptr := z2;
  5320. z1Ptr := z1;
  5321. z0Ptr := z0;
  5322. end;
  5323. {*----------------------------------------------------------------------------
  5324. | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  5325. | _plus_ the number of bits given in `count'. The shifted result is at most
  5326. | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
  5327. | bits shifted off form a second 64-bit result as follows: The _last_ bit
  5328. | shifted off is the most-significant bit of the extra result, and the other
  5329. | 63 bits of the extra result are all zero if and only if _all_but_the_last_
  5330. | bits shifted off were all zero. This extra result is stored in the location
  5331. | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
  5332. | (This routine makes more sense if `a0' and `a1' are considered to form
  5333. | a fixed-point value with binary point between `a0' and `a1'. This fixed-
  5334. | point value is shifted right by the number of bits given in `count', and
  5335. | the integer part of the result is returned at the location pointed to by
  5336. | `z0Ptr'. The fractional part of the result may be slightly corrupted as
  5337. | described above, and is returned at the location pointed to by `z1Ptr'.)
  5338. *----------------------------------------------------------------------------*}
  5339. procedure shift64ExtraRightJamming(a0: bits64; a1: bits64; count: int16; var z0Ptr: bits64; var z1Ptr : bits64);
  5340. var
  5341. z0, z1: bits64;
  5342. negCount: int8;
  5343. begin
  5344. negCount := ( - count ) and 63;
  5345. if ( count = 0 ) then
  5346. begin
  5347. z1 := a1;
  5348. z0 := a0;
  5349. end
  5350. else if ( count < 64 ) then
  5351. begin
  5352. z1 := ( a0 shl negCount ) or ord( a1 <> 0 );
  5353. z0 := a0 shr count;
  5354. end
  5355. else begin
  5356. if ( count = 64 ) then
  5357. begin
  5358. z1 := a0 or ord( a1 <> 0 );
  5359. end
  5360. else begin
  5361. z1 := ord( ( a0 or a1 ) <> 0 );
  5362. end;
  5363. z0 := 0;
  5364. end;
  5365. z1Ptr := z1;
  5366. z0Ptr := z0;
  5367. end;
  5368. {$ifdef FPC_SOFTFLOAT_FLOATX80}
  5369. {*----------------------------------------------------------------------------
  5370. | Returns the fraction bits of the extended double-precision floating-point
  5371. | value `a'.
  5372. *----------------------------------------------------------------------------*}
  5373. function extractFloatx80Frac(a : floatx80): bits64;inline;
  5374. begin
  5375. result:=a.low;
  5376. end;
  5377. {*----------------------------------------------------------------------------
  5378. | Returns the exponent bits of the extended double-precision floating-point
  5379. | value `a'.
  5380. *----------------------------------------------------------------------------*}
  5381. function extractFloatx80Exp(a : floatx80): int32;inline;
  5382. begin
  5383. result:=a.high and $7FFF;
  5384. end;
  5385. {*----------------------------------------------------------------------------
  5386. | Returns the sign bit of the extended double-precision floating-point value
  5387. | `a'.
  5388. *----------------------------------------------------------------------------*}
  5389. function extractFloatx80Sign(a : floatx80): flag;inline;
  5390. begin
  5391. result:=a.high shr 15;
  5392. end;
  5393. {*----------------------------------------------------------------------------
  5394. | Normalizes the subnormal extended double-precision floating-point value
  5395. | represented by the denormalized significand `aSig'. The normalized exponent
  5396. | and significand are stored at the locations pointed to by `zExpPtr' and
  5397. | `zSigPtr', respectively.
  5398. *----------------------------------------------------------------------------*}
  5399. procedure normalizeFloatx80Subnormal( aSig: bits64; var zExpPtr: int32; var zSigPtr : bits64);
  5400. var
  5401. shiftCount: int8;
  5402. begin
  5403. shiftCount := countLeadingZeros64( aSig );
  5404. zSigPtr := aSig shl shiftCount;
  5405. zExpPtr := 1 - shiftCount;
  5406. end;
  5407. {*----------------------------------------------------------------------------
  5408. | Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
  5409. | extended double-precision floating-point value, returning the result.
  5410. *----------------------------------------------------------------------------*}
  5411. function packFloatx80( zSign: flag; zExp: int32; zSig : bits64): floatx80;
  5412. var
  5413. z: floatx80;
  5414. begin
  5415. z.low := zSig;
  5416. z.high := ( bits16(zSign) shl 15 ) + zExp;
  5417. result:=z;
  5418. end;
  5419. {*----------------------------------------------------------------------------
  5420. | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  5421. | and extended significand formed by the concatenation of `zSig0' and `zSig1',
  5422. | and returns the proper extended double-precision floating-point value
  5423. | corresponding to the abstract input. Ordinarily, the abstract value is
  5424. | rounded and packed into the extended double-precision format, with the
  5425. | inexact exception raised if the abstract input cannot be represented
  5426. | exactly. However, if the abstract value is too large, the overflow and
  5427. | inexact exceptions are raised and an infinity or maximal finite value is
  5428. | returned. If the abstract value is too small, the input value is rounded to
  5429. | a subnormal number, and the underflow and inexact exceptions are raised if
  5430. | the abstract input cannot be represented exactly as a subnormal extended
  5431. | double-precision floating-point number.
  5432. | If `roundingPrecision' is 32 or 64, the result is rounded to the same
  5433. | number of bits as single or double precision, respectively. Otherwise, the
  5434. | result is rounded to the full precision of the extended double-precision
  5435. | format.
  5436. | The input significand must be normalized or smaller. If the input
  5437. | significand is not normalized, `zExp' must be 0; in that case, the result
  5438. | returned is a subnormal number, and it must not require rounding. The
  5439. | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
  5440. | Floating-Point Arithmetic.
  5441. *----------------------------------------------------------------------------*}
  5442. function roundAndPackFloatx80(roundingPrecision: int8; zSign: flag; zExp: int32; zSig0: bits64; zSig1: bits64): floatx80;
  5443. var
  5444. roundingMode: int8;
  5445. roundNearestEven, increment, isTiny: flag;
  5446. roundIncrement, roundMask, roundBits: int64;
  5447. label
  5448. precision80;
  5449. begin
  5450. roundingMode := softfloat_rounding_mode;
  5451. roundNearestEven := flag( roundingMode = float_round_nearest_even );
  5452. if ( roundingPrecision = 80 ) then
  5453. goto precision80;
  5454. if ( roundingPrecision = 64 ) then
  5455. begin
  5456. roundIncrement := int64( $0000000000000400 );
  5457. roundMask := int64( $00000000000007FF );
  5458. end
  5459. else if ( roundingPrecision = 32 ) then
  5460. begin
  5461. roundIncrement := int64( $0000008000000000 );
  5462. roundMask := int64( $000000FFFFFFFFFF );
  5463. end
  5464. else begin
  5465. goto precision80;
  5466. end;
  5467. zSig0 := zSig0 or ord( zSig1 <> 0 );
  5468. if ( not (roundNearestEven<>0) ) then
  5469. begin
  5470. if ( roundingMode = float_round_to_zero ) then
  5471. begin
  5472. roundIncrement := 0;
  5473. end
  5474. else begin
  5475. roundIncrement := roundMask;
  5476. if ( zSign<>0 ) then
  5477. begin
  5478. if ( roundingMode = float_round_up ) then
  5479. roundIncrement := 0;
  5480. end
  5481. else begin
  5482. if ( roundingMode = float_round_down ) then
  5483. roundIncrement := 0;
  5484. end;
  5485. end;
  5486. end;
  5487. roundBits := zSig0 and roundMask;
  5488. if ( $7FFD <= (bits32) ( zExp - 1 ) ) begin
  5489. if ( ( $7FFE < zExp )
  5490. or ( ( zExp = $7FFE ) and ( zSig0 + roundIncrement < zSig0 ) )
  5491. ) begin
  5492. goto overflow;
  5493. end;
  5494. if ( zExp <= 0 ) begin
  5495. isTiny =
  5496. ( float_detect_tininess = float_tininess_before_rounding )
  5497. or ( zExp < 0 )
  5498. or ( zSig0 <= zSig0 + roundIncrement );
  5499. shift64RightJamming( zSig0, 1 - zExp, zSig0 );
  5500. zExp := 0;
  5501. roundBits := zSig0 and roundMask;
  5502. if ( isTiny and roundBits ) float_raise( float_flag_underflow );
  5503. if ( roundBits ) softfloat_exception_flags |= float_flag_inexact;
  5504. zSig0 += roundIncrement;
  5505. if ( (sbits64) zSig0 < 0 ) zExp := 1;
  5506. roundIncrement := roundMask + 1;
  5507. if ( roundNearestEven and ( roundBits shl 1 = roundIncrement ) ) begin
  5508. roundMask |= roundIncrement;
  5509. end;
  5510. zSig0 = ~ roundMask;
  5511. result:=packFloatx80( zSign, zExp, zSig0 );
  5512. end;
  5513. end;
  5514. if ( roundBits ) softfloat_exception_flags |= float_flag_inexact;
  5515. zSig0 += roundIncrement;
  5516. if ( zSig0 < roundIncrement ) begin
  5517. ++zExp;
  5518. zSig0 := LIT64( $8000000000000000 );
  5519. end;
  5520. roundIncrement := roundMask + 1;
  5521. if ( roundNearestEven and ( roundBits shl 1 = roundIncrement ) ) begin
  5522. roundMask |= roundIncrement;
  5523. end;
  5524. zSig0 = ~ roundMask;
  5525. if ( zSig0 = 0 ) zExp := 0;
  5526. result:=packFloatx80( zSign, zExp, zSig0 );
  5527. precision80:
  5528. increment := ( (sbits64) zSig1 < 0 );
  5529. if ( ! roundNearestEven ) begin
  5530. if ( roundingMode = float_round_to_zero ) begin
  5531. increment := 0;
  5532. end;
  5533. else begin
  5534. if ( zSign ) begin
  5535. increment := ( roundingMode = float_round_down ) and zSig1;
  5536. end;
  5537. else begin
  5538. increment := ( roundingMode = float_round_up ) and zSig1;
  5539. end;
  5540. end;
  5541. end;
  5542. if ( $7FFD <= (bits32) ( zExp - 1 ) ) begin
  5543. if ( ( $7FFE < zExp )
  5544. or ( ( zExp = $7FFE )
  5545. and ( zSig0 = LIT64( $FFFFFFFFFFFFFFFF ) )
  5546. and increment
  5547. )
  5548. ) begin
  5549. roundMask := 0;
  5550. overflow:
  5551. float_raise( float_flag_overflow or float_flag_inexact );
  5552. if ( ( roundingMode = float_round_to_zero )
  5553. or ( zSign and ( roundingMode = float_round_up ) )
  5554. or ( ! zSign and ( roundingMode = float_round_down ) )
  5555. ) begin
  5556. result:=packFloatx80( zSign, $7FFE, ~ roundMask );
  5557. end;
  5558. result:=packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
  5559. end;
  5560. if ( zExp <= 0 ) begin
  5561. isTiny =
  5562. ( float_detect_tininess = float_tininess_before_rounding )
  5563. or ( zExp < 0 )
  5564. or ! increment
  5565. or ( zSig0 < LIT64( $FFFFFFFFFFFFFFFF ) );
  5566. shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, zSig0, zSig1 );
  5567. zExp := 0;
  5568. if ( isTiny and zSig1 ) float_raise( float_flag_underflow );
  5569. if ( zSig1 ) softfloat_exception_flags |= float_flag_inexact;
  5570. if ( roundNearestEven ) begin
  5571. increment := ( (sbits64) zSig1 < 0 );
  5572. end;
  5573. else begin
  5574. if ( zSign ) begin
  5575. increment := ( roundingMode = float_round_down ) and zSig1;
  5576. end;
  5577. else begin
  5578. increment := ( roundingMode = float_round_up ) and zSig1;
  5579. end;
  5580. end;
  5581. if ( increment ) begin
  5582. ++zSig0;
  5583. zSig0 =
  5584. ~ ( ( (bits64) ( zSig1 shl 1 ) = 0 ) and roundNearestEven );
  5585. if ( (sbits64) zSig0 < 0 ) zExp := 1;
  5586. end;
  5587. result:=packFloatx80( zSign, zExp, zSig0 );
  5588. end;
  5589. end;
  5590. if ( zSig1 ) softfloat_exception_flags |= float_flag_inexact;
  5591. if ( increment ) begin
  5592. ++zSig0;
  5593. if ( zSig0 = 0 ) begin
  5594. ++zExp;
  5595. zSig0 := LIT64( $8000000000000000 );
  5596. end;
  5597. else begin
  5598. zSig0 = ~ ( ( (bits64) ( zSig1 shl 1 ) = 0 ) and roundNearestEven );
  5599. end;
  5600. end;
  5601. else begin
  5602. if ( zSig0 = 0 ) zExp := 0;
  5603. end;
  5604. result:=packFloatx80( zSign, zExp, zSig0 );
  5605. end;
  5606. {*----------------------------------------------------------------------------
  5607. | Takes an abstract floating-point value having sign `zSign', exponent
  5608. | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
  5609. | and returns the proper extended double-precision floating-point value
  5610. | corresponding to the abstract input. This routine is just like
  5611. | `roundAndPackFloatx80' except that the input significand does not have to be
  5612. | normalized.
  5613. *----------------------------------------------------------------------------*}
  5614. function normalizeRoundAndPackFloatx80(roundingPrecision: int8; zSign: flag; zExp: int32; zSig0: bits64; zSig1: bits64): floatx80;
  5615. var
  5616. shiftCount: int8;
  5617. begin
  5618. if ( zSig0 = 0 ) begin
  5619. zSig0 := zSig1;
  5620. zSig1 := 0;
  5621. zExp -= 64;
  5622. end;
  5623. shiftCount := countLeadingZeros64( zSig0 );
  5624. shortShift128Left( zSig0, zSig1, shiftCount, zSig0, zSig1 );
  5625. zExp := eExp - shiftCount;
  5626. return
  5627. roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
  5628. end;
  5629. {*----------------------------------------------------------------------------
  5630. | Returns the result of converting the extended double-precision floating-
  5631. | point value `a' to the 32-bit two's complement integer format. The
  5632. | conversion is performed according to the IEC/IEEE Standard for Binary
  5633. | Floating-Point Arithmetic---which means in particular that the conversion
  5634. | is rounded according to the current rounding mode. If `a' is a NaN, the
  5635. | largest positive integer is returned. Otherwise, if the conversion
  5636. | overflows, the largest integer with the same sign as `a' is returned.
  5637. *----------------------------------------------------------------------------*}
  5638. function floatx80_to_int32(a: floatx80): int32;
  5639. var
  5640. aSign: flag;
  5641. aExp, shiftCount: int32;
  5642. aSig: bits64;
  5643. begin
  5644. aSig := extractFloatx80Frac( a );
  5645. aExp := extractFloatx80Exp( a );
  5646. aSign := extractFloatx80Sign( a );
  5647. if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) aSign := 0;
  5648. shiftCount := $4037 - aExp;
  5649. if ( shiftCount <= 0 ) shiftCount := 1;
  5650. shift64RightJamming( aSig, shiftCount, aSig );
  5651. result := roundAndPackInt32( aSign, aSig );
  5652. end;
  5653. {*----------------------------------------------------------------------------
  5654. | Returns the result of converting the extended double-precision floating-
  5655. | point value `a' to the 32-bit two's complement integer format. The
  5656. | conversion is performed according to the IEC/IEEE Standard for Binary
  5657. | Floating-Point Arithmetic, except that the conversion is always rounded
  5658. | toward zero. If `a' is a NaN, the largest positive integer is returned.
  5659. | Otherwise, if the conversion overflows, the largest integer with the same
  5660. | sign as `a' is returned.
  5661. *----------------------------------------------------------------------------*}
  5662. function floatx80_to_int32_round_to_zero(a: floatx80): int32;
  5663. var
  5664. aSign: flag;
  5665. aExp, shiftCount: int32;
  5666. aSig, savedASig: bits64;
  5667. z: int32;
  5668. begin
  5669. aSig := extractFloatx80Frac( a );
  5670. aExp := extractFloatx80Exp( a );
  5671. aSign := extractFloatx80Sign( a );
  5672. if ( $401E < aExp ) begin
  5673. if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) aSign := 0;
  5674. goto invalid;
  5675. end;
  5676. else if ( aExp < $3FFF ) begin
  5677. if ( aExp or aSig ) softfloat_exception_flags or= float_flag_inexact;
  5678. result := 0;
  5679. end;
  5680. shiftCount := $403E - aExp;
  5681. savedASig := aSig;
  5682. aSig >>= shiftCount;
  5683. z := aSig;
  5684. if ( aSign ) z := - z;
  5685. if ( ( z < 0 ) xor aSign ) begin
  5686. invalid:
  5687. float_raise( float_flag_invalid );
  5688. result := aSign ? (sbits32) $80000000 : $7FFFFFFF;
  5689. end;
  5690. if ( ( aSig shl shiftCount ) <> savedASig ) begin
  5691. softfloat_exception_flags or= float_flag_inexact;
  5692. end;
  5693. result := z;
  5694. end;
  5695. {*----------------------------------------------------------------------------
  5696. | Returns the result of converting the extended double-precision floating-
  5697. | point value `a' to the 64-bit two's complement integer format. The
  5698. | conversion is performed according to the IEC/IEEE Standard for Binary
  5699. | Floating-Point Arithmetic---which means in particular that the conversion
  5700. | is rounded according to the current rounding mode. If `a' is a NaN,
  5701. | the largest positive integer is returned. Otherwise, if the conversion
  5702. | overflows, the largest integer with the same sign as `a' is returned.
  5703. *----------------------------------------------------------------------------*}
  5704. function floatx80_to_int64(a: floatx80): int64;
  5705. var
  5706. aSign: flag;
  5707. aExp, shiftCount: int32;
  5708. aSig, aSigExtra: bits64;
  5709. begin
  5710. aSig := extractFloatx80Frac( a );
  5711. aExp := extractFloatx80Exp( a );
  5712. aSign := extractFloatx80Sign( a );
  5713. shiftCount := $403E - aExp;
  5714. if ( shiftCount <= 0 ) begin
  5715. if ( shiftCount ) begin
  5716. float_raise( float_flag_invalid );
  5717. if ( ! aSign
  5718. or ( ( aExp = $7FFF )
  5719. and ( aSig <> LIT64( $8000000000000000 ) ) )
  5720. ) begin
  5721. result := LIT64( $7FFFFFFFFFFFFFFF );
  5722. end;
  5723. result := (sbits64) LIT64( $8000000000000000 );
  5724. end;
  5725. aSigExtra := 0;
  5726. end;
  5727. else begin
  5728. shift64ExtraRightJamming( aSig, 0, shiftCount, aSig, aSigExtra );
  5729. end;
  5730. result := roundAndPackInt64( aSign, aSig, aSigExtra );
  5731. end;
  5732. {*----------------------------------------------------------------------------
  5733. | Returns the result of converting the extended double-precision floating-
  5734. | point value `a' to the 64-bit two's complement integer format. The
  5735. | conversion is performed according to the IEC/IEEE Standard for Binary
  5736. | Floating-Point Arithmetic, except that the conversion is always rounded
  5737. | toward zero. If `a' is a NaN, the largest positive integer is returned.
  5738. | Otherwise, if the conversion overflows, the largest integer with the same
  5739. | sign as `a' is returned.
  5740. *----------------------------------------------------------------------------*}
  5741. function floatx80_to_int64_round_to_zero(a: floatx80): int64;
  5742. var
  5743. aSign: flag;
  5744. aExp, shiftCount: int32;
  5745. aSig: bits64;
  5746. z: int64;
  5747. begin
  5748. aSig := extractFloatx80Frac( a );
  5749. aExp := extractFloatx80Exp( a );
  5750. aSign := extractFloatx80Sign( a );
  5751. shiftCount := aExp - $403E;
  5752. if ( 0 <= shiftCount ) begin
  5753. aSig = LIT64( $7FFFFFFFFFFFFFFF );
  5754. if ( ( a.high <> $C03E ) or aSig ) begin
  5755. float_raise( float_flag_invalid );
  5756. if ( ! aSign or ( ( aExp = $7FFF ) and aSig ) ) begin
  5757. result := LIT64( $7FFFFFFFFFFFFFFF );
  5758. end;
  5759. end;
  5760. result := (sbits64) LIT64( $8000000000000000 );
  5761. end;
  5762. else if ( aExp < $3FFF ) begin
  5763. if ( aExp or aSig ) softfloat_exception_flags or= float_flag_inexact;
  5764. result := 0;
  5765. end;
  5766. z := aSig>>( - shiftCount );
  5767. if ( (bits64) ( aSig shl ( shiftCount and 63 ) ) ) begin
  5768. softfloat_exception_flags or= float_flag_inexact;
  5769. end;
  5770. if ( aSign ) z := - z;
  5771. result := z;
  5772. end;
  5773. {*----------------------------------------------------------------------------
  5774. | Returns the result of converting the extended double-precision floating-
  5775. | point value `a' to the single-precision floating-point format. The
  5776. | conversion is performed according to the IEC/IEEE Standard for Binary
  5777. | Floating-Point Arithmetic.
  5778. *----------------------------------------------------------------------------*}
  5779. function floatx80_to_float32(a: floatx80): float32;
  5780. var
  5781. aSign: flag;
  5782. aExp: int32;
  5783. aSig: bits64;
  5784. begin
  5785. aSig := extractFloatx80Frac( a );
  5786. aExp := extractFloatx80Exp( a );
  5787. aSign := extractFloatx80Sign( a );
  5788. if ( aExp = $7FFF ) begin
  5789. if ( (bits64) ( aSig shl 1 ) ) begin
  5790. result := commonNaNToFloat32( floatx80ToCommonNaN( a ) );
  5791. end;
  5792. result := packFloat32( aSign, $FF, 0 );
  5793. end;
  5794. shift64RightJamming( aSig, 33, aSig );
  5795. if ( aExp or aSig ) aExp -= $3F81;
  5796. result := roundAndPackFloat32( aSign, aExp, aSig );
  5797. end;
  5798. {*----------------------------------------------------------------------------
  5799. | Returns the result of converting the extended double-precision floating-
  5800. | point value `a' to the double-precision floating-point format. The
  5801. | conversion is performed according to the IEC/IEEE Standard for Binary
  5802. | Floating-Point Arithmetic.
  5803. *----------------------------------------------------------------------------*}
  5804. function floatx80_to_float64(a: floatx80): float64;
  5805. var
  5806. aSign: flag;
  5807. aExp: int32;
  5808. aSig, zSig: bits64;
  5809. begin
  5810. aSig := extractFloatx80Frac( a );
  5811. aExp := extractFloatx80Exp( a );
  5812. aSign := extractFloatx80Sign( a );
  5813. if ( aExp = $7FFF ) begin
  5814. if ( (bits64) ( aSig shl 1 ) ) begin
  5815. result := commonNaNToFloat64( floatx80ToCommonNaN( a ) );
  5816. end;
  5817. result := packFloat64( aSign, $7FF, 0 );
  5818. end;
  5819. shift64RightJamming( aSig, 1, zSig );
  5820. if ( aExp or aSig ) aExp -= $3C01;
  5821. result := roundAndPackFloat64( aSign, aExp, zSig );
  5822. end;
  5823. {$ifdef FPC_SOFTFLOAT_FLOAT128}
  5824. {*----------------------------------------------------------------------------
  5825. | Returns the result of converting the extended double-precision floating-
  5826. | point value `a' to the quadruple-precision floating-point format. The
  5827. | conversion is performed according to the IEC/IEEE Standard for Binary
  5828. | Floating-Point Arithmetic.
  5829. *----------------------------------------------------------------------------*}
  5830. function floatx80_to_float128(a: floatx80): float128;
  5831. var
  5832. aSign: flag;
  5833. aExp: int16;
  5834. aSig, zSig0, zSig1: bits64;
  5835. begin
  5836. aSig := extractFloatx80Frac( a );
  5837. aExp := extractFloatx80Exp( a );
  5838. aSign := extractFloatx80Sign( a );
  5839. if ( ( aExp = $7FFF ) and (bits64) ( aSig shl 1 ) ) begin
  5840. result := commonNaNToFloat128( floatx80ToCommonNaN( a ) );
  5841. end;
  5842. shift128Right( aSig shl 1, 0, 16, zSig0, zSig1 );
  5843. result := packFloat128( aSign, aExp, zSig0, zSig1 );
  5844. end;
  5845. {$endif FPC_SOFTFLOAT_FLOAT128}
  5846. {*----------------------------------------------------------------------------
  5847. | Rounds the extended double-precision floating-point value `a' to an integer,
  5848. | and Returns the result as an extended quadruple-precision floating-point
  5849. | value. The operation is performed according to the IEC/IEEE Standard for
  5850. | Binary Floating-Point Arithmetic.
  5851. *----------------------------------------------------------------------------*}
  5852. function floatx80_round_to_int(a: floatx80): floatx80;
  5853. var
  5854. aSign: flag;
  5855. aExp: int32;
  5856. lastBitMask, roundBitsMask: bits64;
  5857. roundingMode: int8;
  5858. z: floatx80;
  5859. begin
  5860. aExp := extractFloatx80Exp( a );
  5861. if ( $403E <= aExp ) begin
  5862. if ( ( aExp = $7FFF ) and (bits64) ( extractFloatx80Frac( a ) shl 1 ) ) begin
  5863. result := propagateFloatx80NaN( a, a );
  5864. end;
  5865. result := a;
  5866. end;
  5867. if ( aExp < $3FFF ) begin
  5868. if ( ( aExp = 0 )
  5869. and ( (bits64) ( extractFloatx80Frac( a ) shl 1 ) = 0 ) ) begin
  5870. result := a;
  5871. end;
  5872. softfloat_exception_flags or= float_flag_inexact;
  5873. aSign := extractFloatx80Sign( a );
  5874. switch ( softfloat_rounding_mode ) begin
  5875. case float_round_nearest_even:
  5876. if ( ( aExp = $3FFE ) and (bits64) ( extractFloatx80Frac( a ) shl 1 )
  5877. ) begin
  5878. result :=
  5879. packFloatx80( aSign, $3FFF, LIT64( $8000000000000000 ) );
  5880. end;
  5881. break;
  5882. case float_round_down:
  5883. result :=
  5884. aSign ?
  5885. packFloatx80( 1, $3FFF, LIT64( $8000000000000000 ) )
  5886. : packFloatx80( 0, 0, 0 );
  5887. case float_round_up:
  5888. result :=
  5889. aSign ? packFloatx80( 1, 0, 0 )
  5890. : packFloatx80( 0, $3FFF, LIT64( $8000000000000000 ) );
  5891. end;
  5892. result := packFloatx80( aSign, 0, 0 );
  5893. end;
  5894. lastBitMask := 1;
  5895. lastBitMask shl = $403E - aExp;
  5896. roundBitsMask := lastBitMask - 1;
  5897. z := a;
  5898. roundingMode := softfloat_rounding_mode;
  5899. if ( roundingMode = float_round_nearest_even ) begin
  5900. z.low += lastBitMask>>1;
  5901. if ( ( z.low and roundBitsMask ) = 0 ) z.low = ~ lastBitMask;
  5902. end;
  5903. else if ( roundingMode <> float_round_to_zero ) begin
  5904. if ( extractFloatx80Sign( z ) xor ( roundingMode = float_round_up ) ) begin
  5905. z.low += roundBitsMask;
  5906. end;
  5907. end;
  5908. z.low = ~ roundBitsMask;
  5909. if ( z.low = 0 ) begin
  5910. ++z.high;
  5911. z.low := LIT64( $8000000000000000 );
  5912. end;
  5913. if ( z.low <> a.low ) softfloat_exception_flags or= float_flag_inexact;
  5914. result := z;
  5915. end;
  5916. {*----------------------------------------------------------------------------
  5917. | Returns the result of adding the absolute values of the extended double-
  5918. | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
  5919. | negated before being returned. `zSign' is ignored if the result is a NaN.
  5920. | The addition is performed according to the IEC/IEEE Standard for Binary
  5921. | Floating-Point Arithmetic.
  5922. *----------------------------------------------------------------------------*}
  5923. function addFloatx80Sigs(a: floatx80; b: floatx80; zSign : flag): floatx80;
  5924. var
  5925. aExp, bExp, zExp: int32;
  5926. aSig, bSig, zSig0, zSig1: bits64;
  5927. expDiff: int32;
  5928. begin
  5929. aSig := extractFloatx80Frac( a );
  5930. aExp := extractFloatx80Exp( a );
  5931. bSig := extractFloatx80Frac( b );
  5932. bExp := extractFloatx80Exp( b );
  5933. expDiff := aExp - bExp;
  5934. if ( 0 < expDiff ) begin
  5935. if ( aExp = $7FFF ) begin
  5936. if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  5937. result := a;
  5938. end;
  5939. if ( bExp = 0 ) --expDiff;
  5940. shift64ExtraRightJamming( bSig, 0, expDiff, bSig, zSig1 );
  5941. zExp := aExp;
  5942. end;
  5943. else if ( expDiff < 0 ) begin
  5944. if ( bExp = $7FFF ) begin
  5945. if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  5946. result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
  5947. end;
  5948. if ( aExp = 0 ) ++expDiff;
  5949. shift64ExtraRightJamming( aSig, 0, - expDiff, aSig, zSig1 );
  5950. zExp := bExp;
  5951. end;
  5952. else begin
  5953. if ( aExp = $7FFF ) begin
  5954. if ( (bits64) ( ( aSig or bSig ) shl 1 ) ) begin
  5955. result := propagateFloatx80NaN( a, b );
  5956. end;
  5957. result := a;
  5958. end;
  5959. zSig1 := 0;
  5960. zSig0 := aSig + bSig;
  5961. if ( aExp = 0 ) begin
  5962. normalizeFloatx80Subnormal( zSig0, zExp, zSig0 );
  5963. goto roundAndPack;
  5964. end;
  5965. zExp := aExp;
  5966. goto shiftRight1;
  5967. end;
  5968. zSig0 := aSig + bSig;
  5969. if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
  5970. shiftRight1:
  5971. shift64ExtraRightJamming( zSig0, zSig1, 1, zSig0, zSig1 );
  5972. zSig0 or= LIT64( $8000000000000000 );
  5973. ++zExp;
  5974. roundAndPack:
  5975. result :=
  5976. roundAndPackFloatx80(
  5977. floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  5978. end;
  5979. {*----------------------------------------------------------------------------
  5980. | Returns the result of subtracting the absolute values of the extended
  5981. | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
  5982. | difference is negated before being returned. `zSign' is ignored if the
  5983. | result is a NaN. The subtraction is performed according to the IEC/IEEE
  5984. | Standard for Binary Floating-Point Arithmetic.
  5985. *----------------------------------------------------------------------------*}
  5986. function subFloatx80Sigs(a: floatx80; b: floatx80; zSign : flag): floatx80;
  5987. var
  5988. aExp, bExp, zExp: int32;
  5989. aSig, bSig, zSig0, zSig1: bits64;
  5990. expDiff: int32;
  5991. z: floatx80;
  5992. begin
  5993. aSig := extractFloatx80Frac( a );
  5994. aExp := extractFloatx80Exp( a );
  5995. bSig := extractFloatx80Frac( b );
  5996. bExp := extractFloatx80Exp( b );
  5997. expDiff := aExp - bExp;
  5998. if ( 0 < expDiff ) goto aExpBigger;
  5999. if ( expDiff < 0 ) goto bExpBigger;
  6000. if ( aExp = $7FFF ) begin
  6001. if ( (bits64) ( ( aSig or bSig ) shl 1 ) ) begin
  6002. result := propagateFloatx80NaN( a, b );
  6003. end;
  6004. float_raise( float_flag_invalid );
  6005. z.low := floatx80_default_nan_low;
  6006. z.high := floatx80_default_nan_high;
  6007. result := z;
  6008. end;
  6009. if ( aExp = 0 ) begin
  6010. aExp := 1;
  6011. bExp := 1;
  6012. end;
  6013. zSig1 := 0;
  6014. if ( bSig < aSig ) goto aBigger;
  6015. if ( aSig < bSig ) goto bBigger;
  6016. result := packFloatx80( softfloat_rounding_mode = float_round_down, 0, 0 );
  6017. bExpBigger:
  6018. if ( bExp = $7FFF ) begin
  6019. if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6020. result := packFloatx80( zSign xor 1, $7FFF, LIT64( $8000000000000000 ) );
  6021. end;
  6022. if ( aExp = 0 ) ++expDiff;
  6023. shift128RightJamming( aSig, 0, - expDiff, aSig, zSig1 );
  6024. bBigger:
  6025. sub128( bSig, 0, aSig, zSig1, zSig0, zSig1 );
  6026. zExp := bExp;
  6027. zSign xor = 1;
  6028. goto normalizeRoundAndPack;
  6029. aExpBigger:
  6030. if ( aExp = $7FFF ) begin
  6031. if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6032. result := a;
  6033. end;
  6034. if ( bExp = 0 ) --expDiff;
  6035. shift128RightJamming( bSig, 0, expDiff, bSig, zSig1 );
  6036. aBigger:
  6037. sub128( aSig, 0, bSig, zSig1, zSig0, zSig1 );
  6038. zExp := aExp;
  6039. normalizeRoundAndPack:
  6040. result :=
  6041. normalizeRoundAndPackFloatx80(
  6042. floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  6043. end;
  6044. {*----------------------------------------------------------------------------
  6045. | Returns the result of adding the extended double-precision floating-point
  6046. | values `a' and `b'. The operation is performed according to the IEC/IEEE
  6047. | Standard for Binary Floating-Point Arithmetic.
  6048. *----------------------------------------------------------------------------*}
  6049. function floatx80_add(a: floatx80; b: floatx80): floatx80;
  6050. var
  6051. aSign, bSign: flag;
  6052. begin
  6053. aSign := extractFloatx80Sign( a );
  6054. bSign := extractFloatx80Sign( b );
  6055. if ( aSign = bSign ) begin
  6056. result := addFloatx80Sigs( a, b, aSign );
  6057. end;
  6058. else begin
  6059. result := subFloatx80Sigs( a, b, aSign );
  6060. end;
  6061. end;
  6062. {*----------------------------------------------------------------------------
  6063. | Returns the result of subtracting the extended double-precision floating-
  6064. | point values `a' and `b'. The operation is performed according to the
  6065. | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6066. *----------------------------------------------------------------------------*}
  6067. function floatx80_sub(a: floatx80; b: floatx80 ): floatx80;
  6068. var
  6069. aSign, bSign: flag;
  6070. begin
  6071. aSign := extractFloatx80Sign( a );
  6072. bSign := extractFloatx80Sign( b );
  6073. if ( aSign = bSign ) begin
  6074. result := subFloatx80Sigs( a, b, aSign );
  6075. end;
  6076. else begin
  6077. result := addFloatx80Sigs( a, b, aSign );
  6078. end;
  6079. end;
  6080. {*----------------------------------------------------------------------------
  6081. | Returns the result of multiplying the extended double-precision floating-
  6082. | point values `a' and `b'. The operation is performed according to the
  6083. | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6084. *----------------------------------------------------------------------------*}
  6085. function floatx80_mul(a: floatx80; b: floatx80): floatx80;
  6086. var
  6087. aSign, bSign, zSign: flag;
  6088. aExp, bExp, zExp: int32;
  6089. aSig, bSig, zSig0, zSig1: bits64;
  6090. z: floatx80;
  6091. begin
  6092. aSig := extractFloatx80Frac( a );
  6093. aExp := extractFloatx80Exp( a );
  6094. aSign := extractFloatx80Sign( a );
  6095. bSig := extractFloatx80Frac( b );
  6096. bExp := extractFloatx80Exp( b );
  6097. bSign := extractFloatx80Sign( b );
  6098. zSign := aSign xor bSign;
  6099. if ( aExp = $7FFF ) begin
  6100. if ( (bits64) ( aSig shl 1 )
  6101. or ( ( bExp = $7FFF ) and (bits64) ( bSig shl 1 ) ) ) begin
  6102. result := propagateFloatx80NaN( a, b );
  6103. end;
  6104. if ( ( bExp or bSig ) = 0 ) goto invalid;
  6105. result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
  6106. end;
  6107. if ( bExp = $7FFF ) begin
  6108. if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6109. if ( ( aExp or aSig ) = 0 ) begin
  6110. invalid:
  6111. float_raise( float_flag_invalid );
  6112. z.low := floatx80_default_nan_low;
  6113. z.high := floatx80_default_nan_high;
  6114. result := z;
  6115. end;
  6116. result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
  6117. end;
  6118. if ( aExp = 0 ) begin
  6119. if ( aSig = 0 ) result := packFloatx80( zSign, 0, 0 );
  6120. normalizeFloatx80Subnormal( aSig, aExp, aSig );
  6121. end;
  6122. if ( bExp = 0 ) begin
  6123. if ( bSig = 0 ) result := packFloatx80( zSign, 0, 0 );
  6124. normalizeFloatx80Subnormal( bSig, bExp, bSig );
  6125. end;
  6126. zExp := aExp + bExp - $3FFE;
  6127. mul64To128( aSig, bSig, zSig0, zSig1 );
  6128. if ( 0 < (sbits64) zSig0 ) begin
  6129. shortShift128Left( zSig0, zSig1, 1, zSig0, zSig1 );
  6130. --zExp;
  6131. end;
  6132. result :=
  6133. roundAndPackFloatx80(
  6134. floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  6135. end;
  6136. {*----------------------------------------------------------------------------
  6137. | Returns the result of dividing the extended double-precision floating-point
  6138. | value `a' by the corresponding value `b'. The operation is performed
  6139. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6140. *----------------------------------------------------------------------------*}
  6141. function floatx80_div(a: floatx80; b: floatx80 ): floatx80;
  6142. var
  6143. aSign, bSign, zSign: flag;
  6144. aExp, bExp, zExp: int32;
  6145. aSig, bSig, zSig0, zSig1: bits64;
  6146. rem0, rem1, rem2, term0, term1, term2: bits64;
  6147. z: floatx80;
  6148. begin
  6149. aSig := extractFloatx80Frac( a );
  6150. aExp := extractFloatx80Exp( a );
  6151. aSign := extractFloatx80Sign( a );
  6152. bSig := extractFloatx80Frac( b );
  6153. bExp := extractFloatx80Exp( b );
  6154. bSign := extractFloatx80Sign( b );
  6155. zSign := aSign xor bSign;
  6156. if ( aExp = $7FFF ) begin
  6157. if ( (bits64) ( aSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6158. if ( bExp = $7FFF ) begin
  6159. if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6160. goto invalid;
  6161. end;
  6162. result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
  6163. end;
  6164. if ( bExp = $7FFF ) begin
  6165. if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6166. result := packFloatx80( zSign, 0, 0 );
  6167. end;
  6168. if ( bExp = 0 ) begin
  6169. if ( bSig = 0 ) begin
  6170. if ( ( aExp or aSig ) = 0 ) begin
  6171. invalid:
  6172. float_raise( float_flag_invalid );
  6173. z.low := floatx80_default_nan_low;
  6174. z.high := floatx80_default_nan_high;
  6175. result := z;
  6176. end;
  6177. float_raise( float_flag_divbyzero );
  6178. result := packFloatx80( zSign, $7FFF, LIT64( $8000000000000000 ) );
  6179. end;
  6180. normalizeFloatx80Subnormal( bSig, bExp, bSig );
  6181. end;
  6182. if ( aExp = 0 ) begin
  6183. if ( aSig = 0 ) result := packFloatx80( zSign, 0, 0 );
  6184. normalizeFloatx80Subnormal( aSig, aExp, aSig );
  6185. end;
  6186. zExp := aExp - bExp + $3FFE;
  6187. rem1 := 0;
  6188. if ( bSig <= aSig ) begin
  6189. shift128Right( aSig, 0, 1, aSig, rem1 );
  6190. ++zExp;
  6191. end;
  6192. zSig0 := estimateDiv128To64( aSig, rem1, bSig );
  6193. mul64To128( bSig, zSig0, term0, term1 );
  6194. sub128( aSig, rem1, term0, term1, rem0, rem1 );
  6195. while ( (sbits64) rem0 < 0 ) begin
  6196. --zSig0;
  6197. add128( rem0, rem1, 0, bSig, rem0, rem1 );
  6198. end;
  6199. zSig1 := estimateDiv128To64( rem1, 0, bSig );
  6200. if ( (bits64) ( zSig1 shl 1 ) <= 8 ) begin
  6201. mul64To128( bSig, zSig1, term1, term2 );
  6202. sub128( rem1, 0, term1, term2, rem1, rem2 );
  6203. while ( (sbits64) rem1 < 0 ) begin
  6204. --zSig1;
  6205. add128( rem1, rem2, 0, bSig, rem1, rem2 );
  6206. end;
  6207. zSig1 or= ( ( rem1 or rem2 ) <> 0 );
  6208. end;
  6209. result :=
  6210. roundAndPackFloatx80(
  6211. floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
  6212. end;
  6213. {*----------------------------------------------------------------------------
  6214. | Returns the remainder of the extended double-precision floating-point value
  6215. | `a' with respect to the corresponding value `b'. The operation is performed
  6216. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6217. *----------------------------------------------------------------------------*}
  6218. function floatx80_rem(a: floatx80; b: floatx80 ): floatx80;
  6219. var
  6220. aSign, bSign, zSign: flag;
  6221. aExp, bExp, expDiff: int32;
  6222. aSig0, aSig1, bSig: bits64;
  6223. q, term0, term1, alternateASig0, alternateASig1: bits64;
  6224. z: floatx80;
  6225. begin
  6226. aSig0 := extractFloatx80Frac( a );
  6227. aExp := extractFloatx80Exp( a );
  6228. aSign := extractFloatx80Sign( a );
  6229. bSig := extractFloatx80Frac( b );
  6230. bExp := extractFloatx80Exp( b );
  6231. bSign := extractFloatx80Sign( b );
  6232. if ( aExp = $7FFF ) begin
  6233. if ( (bits64) ( aSig0 shl 1 )
  6234. or ( ( bExp = $7FFF ) and (bits64) ( bSig shl 1 ) ) ) begin
  6235. result := propagateFloatx80NaN( a, b );
  6236. end;
  6237. goto invalid;
  6238. end;
  6239. if ( bExp = $7FFF ) begin
  6240. if ( (bits64) ( bSig shl 1 ) ) result := propagateFloatx80NaN( a, b );
  6241. result := a;
  6242. end;
  6243. if ( bExp = 0 ) begin
  6244. if ( bSig = 0 ) begin
  6245. invalid:
  6246. float_raise( float_flag_invalid );
  6247. z.low := floatx80_default_nan_low;
  6248. z.high := floatx80_default_nan_high;
  6249. result := z;
  6250. end;
  6251. normalizeFloatx80Subnormal( bSig, bExp, bSig );
  6252. end;
  6253. if ( aExp = 0 ) begin
  6254. if ( (bits64) ( aSig0 shl 1 ) = 0 ) result := a;
  6255. normalizeFloatx80Subnormal( aSig0, aExp, aSig0 );
  6256. end;
  6257. bSig or= LIT64( $8000000000000000 );
  6258. zSign := aSign;
  6259. expDiff := aExp - bExp;
  6260. aSig1 := 0;
  6261. if ( expDiff < 0 ) begin
  6262. if ( expDiff < -1 ) result := a;
  6263. shift128Right( aSig0, 0, 1, aSig0, aSig1 );
  6264. expDiff := 0;
  6265. end;
  6266. q := ( bSig <= aSig0 );
  6267. if ( q ) aSig0 -= bSig;
  6268. expDiff -= 64;
  6269. while ( 0 < expDiff ) begin
  6270. q := estimateDiv128To64( aSig0, aSig1, bSig );
  6271. q := ( 2 < q ) ? q - 2 : 0;
  6272. mul64To128( bSig, q, term0, term1 );
  6273. sub128( aSig0, aSig1, term0, term1, aSig0, aSig1 );
  6274. shortShift128Left( aSig0, aSig1, 62, aSig0, aSig1 );
  6275. expDiff -= 62;
  6276. end;
  6277. expDiff += 64;
  6278. if ( 0 < expDiff ) begin
  6279. q := estimateDiv128To64( aSig0, aSig1, bSig );
  6280. q := ( 2 < q ) ? q - 2 : 0;
  6281. q >>= 64 - expDiff;
  6282. mul64To128( bSig, q shl ( 64 - expDiff ), term0, term1 );
  6283. sub128( aSig0, aSig1, term0, term1, aSig0, aSig1 );
  6284. shortShift128Left( 0, bSig, 64 - expDiff, term0, term1 );
  6285. while ( le128( term0, term1, aSig0, aSig1 ) ) begin
  6286. ++q;
  6287. sub128( aSig0, aSig1, term0, term1, aSig0, aSig1 );
  6288. end;
  6289. end;
  6290. else begin
  6291. term1 := 0;
  6292. term0 := bSig;
  6293. end;
  6294. sub128( term0, term1, aSig0, aSig1, alternateASig0, alternateASig1 );
  6295. if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
  6296. or ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
  6297. and ( q and 1 ) )
  6298. ) begin
  6299. aSig0 := alternateASig0;
  6300. aSig1 := alternateASig1;
  6301. zSign := ! zSign;
  6302. end;
  6303. result :=
  6304. normalizeRoundAndPackFloatx80(
  6305. 80, zSign, bExp + expDiff, aSig0, aSig1 );
  6306. end;
  6307. {*----------------------------------------------------------------------------
  6308. | Returns the square root of the extended double-precision floating-point
  6309. | value `a'. The operation is performed according to the IEC/IEEE Standard
  6310. | for Binary Floating-Point Arithmetic.
  6311. *----------------------------------------------------------------------------*}
  6312. function floatx80_sqrt(a: floatx80): floatx80;
  6313. var
  6314. aSign: flag;
  6315. aExp, zExp: int32;
  6316. aSig0, aSig1, zSig0, zSig1, doubleZSig0: bits64;
  6317. rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits64;
  6318. z: floatx80;
  6319. label
  6320. invalid;
  6321. begin
  6322. aSig0 := extractFloatx80Frac( a );
  6323. aExp := extractFloatx80Exp( a );
  6324. aSign := extractFloatx80Sign( a );
  6325. if ( aExp = $7FFF ) begin
  6326. if ( (bits64) ( aSig0 shl 1 ) ) result := propagateFloatx80NaN( a, a );
  6327. if ( ! aSign ) result := a;
  6328. goto invalid;
  6329. end;
  6330. if ( aSign ) begin
  6331. if ( ( aExp or aSig0 ) = 0 ) result := a;
  6332. invalid:
  6333. float_raise( float_flag_invalid );
  6334. z.low := floatx80_default_nan_low;
  6335. z.high := floatx80_default_nan_high;
  6336. result := z;
  6337. end;
  6338. if ( aExp = 0 ) begin
  6339. if ( aSig0 = 0 ) result := packFloatx80( 0, 0, 0 );
  6340. normalizeFloatx80Subnormal( aSig0, aExp, aSig0 );
  6341. end;
  6342. zExp := ( ( aExp - $3FFF )>>1 ) + $3FFF;
  6343. zSig0 := estimateSqrt32( aExp, aSig0>>32 );
  6344. shift128Right( aSig0, 0, 2 + ( aExp and 1 ), aSig0, aSig1 );
  6345. zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
  6346. doubleZSig0 := zSig0 shl 1;
  6347. mul64To128( zSig0, zSig0, term0, term1 );
  6348. sub128( aSig0, aSig1, term0, term1, rem0, rem1 );
  6349. while ( (sbits64) rem0 < 0 ) begin
  6350. --zSig0;
  6351. doubleZSig0 -= 2;
  6352. add128( rem0, rem1, zSig0>>63, doubleZSig0 or 1, rem0, rem1 );
  6353. end;
  6354. zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
  6355. if ( ( zSig1 and LIT64( $3FFFFFFFFFFFFFFF ) ) <= 5 ) begin
  6356. if ( zSig1 = 0 ) zSig1 := 1;
  6357. mul64To128( doubleZSig0, zSig1, term1, term2 );
  6358. sub128( rem1, 0, term1, term2, rem1, rem2 );
  6359. mul64To128( zSig1, zSig1, term2, term3 );
  6360. sub192( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
  6361. while ( (sbits64) rem1 < 0 ) begin
  6362. --zSig1;
  6363. shortShift128Left( 0, zSig1, 1, term2, term3 );
  6364. term3 or= 1;
  6365. term2 or= doubleZSig0;
  6366. add192( rem1, rem2, rem3, 0, term2, term3, rem1, rem2, rem3 );
  6367. end;
  6368. zSig1 or= ( ( rem1 or rem2 or rem3 ) <> 0 );
  6369. end;
  6370. shortShift128Left( 0, zSig1, 1, zSig0, zSig1 );
  6371. zSig0 or= doubleZSig0;
  6372. result :=
  6373. roundAndPackFloatx80(
  6374. floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
  6375. end;
  6376. {*----------------------------------------------------------------------------
  6377. | Returns 1 if the extended double-precision floating-point value `a' is
  6378. | equal to the corresponding value `b', and 0 otherwise. The comparison is
  6379. | performed according to the IEC/IEEE Standard for Binary Floating-Point
  6380. | Arithmetic.
  6381. *----------------------------------------------------------------------------*}
  6382. function floatx80_eq(a: floatx80; b: floatx80 ): flag;
  6383. begin
  6384. if ( ( ( extractFloatx80Exp( a ) = $7FFF )
  6385. and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
  6386. or ( ( extractFloatx80Exp( b ) = $7FFF )
  6387. and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
  6388. ) begin
  6389. if ( floatx80_is_signaling_nan( a )
  6390. or floatx80_is_signaling_nan( b ) ) begin
  6391. float_raise( float_flag_invalid );
  6392. end;
  6393. result := 0;
  6394. end;
  6395. result :=
  6396. ( a.low = b.low )
  6397. and ( ( a.high = b.high )
  6398. or ( ( a.low = 0 )
  6399. and ( (bits16) ( ( a.high or b.high ) shl 1 ) = 0 ) )
  6400. );
  6401. end;
  6402. {*----------------------------------------------------------------------------
  6403. | Returns 1 if the extended double-precision floating-point value `a' is
  6404. | less than or equal to the corresponding value `b', and 0 otherwise. The
  6405. | comparison is performed according to the IEC/IEEE Standard for Binary
  6406. | Floating-Point Arithmetic.
  6407. *----------------------------------------------------------------------------*}
  6408. function floatx80_le(a: floatx80; b: floatx80 ): flag;
  6409. var
  6410. aSign, bSign: flag;
  6411. begin
  6412. if ( ( ( extractFloatx80Exp( a ) = $7FFF )
  6413. and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
  6414. or ( ( extractFloatx80Exp( b ) = $7FFF )
  6415. and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
  6416. ) begin
  6417. float_raise( float_flag_invalid );
  6418. result := 0;
  6419. end;
  6420. aSign := extractFloatx80Sign( a );
  6421. bSign := extractFloatx80Sign( b );
  6422. if ( aSign <> bSign ) begin
  6423. result :=
  6424. aSign
  6425. or ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  6426. = 0 );
  6427. end;
  6428. result :=
  6429. aSign ? le128( b.high, b.low, a.high, a.low )
  6430. : le128( a.high, a.low, b.high, b.low );
  6431. end;
  6432. {*----------------------------------------------------------------------------
  6433. | Returns 1 if the extended double-precision floating-point value `a' is
  6434. | less than the corresponding value `b', and 0 otherwise. The comparison
  6435. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  6436. | Arithmetic.
  6437. *----------------------------------------------------------------------------*}
  6438. function floatx80_lt(a: floatx80; b: floatx80 ): flag;
  6439. var
  6440. aSign, bSign: flag;
  6441. begin
  6442. if ( ( ( extractFloatx80Exp( a ) = $7FFF )
  6443. and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
  6444. or ( ( extractFloatx80Exp( b ) = $7FFF )
  6445. and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
  6446. ) begin
  6447. float_raise( float_flag_invalid );
  6448. result := 0;
  6449. end;
  6450. aSign := extractFloatx80Sign( a );
  6451. bSign := extractFloatx80Sign( b );
  6452. if ( aSign <> bSign ) begin
  6453. result :=
  6454. aSign
  6455. and ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  6456. <> 0 );
  6457. end;
  6458. result :=
  6459. aSign ? lt128( b.high, b.low, a.high, a.low )
  6460. : lt128( a.high, a.low, b.high, b.low );
  6461. end;
  6462. {*----------------------------------------------------------------------------
  6463. | Returns 1 if the extended double-precision floating-point value `a' is equal
  6464. | to the corresponding value `b', and 0 otherwise. The invalid exception is
  6465. | raised if either operand is a NaN. Otherwise, the comparison is performed
  6466. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6467. *----------------------------------------------------------------------------*}
  6468. function floatx80_eq_signaling(a: floatx80; b: floatx80 ): flag;
  6469. begin
  6470. if ( ( ( extractFloatx80Exp( a ) = $7FFF )
  6471. and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
  6472. or ( ( extractFloatx80Exp( b ) = $7FFF )
  6473. and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
  6474. ) begin
  6475. float_raise( float_flag_invalid );
  6476. result := 0;
  6477. end;
  6478. result :=
  6479. ( a.low = b.low )
  6480. and ( ( a.high = b.high )
  6481. or ( ( a.low = 0 )
  6482. and ( (bits16) ( ( a.high or b.high ) shl 1 ) = 0 ) )
  6483. );
  6484. end;
  6485. {*----------------------------------------------------------------------------
  6486. | Returns 1 if the extended double-precision floating-point value `a' is less
  6487. | than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
  6488. | do not cause an exception. Otherwise, the comparison is performed according
  6489. | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6490. *----------------------------------------------------------------------------*}
  6491. function floatx80_le_quiet(a: floatx80; b: floatx80 ): flag;
  6492. var
  6493. aSign, bSign: flag;
  6494. begin
  6495. if ( ( ( extractFloatx80Exp( a ) = $7FFF )
  6496. and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
  6497. or ( ( extractFloatx80Exp( b ) = $7FFF )
  6498. and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
  6499. ) begin
  6500. if ( floatx80_is_signaling_nan( a )
  6501. or floatx80_is_signaling_nan( b ) ) begin
  6502. float_raise( float_flag_invalid );
  6503. end;
  6504. result := 0;
  6505. end;
  6506. aSign := extractFloatx80Sign( a );
  6507. bSign := extractFloatx80Sign( b );
  6508. if ( aSign <> bSign ) begin
  6509. result :=
  6510. aSign
  6511. or ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  6512. = 0 );
  6513. end;
  6514. result :=
  6515. aSign ? le128( b.high, b.low, a.high, a.low )
  6516. : le128( a.high, a.low, b.high, b.low );
  6517. end;
  6518. {*----------------------------------------------------------------------------
  6519. | Returns 1 if the extended double-precision floating-point value `a' is less
  6520. | than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
  6521. | an exception. Otherwise, the comparison is performed according to the
  6522. | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6523. *----------------------------------------------------------------------------*}
  6524. function floatx80_lt_quiet(a: floatx80; b: floatx80 ): flag;
  6525. var
  6526. aSign, bSign: flag;
  6527. begin
  6528. if ( ( ( extractFloatx80Exp( a ) = $7FFF )
  6529. and (bits64) ( extractFloatx80Frac( a ) shl 1 ) )
  6530. or ( ( extractFloatx80Exp( b ) = $7FFF )
  6531. and (bits64) ( extractFloatx80Frac( b ) shl 1 ) )
  6532. ) begin
  6533. if ( floatx80_is_signaling_nan( a )
  6534. or floatx80_is_signaling_nan( b ) ) begin
  6535. float_raise( float_flag_invalid );
  6536. end;
  6537. result := 0;
  6538. end;
  6539. aSign := extractFloatx80Sign( a );
  6540. bSign := extractFloatx80Sign( b );
  6541. if ( aSign <> bSign ) begin
  6542. result :=
  6543. aSign
  6544. and ( ( ( (bits16) ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  6545. <> 0 );
  6546. end;
  6547. result :=
  6548. aSign ? lt128( b.high, b.low, a.high, a.low )
  6549. : lt128( a.high, a.low, b.high, b.low );
  6550. end;
  6551. {$endif FPC_SOFTFLOAT_FLOATX80}
  6552. {$ifdef FPC_SOFTFLOAT_FLOAT128}
  6553. {*----------------------------------------------------------------------------
  6554. | Returns the least-significant 64 fraction bits of the quadruple-precision
  6555. | floating-point value `a'.
  6556. *----------------------------------------------------------------------------*}
  6557. function extractFloat128Frac1(a : float128): bits64;
  6558. begin
  6559. result:=a.low;
  6560. end;
  6561. {*----------------------------------------------------------------------------
  6562. | Returns the most-significant 48 fraction bits of the quadruple-precision
  6563. | floating-point value `a'.
  6564. *----------------------------------------------------------------------------*}
  6565. function extractFloat128Frac0(a : float128): bits64;
  6566. begin
  6567. result:=a.high and int64($0000FFFFFFFFFFFF);
  6568. end;
  6569. {*----------------------------------------------------------------------------
  6570. | Returns the exponent bits of the quadruple-precision floating-point value
  6571. | `a'.
  6572. *----------------------------------------------------------------------------*}
  6573. function extractFloat128Exp(a : float128): int32;
  6574. begin
  6575. result:=( a.high shr 48 ) and $7FFF;
  6576. end;
  6577. {*----------------------------------------------------------------------------
  6578. | Returns the sign bit of the quadruple-precision floating-point value `a'.
  6579. *----------------------------------------------------------------------------*}
  6580. function extractFloat128Sign(a : float128): flag;
  6581. begin
  6582. result:=a.high shr 63;
  6583. end;
  6584. {*----------------------------------------------------------------------------
  6585. | Normalizes the subnormal quadruple-precision floating-point value
  6586. | represented by the denormalized significand formed by the concatenation of
  6587. | `aSig0' and `aSig1'. The normalized exponent is stored at the location
  6588. | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
  6589. | significand are stored at the location pointed to by `zSig0Ptr', and the
  6590. | least significant 64 bits of the normalized significand are stored at the
  6591. | location pointed to by `zSig1Ptr'.
  6592. *----------------------------------------------------------------------------*}
  6593. procedure normalizeFloat128Subnormal(
  6594. aSig0: bits64;
  6595. aSig1: bits64;
  6596. var zExpPtr: int32;
  6597. var zSig0Ptr: bits64;
  6598. var zSig1Ptr: bits64);
  6599. var
  6600. shiftCount: int8;
  6601. begin
  6602. if ( aSig0 = 0 ) then
  6603. begin
  6604. shiftCount := countLeadingZeros64( aSig1 ) - 15;
  6605. if ( shiftCount < 0 ) then
  6606. begin
  6607. zSig0Ptr := aSig1 shr ( - shiftCount );
  6608. zSig1Ptr := aSig1 shl ( shiftCount and 63 );
  6609. end
  6610. else begin
  6611. zSig0Ptr := aSig1 shl shiftCount;
  6612. zSig1Ptr := 0;
  6613. end;
  6614. zExpPtr := - shiftCount - 63;
  6615. end
  6616. else begin
  6617. shiftCount := countLeadingZeros64( aSig0 ) - 15;
  6618. shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
  6619. zExpPtr := 1 - shiftCount;
  6620. end;
  6621. end;
  6622. {*----------------------------------------------------------------------------
  6623. | Packs the sign `zSign', the exponent `zExp', and the significand formed
  6624. | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
  6625. | floating-point value, returning the result. After being shifted into the
  6626. | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
  6627. | added together to form the most significant 32 bits of the result. This
  6628. | means that any integer portion of `zSig0' will be added into the exponent.
  6629. | Since a properly normalized significand will have an integer portion equal
  6630. | to 1, the `zExp' input should be 1 less than the desired result exponent
  6631. | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
  6632. | significand.
  6633. *----------------------------------------------------------------------------*}
  6634. function packFloat128( zSign: flag; zExp: int32; zSig0: bits64; zSig1: bits64) : float128;
  6635. var
  6636. z: float128;
  6637. begin
  6638. z.low := zSig1;
  6639. z.high := ( ( bits64(zSign) ) shl 63 ) + ( ( bits64(zExp) ) shl 48 ) + zSig0;
  6640. result:=z;
  6641. end;
  6642. {*----------------------------------------------------------------------------
  6643. | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  6644. | and extended significand formed by the concatenation of `zSig0', `zSig1',
  6645. | and `zSig2', and returns the proper quadruple-precision floating-point value
  6646. | corresponding to the abstract input. Ordinarily, the abstract value is
  6647. | simply rounded and packed into the quadruple-precision format, with the
  6648. | inexact exception raised if the abstract input cannot be represented
  6649. | exactly. However, if the abstract value is too large, the overflow and
  6650. | inexact exceptions are raised and an infinity or maximal finite value is
  6651. | returned. If the abstract value is too small, the input value is rounded to
  6652. | a subnormal number, and the underflow and inexact exceptions are raised if
  6653. | the abstract input cannot be represented exactly as a subnormal quadruple-
  6654. | precision floating-point number.
  6655. | The input significand must be normalized or smaller. If the input
  6656. | significand is not normalized, `zExp' must be 0; in that case, the result
  6657. | returned is a subnormal number, and it must not require rounding. In the
  6658. | usual case that the input significand is normalized, `zExp' must be 1 less
  6659. | than the ``true'' floating-point exponent. The handling of underflow and
  6660. | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  6661. *----------------------------------------------------------------------------*}
  6662. function roundAndPackFloat128(zSign: flag; zExp: int32; zSig0: bits64; zSig1: bits64; zSig2: bits64): float128;
  6663. var
  6664. roundingMode: int8;
  6665. roundNearestEven, increment, isTiny: flag;
  6666. begin
  6667. roundingMode := softfloat_rounding_mode;
  6668. roundNearestEven := ord( roundingMode = float_round_nearest_even );
  6669. increment := ord( sbits64(zSig2) < 0 );
  6670. if ( roundNearestEven=0 ) then
  6671. begin
  6672. if ( roundingMode = float_round_to_zero ) then
  6673. begin
  6674. increment := 0;
  6675. end
  6676. else begin
  6677. if ( zSign<>0 ) then
  6678. begin
  6679. increment := ord( roundingMode = float_round_down ) and zSig2;
  6680. end
  6681. else begin
  6682. increment := ord( roundingMode = float_round_up ) and zSig2;
  6683. end;
  6684. end;
  6685. end;
  6686. if ( $7FFD <= bits32(zExp) ) then
  6687. begin
  6688. if ( ord( $7FFD < zExp )
  6689. or ( ord( zExp = $7FFD )
  6690. and eq128(
  6691. int64( $0001FFFFFFFFFFFF ),
  6692. int64( $FFFFFFFFFFFFFFFF ),
  6693. zSig0,
  6694. zSig1
  6695. )
  6696. and increment
  6697. )
  6698. )<>0 then
  6699. begin
  6700. float_raise( float_flag_overflow or float_flag_inexact );
  6701. if ( ord( roundingMode = float_round_to_zero )
  6702. or ( zSign and ord( roundingMode = float_round_up ) )
  6703. or ( not(zSign) and ord( roundingMode = float_round_down ) )
  6704. )<>0 then
  6705. begin
  6706. result :=
  6707. packFloat128(
  6708. zSign,
  6709. $7FFE,
  6710. int64( $0000FFFFFFFFFFFF ),
  6711. int64( $FFFFFFFFFFFFFFFF )
  6712. );
  6713. end;
  6714. result:=packFloat128( zSign, $7FFF, 0, 0 );
  6715. end;
  6716. if ( zExp < 0 ) then
  6717. begin
  6718. isTiny :=
  6719. ord(( float_detect_tininess = float_tininess_before_rounding )
  6720. or ( zExp < -1 )
  6721. or not( increment<>0 )
  6722. or boolean(lt128(
  6723. zSig0,
  6724. zSig1,
  6725. int64( $0001FFFFFFFFFFFF ),
  6726. int64( $FFFFFFFFFFFFFFFF )
  6727. )));
  6728. shift128ExtraRightJamming(
  6729. zSig0, zSig1, zSig2, - zExp, zSig0, zSig1, zSig2 );
  6730. zExp := 0;
  6731. if ( isTiny and zSig2 )<>0 then
  6732. float_raise( float_flag_underflow );
  6733. if ( roundNearestEven<>0 ) then
  6734. begin
  6735. increment := ord( sbits64(zSig2) < 0 );
  6736. end
  6737. else begin
  6738. if ( zSign<>0 ) then
  6739. begin
  6740. increment := ord( roundingMode = float_round_down ) and zSig2;
  6741. end
  6742. else begin
  6743. increment := ord( roundingMode = float_round_up ) and zSig2;
  6744. end;
  6745. end;
  6746. end;
  6747. end;
  6748. if ( zSig2<>0 ) then
  6749. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6750. if ( increment<>0 ) then
  6751. begin
  6752. add128( zSig0, zSig1, 0, 1, zSig0, zSig1 );
  6753. zSig1 := zSig1 and not( ord( zSig2 + zSig2 = 0 ) and roundNearestEven );
  6754. end
  6755. else begin
  6756. if ( ( zSig0 or zSig1 ) = 0 ) then
  6757. zExp := 0;
  6758. end;
  6759. result:=packFloat128( zSign, zExp, zSig0, zSig1 );
  6760. end;
  6761. {*----------------------------------------------------------------------------
  6762. | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  6763. | and significand formed by the concatenation of `zSig0' and `zSig1', and
  6764. | returns the proper quadruple-precision floating-point value corresponding
  6765. | to the abstract input. This routine is just like `roundAndPackFloat128'
  6766. | except that the input significand has fewer bits and does not have to be
  6767. | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
  6768. | point exponent.
  6769. *----------------------------------------------------------------------------*}
  6770. function normalizeRoundAndPackFloat128(zSign: flag; zExp: int32; zSig0: bits64; zSig1: bits64): float128;
  6771. var
  6772. shiftCount: int8;
  6773. zSig2: bits64;
  6774. begin
  6775. if ( zSig0 = 0 ) then
  6776. begin
  6777. zSig0 := zSig1;
  6778. zSig1 := 0;
  6779. dec(zExp, 64);
  6780. end;
  6781. shiftCount := countLeadingZeros64( zSig0 ) - 15;
  6782. if ( 0 <= shiftCount ) then
  6783. begin
  6784. zSig2 := 0;
  6785. shortShift128Left( zSig0, zSig1, shiftCount, zSig0, zSig1 );
  6786. end
  6787. else begin
  6788. shift128ExtraRightJamming(
  6789. zSig0, zSig1, 0, - shiftCount, zSig0, zSig1, zSig2 );
  6790. end;
  6791. dec(zExp, shiftCount);
  6792. result:=roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  6793. end;
  6794. {*----------------------------------------------------------------------------
  6795. | Returns the result of converting the quadruple-precision floating-point
  6796. | value `a' to the 32-bit two's complement integer format. The conversion
  6797. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  6798. | Arithmetic---which means in particular that the conversion is rounded
  6799. | according to the current rounding mode. If `a' is a NaN, the largest
  6800. | positive integer is returned. Otherwise, if the conversion overflows, the
  6801. | largest integer with the same sign as `a' is returned.
  6802. *----------------------------------------------------------------------------*}
  6803. function float128_to_int32(a: float128): int32;
  6804. var
  6805. aSign: flag;
  6806. aExp, shiftCount: int32;
  6807. aSig0, aSig1: bits64;
  6808. begin
  6809. aSig1 := extractFloat128Frac1( a );
  6810. aSig0 := extractFloat128Frac0( a );
  6811. aExp := extractFloat128Exp( a );
  6812. aSign := extractFloat128Sign( a );
  6813. if ( ord( aExp = $7FFF ) and ( aSig0 or aSig1 ) )<>0 then
  6814. aSign := 0;
  6815. if ( aExp<>0 ) then
  6816. aSig0 := aSig0 or int64( $0001000000000000 );
  6817. aSig0 := aSig0 or ord( aSig1 <> 0 );
  6818. shiftCount := $4028 - aExp;
  6819. if ( 0 < shiftCount ) then
  6820. shift64RightJamming( aSig0, shiftCount, aSig0 );
  6821. result := roundAndPackInt32( aSign, aSig0 );
  6822. end;
  6823. {*----------------------------------------------------------------------------
  6824. | Returns the result of converting the quadruple-precision floating-point
  6825. | value `a' to the 32-bit two's complement integer format. The conversion
  6826. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  6827. | Arithmetic, except that the conversion is always rounded toward zero. If
  6828. | `a' is a NaN, the largest positive integer is returned. Otherwise, if the
  6829. | conversion overflows, the largest integer with the same sign as `a' is
  6830. | returned.
  6831. *----------------------------------------------------------------------------*}
  6832. function float128_to_int32_round_to_zero(a: float128): int32;
  6833. var
  6834. aSign: flag;
  6835. aExp, shiftCount: int32;
  6836. aSig0, aSig1, savedASig: bits64;
  6837. z: int32;
  6838. label
  6839. invalid;
  6840. begin
  6841. aSig1 := extractFloat128Frac1( a );
  6842. aSig0 := extractFloat128Frac0( a );
  6843. aExp := extractFloat128Exp( a );
  6844. aSign := extractFloat128Sign( a );
  6845. aSig0 := aSig0 or ord( aSig1 <> 0 );
  6846. if ( $401E < aExp ) then
  6847. begin
  6848. if ( ord( aExp = $7FFF ) and aSig0 )<>0 then
  6849. aSign := 0;
  6850. goto invalid;
  6851. end
  6852. else if ( aExp < $3FFF ) then
  6853. begin
  6854. if ( aExp or aSig0 )<>0 then
  6855. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6856. result := 0;
  6857. exit;
  6858. end;
  6859. aSig0 := aSig0 or int64( $0001000000000000 );
  6860. shiftCount := $402F - aExp;
  6861. savedASig := aSig0;
  6862. aSig0 := aSig0 shr shiftCount;
  6863. z := aSig0;
  6864. if ( aSign )<>0 then
  6865. z := - z;
  6866. if ( ord( z < 0 ) xor aSign )<>0 then
  6867. begin
  6868. invalid:
  6869. float_raise( float_flag_invalid );
  6870. if aSign<>0 then
  6871. result:=$80000000
  6872. else
  6873. result:=$7FFFFFFF;
  6874. exit;
  6875. end;
  6876. if ( ( aSig0 shl shiftCount ) <> savedASig ) then
  6877. begin
  6878. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6879. end;
  6880. result := z;
  6881. end;
  6882. {*----------------------------------------------------------------------------
  6883. | Returns the result of converting the quadruple-precision floating-point
  6884. | value `a' to the 64-bit two's complement integer format. The conversion
  6885. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  6886. | Arithmetic---which means in particular that the conversion is rounded
  6887. | according to the current rounding mode. If `a' is a NaN, the largest
  6888. | positive integer is returned. Otherwise, if the conversion overflows, the
  6889. | largest integer with the same sign as `a' is returned.
  6890. *----------------------------------------------------------------------------*}
  6891. function float128_to_int64(a: float128): int64;
  6892. var
  6893. aSign: flag;
  6894. aExp, shiftCount: int32;
  6895. aSig0, aSig1: bits64;
  6896. begin
  6897. aSig1 := extractFloat128Frac1( a );
  6898. aSig0 := extractFloat128Frac0( a );
  6899. aExp := extractFloat128Exp( a );
  6900. aSign := extractFloat128Sign( a );
  6901. if ( aExp<>0 ) then
  6902. aSig0 := aSig0 or int64( $0001000000000000 );
  6903. shiftCount := $402F - aExp;
  6904. if ( shiftCount <= 0 ) then
  6905. begin
  6906. if ( $403E < aExp ) then
  6907. begin
  6908. float_raise( float_flag_invalid );
  6909. if ( (aSign=0)
  6910. or ( ( aExp = $7FFF )
  6911. and ( (aSig1<>0) or ( aSig0 <> int64( $0001000000000000 ) ) )
  6912. )
  6913. ) then
  6914. begin
  6915. result := int64( $7FFFFFFFFFFFFFFF );
  6916. end;
  6917. result := int64( $8000000000000000 );
  6918. end;
  6919. shortShift128Left( aSig0, aSig1, - shiftCount, aSig0, aSig1 );
  6920. end
  6921. else begin
  6922. shift64ExtraRightJamming( aSig0, aSig1, shiftCount, aSig0, aSig1 );
  6923. end;
  6924. result := roundAndPackInt64( aSign, aSig0, aSig1 );
  6925. end;
  6926. {*----------------------------------------------------------------------------
  6927. | Returns the result of converting the quadruple-precision floating-point
  6928. | value `a' to the 64-bit two's complement integer format. The conversion
  6929. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  6930. | Arithmetic, except that the conversion is always rounded toward zero.
  6931. | If `a' is a NaN, the largest positive integer is returned. Otherwise, if
  6932. | the conversion overflows, the largest integer with the same sign as `a' is
  6933. | returned.
  6934. *----------------------------------------------------------------------------*}
  6935. function float128_to_int64_round_to_zero(a: float128): int64;
  6936. var
  6937. aSign: flag;
  6938. aExp, shiftCount: int32;
  6939. aSig0, aSig1: bits64;
  6940. z: int64;
  6941. begin
  6942. aSig1 := extractFloat128Frac1( a );
  6943. aSig0 := extractFloat128Frac0( a );
  6944. aExp := extractFloat128Exp( a );
  6945. aSign := extractFloat128Sign( a );
  6946. if ( aExp<>0 ) then
  6947. aSig0 := aSig0 or int64( $0001000000000000 );
  6948. shiftCount := aExp - $402F;
  6949. if ( 0 < shiftCount ) then
  6950. begin
  6951. if ( $403E <= aExp ) then
  6952. begin
  6953. aSig0 := aSig0 and int64( $0000FFFFFFFFFFFF );
  6954. if ( ( a.high = int64( $C03E000000000000 ) )
  6955. and ( aSig1 < int64( $0002000000000000 ) ) ) then
  6956. begin
  6957. if ( aSig1<>0 ) then
  6958. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6959. end
  6960. else begin
  6961. float_raise( float_flag_invalid );
  6962. if ( (aSign=0) or ( ( aExp = $7FFF ) and (( aSig0 or aSig1 )<>0) ) ) then
  6963. begin
  6964. result := int64( $7FFFFFFFFFFFFFFF );
  6965. exit;
  6966. end;
  6967. end;
  6968. result := int64( $8000000000000000 );
  6969. exit;
  6970. end;
  6971. z := ( aSig0 shl shiftCount ) or ( aSig1>>( ( - shiftCount ) and 63 ) );
  6972. if ( int64( aSig1 shl shiftCount )<>0 ) then
  6973. begin
  6974. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6975. end;
  6976. end
  6977. else begin
  6978. if ( aExp < $3FFF ) then
  6979. begin
  6980. if ( aExp or aSig0 or aSig1 )<>0 then
  6981. begin
  6982. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6983. end;
  6984. result := 0;
  6985. exit;
  6986. end;
  6987. z := aSig0 shr ( - shiftCount );
  6988. if ( (aSig1<>0)
  6989. or ( (shiftCount<>0) and (int64( aSig0 shl ( shiftCount and 63 ) )<>0) ) ) then
  6990. begin
  6991. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  6992. end;
  6993. end;
  6994. if ( aSign<>0 ) then
  6995. z := - z;
  6996. result := z;
  6997. end;
  6998. {*----------------------------------------------------------------------------
  6999. | Returns the result of converting the quadruple-precision floating-point
  7000. | value `a' to the single-precision floating-point format. The conversion
  7001. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  7002. | Arithmetic.
  7003. *----------------------------------------------------------------------------*}
  7004. function float128_to_float32(a: float128): float32;
  7005. var
  7006. aSign: flag;
  7007. aExp: int32;
  7008. aSig0, aSig1: bits64;
  7009. zSig: bits32;
  7010. begin
  7011. aSig1 := extractFloat128Frac1( a );
  7012. aSig0 := extractFloat128Frac0( a );
  7013. aExp := extractFloat128Exp( a );
  7014. aSign := extractFloat128Sign( a );
  7015. if ( aExp = $7FFF ) then
  7016. begin
  7017. if ( aSig0 or aSig1 )<>0 then
  7018. begin
  7019. result := commonNaNToFloat32( float128ToCommonNaN( a ) );
  7020. exit;
  7021. end;
  7022. result := packFloat32( aSign, $FF, 0 );
  7023. exit;
  7024. end;
  7025. aSig0 := aSig0 or ord( aSig1 <> 0 );
  7026. shift64RightJamming( aSig0, 18, aSig0 );
  7027. zSig := aSig0;
  7028. if ( aExp or zSig )<>0 then
  7029. begin
  7030. zSig := zSig or $40000000;
  7031. dec(aExp,$3F81);
  7032. end;
  7033. result := roundAndPackFloat32( aSign, aExp, zSig );
  7034. end;
  7035. {*----------------------------------------------------------------------------
  7036. | Returns the result of converting the quadruple-precision floating-point
  7037. | value `a' to the double-precision floating-point format. The conversion
  7038. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  7039. | Arithmetic.
  7040. *----------------------------------------------------------------------------*}
  7041. function float128_to_float64(a: float128): float64;
  7042. var
  7043. aSign: flag;
  7044. aExp: int32;
  7045. aSig0, aSig1: bits64;
  7046. begin
  7047. aSig1 := extractFloat128Frac1( a );
  7048. aSig0 := extractFloat128Frac0( a );
  7049. aExp := extractFloat128Exp( a );
  7050. aSign := extractFloat128Sign( a );
  7051. if ( aExp = $7FFF ) then
  7052. begin
  7053. if ( aSig0 or aSig1 )<>0 then
  7054. begin
  7055. commonNaNToFloat64( float128ToCommonNaN( a ),result);
  7056. exit;
  7057. end;
  7058. result:=packFloat64( aSign, $7FF, 0);
  7059. exit;
  7060. end;
  7061. shortShift128Left( aSig0, aSig1, 14, aSig0, aSig1 );
  7062. aSig0 := aSig0 or ord( aSig1 <> 0 );
  7063. if ( aExp or aSig0 )<>0 then
  7064. begin
  7065. aSig0 := aSig0 or int64( $4000000000000000 );
  7066. dec(aExp,$3C01);
  7067. end;
  7068. result := roundAndPackFloat64( aSign, aExp, aSig0 );
  7069. end;
  7070. {$ifdef FPC_SOFTFLOAT_FLOATX80}
  7071. {*----------------------------------------------------------------------------
  7072. | Returns the result of converting the quadruple-precision floating-point
  7073. | value `a' to the extended double-precision floating-point format. The
  7074. | conversion is performed according to the IEC/IEEE Standard for Binary
  7075. | Floating-Point Arithmetic.
  7076. *----------------------------------------------------------------------------*}
  7077. function float128_to_floatx80(a: float128): floatx80;
  7078. var
  7079. aSign: flag;
  7080. aExp: int32;
  7081. aSig0, aSig1: bits64;
  7082. begin
  7083. aSig1 := extractFloat128Frac1( a );
  7084. aSig0 := extractFloat128Frac0( a );
  7085. aExp := extractFloat128Exp( a );
  7086. aSign := extractFloat128Sign( a );
  7087. if ( aExp = $7FFF ) begin
  7088. if ( aSig0 or aSig1 ) begin
  7089. result := commonNaNToFloatx80( float128ToCommonNaN( a ) );
  7090. exit;
  7091. end;
  7092. result := packFloatx80( aSign, $7FFF, int64( $8000000000000000 ) );
  7093. exit;
  7094. end;
  7095. if ( aExp = 0 ) begin
  7096. if ( ( aSig0 or aSig1 ) = 0 ) then
  7097. begin
  7098. result := packFloatx80( aSign, 0, 0 );
  7099. exit;
  7100. end;
  7101. normalizeFloat128Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  7102. end;
  7103. else begin
  7104. aSig0 or= int64( $0001000000000000 );
  7105. end;
  7106. shortShift128Left( aSig0, aSig1, 15, aSig0, aSig1 );
  7107. result := roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
  7108. end;
  7109. {$endif FPC_SOFTFLOAT_FLOATX80}
  7110. {*----------------------------------------------------------------------------
  7111. | Rounds the quadruple-precision floating-point value `a' to an integer, and
  7112. | Returns the result as a quadruple-precision floating-point value. The
  7113. | operation is performed according to the IEC/IEEE Standard for Binary
  7114. | Floating-Point Arithmetic.
  7115. *----------------------------------------------------------------------------*}
  7116. function float128_round_to_int(a: float128): float128;
  7117. var
  7118. aSign: flag;
  7119. aExp: int32;
  7120. lastBitMask, roundBitsMask: bits64;
  7121. roundingMode: int8;
  7122. z: float128;
  7123. begin
  7124. aExp := extractFloat128Exp( a );
  7125. if ( $402F <= aExp ) then
  7126. begin
  7127. if ( $406F <= aExp ) then
  7128. begin
  7129. if ( ( aExp = $7FFF )
  7130. and (( extractFloat128Frac0( a ) or extractFloat128Frac1( a ) )<>0)
  7131. ) then
  7132. begin
  7133. result := propagateFloat128NaN( a, a );
  7134. exit;
  7135. end;
  7136. result := a;
  7137. exit;
  7138. end;
  7139. lastBitMask := 1;
  7140. lastBitMask := ( lastBitMask shl ( $406E - aExp ) ) shl 1;
  7141. roundBitsMask := lastBitMask - 1;
  7142. z := a;
  7143. roundingMode := softfloat_rounding_mode;
  7144. if ( roundingMode = float_round_nearest_even ) then
  7145. begin
  7146. if ( lastBitMask )<>0 then
  7147. begin
  7148. add128( z.high, z.low, 0, lastBitMask shr 1, z.high, z.low );
  7149. if ( ( z.low and roundBitsMask ) = 0 ) then
  7150. z.low := z.low and not(lastBitMask);
  7151. end
  7152. else begin
  7153. if ( sbits64(z.low) < 0 ) then
  7154. begin
  7155. inc(z.high);
  7156. if ( bits64( z.low shl 1 ) = 0 ) then
  7157. z.high := z.high and not(1);
  7158. end;
  7159. end;
  7160. end
  7161. else if ( roundingMode <> float_round_to_zero ) then
  7162. begin
  7163. if ( extractFloat128Sign( z )
  7164. xor ord( roundingMode = float_round_up ) )<>0 then
  7165. begin
  7166. add128( z.high, z.low, 0, roundBitsMask, z.high, z.low );
  7167. end;
  7168. end;
  7169. z.low := z.low and not(roundBitsMask);
  7170. end
  7171. else begin
  7172. if ( aExp < $3FFF ) then
  7173. begin
  7174. if ( ( ( bits64( a.high shl 1 ) ) or a.low ) = 0 ) then
  7175. begin
  7176. result := a;
  7177. exit;
  7178. end;
  7179. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  7180. aSign := extractFloat128Sign( a );
  7181. case softfloat_rounding_mode of
  7182. float_round_nearest_even:
  7183. if ( ( aExp = $3FFE )
  7184. and ( (extractFloat128Frac0( a )<>0)
  7185. or (extractFloat128Frac1( a )<>0) )
  7186. ) then begin
  7187. begin
  7188. result := packFloat128( aSign, $3FFF, 0, 0 );
  7189. exit;
  7190. end;
  7191. end;
  7192. float_round_down:
  7193. begin
  7194. if aSign<>0 then
  7195. result:=packFloat128( 1, $3FFF, 0, 0 )
  7196. else
  7197. result:=packFloat128( 0, 0, 0, 0 );
  7198. exit;
  7199. end;
  7200. float_round_up:
  7201. begin
  7202. if aSign<>0 then
  7203. result := packFloat128( 1, 0, 0, 0 )
  7204. else
  7205. result:=packFloat128( 0, $3FFF, 0, 0 );
  7206. exit;
  7207. end;
  7208. end;
  7209. result := packFloat128( aSign, 0, 0, 0 );
  7210. exit;
  7211. end;
  7212. lastBitMask := 1;
  7213. lastBitMask := lastBitMask shl ($402F - aExp);
  7214. roundBitsMask := lastBitMask - 1;
  7215. z.low := 0;
  7216. z.high := a.high;
  7217. roundingMode := softfloat_rounding_mode;
  7218. if ( roundingMode = float_round_nearest_even ) then begin
  7219. inc(z.high,lastBitMask shr 1);
  7220. if ( ( ( z.high and roundBitsMask ) or a.low ) = 0 ) then begin
  7221. z.high := z.high and not(lastBitMask);
  7222. end;
  7223. end
  7224. else if ( roundingMode <> float_round_to_zero ) then begin
  7225. if ( (extractFloat128Sign( z )<>0)
  7226. xor ( roundingMode = float_round_up ) ) then begin
  7227. z.high := z.high or ord( a.low <> 0 );
  7228. z.high := z.high+roundBitsMask;
  7229. end;
  7230. end;
  7231. z.high := z.high and not(roundBitsMask);
  7232. end;
  7233. if ( ( z.low <> a.low ) or ( z.high <> a.high ) ) then begin
  7234. softfloat_exception_flags := softfloat_exception_flags or float_flag_inexact;
  7235. end;
  7236. result := z;
  7237. end;
  7238. {*----------------------------------------------------------------------------
  7239. | Returns the result of adding the absolute values of the quadruple-precision
  7240. | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
  7241. | before being returned. `zSign' is ignored if the result is a NaN.
  7242. | The addition is performed according to the IEC/IEEE Standard for Binary
  7243. | Floating-Point Arithmetic.
  7244. *----------------------------------------------------------------------------*}
  7245. function addFloat128Sigs(a,b : float128; zSign : flag ): float128;
  7246. var
  7247. aExp, bExp, zExp: int32;
  7248. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits64;
  7249. expDiff: int32;
  7250. label
  7251. shiftRight1,roundAndPack;
  7252. begin
  7253. aSig1 := extractFloat128Frac1( a );
  7254. aSig0 := extractFloat128Frac0( a );
  7255. aExp := extractFloat128Exp( a );
  7256. bSig1 := extractFloat128Frac1( b );
  7257. bSig0 := extractFloat128Frac0( b );
  7258. bExp := extractFloat128Exp( b );
  7259. expDiff := aExp - bExp;
  7260. if ( 0 < expDiff ) then begin
  7261. if ( aExp = $7FFF ) then begin
  7262. if ( aSig0 or aSig1 )<>0 then
  7263. begin
  7264. result := propagateFloat128NaN( a, b );
  7265. exit;
  7266. end;
  7267. result := a;
  7268. exit;
  7269. end;
  7270. if ( bExp = 0 ) then begin
  7271. dec(expDiff);
  7272. end
  7273. else begin
  7274. bSig0 := bSig0 or int64( $0001000000000000 );
  7275. end;
  7276. shift128ExtraRightJamming(
  7277. bSig0, bSig1, 0, expDiff, bSig0, bSig1, zSig2 );
  7278. zExp := aExp;
  7279. end
  7280. else if ( expDiff < 0 ) then begin
  7281. if ( bExp = $7FFF ) then begin
  7282. if ( bSig0 or bSig1 )<>0 then
  7283. begin
  7284. result := propagateFloat128NaN( a, b );
  7285. exit;
  7286. end;
  7287. result := packFloat128( zSign, $7FFF, 0, 0 );
  7288. exit;
  7289. end;
  7290. if ( aExp = 0 ) then begin
  7291. inc(expDiff);
  7292. end
  7293. else begin
  7294. aSig0 := aSig0 or int64( $0001000000000000 );
  7295. end;
  7296. shift128ExtraRightJamming(
  7297. aSig0, aSig1, 0, - expDiff, aSig0, aSig1, zSig2 );
  7298. zExp := bExp;
  7299. end
  7300. else begin
  7301. if ( aExp = $7FFF ) then begin
  7302. if ( aSig0 or aSig1 or bSig0 or bSig1 )<>0 then begin
  7303. result := propagateFloat128NaN( a, b );
  7304. exit;
  7305. end;
  7306. result := a;
  7307. exit;
  7308. end;
  7309. add128( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
  7310. if ( aExp = 0 ) then
  7311. begin
  7312. result := packFloat128( zSign, 0, zSig0, zSig1 );
  7313. exit;
  7314. end;
  7315. zSig2 := 0;
  7316. zSig0 := zSig0 or int64( $0002000000000000 );
  7317. zExp := aExp;
  7318. goto shiftRight1;
  7319. end;
  7320. aSig0 := aSig0 or int64( $0001000000000000 );
  7321. add128( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
  7322. dec(zExp);
  7323. if ( zSig0 < int64( $0002000000000000 ) ) then goto roundAndPack;
  7324. inc(zExp);
  7325. shiftRight1:
  7326. shift128ExtraRightJamming(
  7327. zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
  7328. roundAndPack:
  7329. result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  7330. end;
  7331. {*----------------------------------------------------------------------------
  7332. | Returns the result of subtracting the absolute values of the quadruple-
  7333. | precision floating-point values `a' and `b'. If `zSign' is 1, the
  7334. | difference is negated before being returned. `zSign' is ignored if the
  7335. | result is a NaN. The subtraction is performed according to the IEC/IEEE
  7336. | Standard for Binary Floating-Point Arithmetic.
  7337. *----------------------------------------------------------------------------*}
  7338. function subFloat128Sigs( a, b : float128; zSign : flag): float128;
  7339. var
  7340. aExp, bExp, zExp: int32;
  7341. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1: bits64;
  7342. expDiff: int32;
  7343. z: float128;
  7344. label
  7345. aExpBigger,bExpBigger,aBigger,bBigger,normalizeRoundAndPack;
  7346. begin
  7347. aSig1 := extractFloat128Frac1( a );
  7348. aSig0 := extractFloat128Frac0( a );
  7349. aExp := extractFloat128Exp( a );
  7350. bSig1 := extractFloat128Frac1( b );
  7351. bSig0 := extractFloat128Frac0( b );
  7352. bExp := extractFloat128Exp( b );
  7353. expDiff := aExp - bExp;
  7354. shortShift128Left( aSig0, aSig1, 14, aSig0, aSig1 );
  7355. shortShift128Left( bSig0, bSig1, 14, bSig0, bSig1 );
  7356. if ( 0 < expDiff ) then goto aExpBigger;
  7357. if ( expDiff < 0 ) then goto bExpBigger;
  7358. if ( aExp = $7FFF ) then begin
  7359. if ( aSig0 or aSig1 or bSig0 or bSig1 )<>0 then begin
  7360. result := propagateFloat128NaN( a, b );
  7361. exit;
  7362. end;
  7363. float_raise( float_flag_invalid );
  7364. z.low := float128_default_nan_low;
  7365. z.high := float128_default_nan_high;
  7366. result := z;
  7367. exit;
  7368. end;
  7369. if ( aExp = 0 ) then begin
  7370. aExp := 1;
  7371. bExp := 1;
  7372. end;
  7373. if ( bSig0 < aSig0 ) then goto aBigger;
  7374. if ( aSig0 < bSig0 ) then goto bBigger;
  7375. if ( bSig1 < aSig1 ) then goto aBigger;
  7376. if ( aSig1 < bSig1 ) then goto bBigger;
  7377. result := packFloat128( ord(softfloat_rounding_mode = float_round_down), 0, 0, 0 );
  7378. exit;
  7379. bExpBigger:
  7380. if ( bExp = $7FFF ) then begin
  7381. if ( bSig0 or bSig1 )<>0 then
  7382. begin
  7383. result := propagateFloat128NaN( a, b );
  7384. exit;
  7385. end;
  7386. result := packFloat128( zSign xor 1, $7FFF, 0, 0 );
  7387. exit;
  7388. end;
  7389. if ( aExp = 0 ) then begin
  7390. inc(expDiff);
  7391. end
  7392. else begin
  7393. aSig0 := aSig0 or int64( $4000000000000000 );
  7394. end;
  7395. shift128RightJamming( aSig0, aSig1, - expDiff, aSig0, aSig1 );
  7396. bSig0 := bSig0 or int64( $4000000000000000 );
  7397. bBigger:
  7398. sub128( bSig0, bSig1, aSig0, aSig1, zSig0, zSig1 );
  7399. zExp := bExp;
  7400. zSign := zSign xor 1;
  7401. goto normalizeRoundAndPack;
  7402. aExpBigger:
  7403. if ( aExp = $7FFF ) then begin
  7404. if ( aSig0 or aSig1 )<>0 then
  7405. begin
  7406. result := propagateFloat128NaN( a, b );
  7407. exit;
  7408. end;
  7409. result := a;
  7410. exit;
  7411. end;
  7412. if ( bExp = 0 ) then begin
  7413. dec(expDiff);
  7414. end
  7415. else begin
  7416. bSig0 := bSig0 or int64( $4000000000000000 );
  7417. end;
  7418. shift128RightJamming( bSig0, bSig1, expDiff, bSig0, bSig1 );
  7419. aSig0 := aSig0 or int64( $4000000000000000 );
  7420. aBigger:
  7421. sub128( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1 );
  7422. zExp := aExp;
  7423. normalizeRoundAndPack:
  7424. dec(zExp);
  7425. result := normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
  7426. end;
  7427. {*----------------------------------------------------------------------------
  7428. | Returns the result of adding the quadruple-precision floating-point values
  7429. | `a' and `b'. The operation is performed according to the IEC/IEEE Standard
  7430. | for Binary Floating-Point Arithmetic.
  7431. *----------------------------------------------------------------------------*}
  7432. function float128_add(a: float128; b: float128): float128;
  7433. var
  7434. aSign, bSign: flag;
  7435. begin
  7436. aSign := extractFloat128Sign( a );
  7437. bSign := extractFloat128Sign( b );
  7438. if ( aSign = bSign ) then begin
  7439. result := addFloat128Sigs( a, b, aSign );
  7440. end
  7441. else begin
  7442. result := subFloat128Sigs( a, b, aSign );
  7443. end;
  7444. end;
  7445. {*----------------------------------------------------------------------------
  7446. | Returns the result of subtracting the quadruple-precision floating-point
  7447. | values `a' and `b'. The operation is performed according to the IEC/IEEE
  7448. | Standard for Binary Floating-Point Arithmetic.
  7449. *----------------------------------------------------------------------------*}
  7450. function float128_sub(a: float128; b: float128): float128;
  7451. var
  7452. aSign, bSign: flag;
  7453. begin
  7454. aSign := extractFloat128Sign( a );
  7455. bSign := extractFloat128Sign( b );
  7456. if ( aSign = bSign ) then begin
  7457. result := subFloat128Sigs( a, b, aSign );
  7458. end
  7459. else begin
  7460. result := addFloat128Sigs( a, b, aSign );
  7461. end;
  7462. end;
  7463. {*----------------------------------------------------------------------------
  7464. | Returns the result of multiplying the quadruple-precision floating-point
  7465. | values `a' and `b'. The operation is performed according to the IEC/IEEE
  7466. | Standard for Binary Floating-Point Arithmetic.
  7467. *----------------------------------------------------------------------------*}
  7468. function float128_mul(a: float128; b: float128): float128;
  7469. var
  7470. aSign, bSign, zSign: flag;
  7471. aExp, bExp, zExp: int32;
  7472. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3: bits64;
  7473. z: float128;
  7474. label
  7475. invalid;
  7476. begin
  7477. aSig1 := extractFloat128Frac1( a );
  7478. aSig0 := extractFloat128Frac0( a );
  7479. aExp := extractFloat128Exp( a );
  7480. aSign := extractFloat128Sign( a );
  7481. bSig1 := extractFloat128Frac1( b );
  7482. bSig0 := extractFloat128Frac0( b );
  7483. bExp := extractFloat128Exp( b );
  7484. bSign := extractFloat128Sign( b );
  7485. zSign := aSign xor bSign;
  7486. if ( aExp = $7FFF ) then begin
  7487. if ( (( aSig0 or aSig1 )<>0)
  7488. or ( ( bExp = $7FFF ) and (( bSig0 or bSig1 )<>0) ) ) then begin
  7489. result := propagateFloat128NaN( a, b );
  7490. exit;
  7491. end;
  7492. if ( ( bExp or bSig0 or bSig1 ) = 0 ) then goto invalid;
  7493. result := packFloat128( zSign, $7FFF, 0, 0 );
  7494. exit;
  7495. end;
  7496. if ( bExp = $7FFF ) then begin
  7497. if ( bSig0 or bSig1 )<>0 then
  7498. begin
  7499. result := propagateFloat128NaN( a, b );
  7500. exit;
  7501. end;
  7502. if ( ( aExp or aSig0 or aSig1 ) = 0 ) then begin
  7503. invalid:
  7504. float_raise( float_flag_invalid );
  7505. z.low := float128_default_nan_low;
  7506. z.high := float128_default_nan_high;
  7507. result := z;
  7508. exit;
  7509. end;
  7510. result := packFloat128( zSign, $7FFF, 0, 0 );
  7511. exit;
  7512. end;
  7513. if ( aExp = 0 ) then begin
  7514. if ( ( aSig0 or aSig1 ) = 0 ) then
  7515. begin
  7516. result := packFloat128( zSign, 0, 0, 0 );
  7517. exit;
  7518. end;
  7519. normalizeFloat128Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  7520. end;
  7521. if ( bExp = 0 ) then begin
  7522. if ( ( bSig0 or bSig1 ) = 0 ) then
  7523. begin
  7524. result := packFloat128( zSign, 0, 0, 0 );
  7525. exit;
  7526. end;
  7527. normalizeFloat128Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
  7528. end;
  7529. zExp := aExp + bExp - $4000;
  7530. aSig0 := aSig0 or int64( $0001000000000000 );
  7531. shortShift128Left( bSig0, bSig1, 16, bSig0, bSig1 );
  7532. mul128To256( aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3 );
  7533. add128( zSig0, zSig1, aSig0, aSig1, zSig0, zSig1 );
  7534. zSig2 := zSig2 or ord( zSig3 <> 0 );
  7535. if ( int64( $0002000000000000 ) <= zSig0 ) then begin
  7536. shift128ExtraRightJamming(
  7537. zSig0, zSig1, zSig2, 1, zSig0, zSig1, zSig2 );
  7538. inc(zExp);
  7539. end;
  7540. result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  7541. end;
  7542. {*----------------------------------------------------------------------------
  7543. | Returns the result of dividing the quadruple-precision floating-point value
  7544. | `a' by the corresponding value `b'. The operation is performed according to
  7545. | the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  7546. *----------------------------------------------------------------------------*}
  7547. function float128_div(a: float128; b: float128): float128;
  7548. var
  7549. aSign, bSign, zSign: flag;
  7550. aExp, bExp, zExp: int32;
  7551. aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2: bits64;
  7552. rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits64;
  7553. z: float128;
  7554. label
  7555. invalid;
  7556. begin
  7557. aSig1 := extractFloat128Frac1( a );
  7558. aSig0 := extractFloat128Frac0( a );
  7559. aExp := extractFloat128Exp( a );
  7560. aSign := extractFloat128Sign( a );
  7561. bSig1 := extractFloat128Frac1( b );
  7562. bSig0 := extractFloat128Frac0( b );
  7563. bExp := extractFloat128Exp( b );
  7564. bSign := extractFloat128Sign( b );
  7565. zSign := aSign xor bSign;
  7566. if ( aExp = $7FFF ) then begin
  7567. if ( aSig0 or aSig1 )<>0 then
  7568. begin
  7569. result := propagateFloat128NaN( a, b );
  7570. exit;
  7571. end;
  7572. if ( bExp = $7FFF ) then begin
  7573. if ( bSig0 or bSig1 )<>0 then
  7574. begin
  7575. result := propagateFloat128NaN( a, b );
  7576. exit;
  7577. end;
  7578. goto invalid;
  7579. end;
  7580. result := packFloat128( zSign, $7FFF, 0, 0 );
  7581. exit;
  7582. end;
  7583. if ( bExp = $7FFF ) then begin
  7584. if ( bSig0 or bSig1 )<>0 then
  7585. begin
  7586. result := propagateFloat128NaN( a, b );
  7587. exit;
  7588. end;
  7589. result := packFloat128( zSign, 0, 0, 0 );
  7590. exit;
  7591. end;
  7592. if ( bExp = 0 ) then begin
  7593. if ( ( bSig0 or bSig1 ) = 0 ) then begin
  7594. if ( ( aExp or aSig0 or aSig1 ) = 0 ) then begin
  7595. invalid:
  7596. float_raise( float_flag_invalid );
  7597. z.low := float128_default_nan_low;
  7598. z.high := float128_default_nan_high;
  7599. result := z;
  7600. exit;
  7601. end;
  7602. float_raise( float_flag_divbyzero );
  7603. result := packFloat128( zSign, $7FFF, 0, 0 );
  7604. exit;
  7605. end;
  7606. normalizeFloat128Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
  7607. end;
  7608. if ( aExp = 0 ) then begin
  7609. if ( ( aSig0 or aSig1 ) = 0 ) then
  7610. begin
  7611. result := packFloat128( zSign, 0, 0, 0 );
  7612. exit;
  7613. end;
  7614. normalizeFloat128Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  7615. end;
  7616. zExp := aExp - bExp + $3FFD;
  7617. shortShift128Left(
  7618. aSig0 or int64( $0001000000000000 ), aSig1, 15, aSig0, aSig1 );
  7619. shortShift128Left(
  7620. bSig0 or int64( $0001000000000000 ), bSig1, 15, bSig0, bSig1 );
  7621. if ( le128( bSig0, bSig1, aSig0, aSig1 )<>0 ) then begin
  7622. shift128Right( aSig0, aSig1, 1, aSig0, aSig1 );
  7623. inc(zExp);
  7624. end;
  7625. zSig0 := estimateDiv128To64( aSig0, aSig1, bSig0 );
  7626. mul128By64To192( bSig0, bSig1, zSig0, term0, term1, term2 );
  7627. sub192( aSig0, aSig1, 0, term0, term1, term2, rem0, rem1, rem2 );
  7628. while ( sbits64(rem0) < 0 ) do begin
  7629. dec(zSig0);
  7630. add192( rem0, rem1, rem2, 0, bSig0, bSig1, rem0, rem1, rem2 );
  7631. end;
  7632. zSig1 := estimateDiv128To64( rem1, rem2, bSig0 );
  7633. if ( ( zSig1 and $3FFF ) <= 4 ) then begin
  7634. mul128By64To192( bSig0, bSig1, zSig1, term1, term2, term3 );
  7635. sub192( rem1, rem2, 0, term1, term2, term3, rem1, rem2, rem3 );
  7636. while ( sbits64(rem1) < 0 ) do begin
  7637. dec(zSig1);
  7638. add192( rem1, rem2, rem3, 0, bSig0, bSig1, rem1, rem2, rem3 );
  7639. end;
  7640. zSig1 := zSig1 or ord( ( rem1 or rem2 or rem3 ) <> 0 );
  7641. end;
  7642. shift128ExtraRightJamming( zSig0, zSig1, 0, 15, zSig0, zSig1, zSig2 );
  7643. result := roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
  7644. end;
  7645. {*----------------------------------------------------------------------------
  7646. | Returns the remainder of the quadruple-precision floating-point value `a'
  7647. | with respect to the corresponding value `b'. The operation is performed
  7648. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  7649. *----------------------------------------------------------------------------*}
  7650. function float128_rem(a: float128; b: float128): float128;
  7651. var
  7652. aSign, bSign, zSign: flag;
  7653. aExp, bExp, expDiff: int32;
  7654. aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2: bits64;
  7655. allZero, alternateASig0, alternateASig1, sigMean1: bits64;
  7656. sigMean0: sbits64;
  7657. z: float128;
  7658. label
  7659. invalid;
  7660. begin
  7661. aSig1 := extractFloat128Frac1( a );
  7662. aSig0 := extractFloat128Frac0( a );
  7663. aExp := extractFloat128Exp( a );
  7664. aSign := extractFloat128Sign( a );
  7665. bSig1 := extractFloat128Frac1( b );
  7666. bSig0 := extractFloat128Frac0( b );
  7667. bExp := extractFloat128Exp( b );
  7668. bSign := extractFloat128Sign( b );
  7669. if ( aExp = $7FFF ) then begin
  7670. if ( (( aSig0 or aSig1 )<>0)
  7671. or ( ( bExp = $7FFF ) and (( bSig0 or bSig1 )<>0) ) ) then begin
  7672. result := propagateFloat128NaN( a, b );
  7673. exit;
  7674. end;
  7675. goto invalid;
  7676. end;
  7677. if ( bExp = $7FFF ) then begin
  7678. if ( bSig0 or bSig1 )<>0 then
  7679. begin
  7680. result := propagateFloat128NaN( a, b );
  7681. exit;
  7682. end;
  7683. result := a;
  7684. exit;
  7685. end;
  7686. if ( bExp = 0 ) then begin
  7687. if ( ( bSig0 or bSig1 ) = 0 ) then begin
  7688. invalid:
  7689. float_raise( float_flag_invalid );
  7690. z.low := float128_default_nan_low;
  7691. z.high := float128_default_nan_high;
  7692. result := z;
  7693. exit;
  7694. end;
  7695. normalizeFloat128Subnormal( bSig0, bSig1, bExp, bSig0, bSig1 );
  7696. end;
  7697. if ( aExp = 0 ) then begin
  7698. if ( ( aSig0 or aSig1 ) = 0 ) then
  7699. begin
  7700. result := a;
  7701. exit;
  7702. end;
  7703. normalizeFloat128Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  7704. end;
  7705. expDiff := aExp - bExp;
  7706. if ( expDiff < -1 ) then
  7707. begin
  7708. result := a;
  7709. exit;
  7710. end;
  7711. shortShift128Left(
  7712. aSig0 or int64( $0001000000000000 ),
  7713. aSig1,
  7714. 15 - ord( expDiff < 0 ),
  7715. aSig0,
  7716. aSig1
  7717. );
  7718. shortShift128Left(
  7719. bSig0 or int64( $0001000000000000 ), bSig1, 15, bSig0, bSig1 );
  7720. q := le128( bSig0, bSig1, aSig0, aSig1 );
  7721. if ( q )<>0 then sub128( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
  7722. dec(expDiff,64);
  7723. while ( 0 < expDiff ) do begin
  7724. q := estimateDiv128To64( aSig0, aSig1, bSig0 );
  7725. if ( 4 < q ) then
  7726. q := q - 4
  7727. else
  7728. q := 0;
  7729. mul128By64To192( bSig0, bSig1, q, term0, term1, term2 );
  7730. shortShift192Left( term0, term1, term2, 61, term1, term2, allZero );
  7731. shortShift128Left( aSig0, aSig1, 61, aSig0, allZero );
  7732. sub128( aSig0, 0, term1, term2, aSig0, aSig1 );
  7733. dec(expDiff,61);
  7734. end;
  7735. if ( -64 < expDiff ) then begin
  7736. q := estimateDiv128To64( aSig0, aSig1, bSig0 );
  7737. if ( 4 < q ) then
  7738. q := q - 4
  7739. else
  7740. q := 0;
  7741. q := q shr (- expDiff);
  7742. shift128Right( bSig0, bSig1, 12, bSig0, bSig1 );
  7743. inc(expDiff,52);
  7744. if ( expDiff < 0 ) then begin
  7745. shift128Right( aSig0, aSig1, - expDiff, aSig0, aSig1 );
  7746. end
  7747. else begin
  7748. shortShift128Left( aSig0, aSig1, expDiff, aSig0, aSig1 );
  7749. end;
  7750. mul128By64To192( bSig0, bSig1, q, term0, term1, term2 );
  7751. sub128( aSig0, aSig1, term1, term2, aSig0, aSig1 );
  7752. end
  7753. else begin
  7754. shift128Right( aSig0, aSig1, 12, aSig0, aSig1 );
  7755. shift128Right( bSig0, bSig1, 12, bSig0, bSig1 );
  7756. end;
  7757. repeat
  7758. alternateASig0 := aSig0;
  7759. alternateASig1 := aSig1;
  7760. inc(q);
  7761. sub128( aSig0, aSig1, bSig0, bSig1, aSig0, aSig1 );
  7762. until not( 0 <= sbits64(aSig0) );
  7763. add128(
  7764. aSig0, aSig1, alternateASig0, alternateASig1, bits64(sigMean0), sigMean1 );
  7765. if ( ( sigMean0 < 0 )
  7766. or ( ( ( sigMean0 or sigMean1 ) = 0 ) and (( q and 1 )<>0) ) ) then begin
  7767. aSig0 := alternateASig0;
  7768. aSig1 := alternateASig1;
  7769. end;
  7770. zSign := ord( sbits64(aSig0) < 0 );
  7771. if ( zSign<>0 ) then sub128( 0, 0, aSig0, aSig1, aSig0, aSig1 );
  7772. result :=
  7773. normalizeRoundAndPackFloat128( aSign xor zSign, bExp - 4, aSig0, aSig1 );
  7774. end;
  7775. {*----------------------------------------------------------------------------
  7776. | Returns the square root of the quadruple-precision floating-point value `a'.
  7777. | The operation is performed according to the IEC/IEEE Standard for Binary
  7778. | Floating-Point Arithmetic.
  7779. *----------------------------------------------------------------------------*}
  7780. function float128_sqrt(a: float128): float128;
  7781. var
  7782. aSign: flag;
  7783. aExp, zExp: int32;
  7784. aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0: bits64;
  7785. rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits64;
  7786. z: float128;
  7787. label
  7788. invalid;
  7789. begin
  7790. aSig1 := extractFloat128Frac1( a );
  7791. aSig0 := extractFloat128Frac0( a );
  7792. aExp := extractFloat128Exp( a );
  7793. aSign := extractFloat128Sign( a );
  7794. if ( aExp = $7FFF ) then begin
  7795. if ( aSig0 or aSig1 )<>0 then
  7796. begin
  7797. result := propagateFloat128NaN( a, a );
  7798. exit;
  7799. end;
  7800. if ( aSign=0 ) then
  7801. begin
  7802. result := a;
  7803. exit;
  7804. end;
  7805. goto invalid;
  7806. end;
  7807. if ( aSign<>0 ) then begin
  7808. if ( ( aExp or aSig0 or aSig1 ) = 0 ) then
  7809. begin
  7810. result := a;
  7811. exit;
  7812. end;
  7813. invalid:
  7814. float_raise( float_flag_invalid );
  7815. z.low := float128_default_nan_low;
  7816. z.high := float128_default_nan_high;
  7817. result := z;
  7818. exit;
  7819. end;
  7820. if ( aExp = 0 ) then begin
  7821. if ( ( aSig0 or aSig1 ) = 0 ) then
  7822. begin
  7823. result := packFloat128( 0, 0, 0, 0 );
  7824. exit;
  7825. end;
  7826. normalizeFloat128Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
  7827. end;
  7828. zExp := ( ( aExp - $3FFF )>>1 ) + $3FFE;
  7829. aSig0 := aSig0 or int64( $0001000000000000 );
  7830. zSig0 := estimateSqrt32( aExp, aSig0>>17 );
  7831. shortShift128Left( aSig0, aSig1, 13 - ( aExp and 1 ), aSig0, aSig1 );
  7832. zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
  7833. doubleZSig0 := zSig0 shl 1;
  7834. mul64To128( zSig0, zSig0, term0, term1 );
  7835. sub128( aSig0, aSig1, term0, term1, rem0, rem1 );
  7836. while ( sbits64(rem0) < 0 ) do begin
  7837. dec(zSig0);
  7838. dec(doubleZSig0,2);
  7839. add128( rem0, rem1, zSig0 shr 63, doubleZSig0 or 1, rem0, rem1 );
  7840. end;
  7841. zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
  7842. if ( ( zSig1 and $1FFF ) <= 5 ) then begin
  7843. if ( zSig1 = 0 ) then zSig1 := 1;
  7844. mul64To128( doubleZSig0, zSig1, term1, term2 );
  7845. sub128( rem1, 0, term1, term2, rem1, rem2 );
  7846. mul64To128( zSig1, zSig1, term2, term3 );
  7847. sub192( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
  7848. while ( sbits64(rem1) < 0 ) do begin
  7849. dec(zSig1);
  7850. shortShift128Left( 0, zSig1, 1, term2, term3 );
  7851. term3 := term3 or 1;
  7852. term2 := term2 or doubleZSig0;
  7853. add192( rem1, rem2, rem3, 0, term2, term3, rem1, rem2, rem3 );
  7854. end;
  7855. zSig1 := zSig1 or ord( ( rem1 or rem2 or rem3 ) <> 0 );
  7856. end;
  7857. shift128ExtraRightJamming( zSig0, zSig1, 0, 14, zSig0, zSig1, zSig2 );
  7858. result := roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
  7859. end;
  7860. {*----------------------------------------------------------------------------
  7861. | Returns 1 if the quadruple-precision floating-point value `a' is equal to
  7862. | the corresponding value `b', and 0 otherwise. The comparison is performed
  7863. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  7864. *----------------------------------------------------------------------------*}
  7865. function float128_eq(a: float128; b: float128): flag;
  7866. begin
  7867. if ( ( ( extractFloat128Exp( a ) = $7FFF )
  7868. and (( extractFloat128Frac0( a ) or extractFloat128Frac1( a ))<>0 ) )
  7869. or ( ( extractFloat128Exp( b ) = $7FFF )
  7870. and ( (extractFloat128Frac0( b ) or extractFloat128Frac1( b ))<>0 ) )
  7871. ) then begin
  7872. if ( (float128_is_signaling_nan( a )<>0)
  7873. or (float128_is_signaling_nan( b )<>0) ) then begin
  7874. float_raise( float_flag_invalid );
  7875. end;
  7876. result := 0;
  7877. exit;
  7878. end;
  7879. result := ord(
  7880. ( a.low = b.low )
  7881. and ( ( a.high = b.high )
  7882. or ( ( a.low = 0 )
  7883. and ( bits64( ( a.high or b.high ) shl 1 ) = 0 ) )
  7884. ));
  7885. end;
  7886. {*----------------------------------------------------------------------------
  7887. | Returns 1 if the quadruple-precision floating-point value `a' is less than
  7888. | or equal to the corresponding value `b', and 0 otherwise. The comparison
  7889. | is performed according to the IEC/IEEE Standard for Binary Floating-Point
  7890. | Arithmetic.
  7891. *----------------------------------------------------------------------------*}
  7892. function float128_le(a: float128; b: float128): flag;
  7893. var
  7894. aSign, bSign: flag;
  7895. begin
  7896. if ( ( ( extractFloat128Exp( a ) = $7FFF )
  7897. and (( extractFloat128Frac0( a ) or extractFloat128Frac1( a ))<>0 ) )
  7898. or ( ( extractFloat128Exp( b ) = $7FFF )
  7899. and ( (extractFloat128Frac0( b ) or extractFloat128Frac1( b ))<>0 ) )
  7900. ) then begin
  7901. float_raise( float_flag_invalid );
  7902. result := 0;
  7903. exit;
  7904. end;
  7905. aSign := extractFloat128Sign( a );
  7906. bSign := extractFloat128Sign( b );
  7907. if ( aSign <> bSign ) then begin
  7908. result := ord(
  7909. (aSign<>0)
  7910. or ( ( ( bits64 ( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  7911. = 0 ));
  7912. exit;
  7913. end;
  7914. if aSign<>0 then
  7915. result := le128( b.high, b.low, a.high, a.low )
  7916. else
  7917. result := le128( a.high, a.low, b.high, b.low );
  7918. end;
  7919. {*----------------------------------------------------------------------------
  7920. | Returns 1 if the quadruple-precision floating-point value `a' is less than
  7921. | the corresponding value `b', and 0 otherwise. The comparison is performed
  7922. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  7923. *----------------------------------------------------------------------------*}
  7924. function float128_lt(a: float128; b: float128): flag;
  7925. var
  7926. aSign, bSign: flag;
  7927. begin
  7928. if ( ( ( extractFloat128Exp( a ) = $7FFF )
  7929. and (( extractFloat128Frac0( a ) or extractFloat128Frac1( a ))<>0 ) )
  7930. or ( ( extractFloat128Exp( b ) = $7FFF )
  7931. and ( (extractFloat128Frac0( b ) or extractFloat128Frac1( b ))<>0 ) )
  7932. ) then begin
  7933. float_raise( float_flag_invalid );
  7934. result := 0;
  7935. exit;
  7936. end;
  7937. aSign := extractFloat128Sign( a );
  7938. bSign := extractFloat128Sign( b );
  7939. if ( aSign <> bSign ) then begin
  7940. result := ord(
  7941. (aSign<>0)
  7942. and ( ( ( bits64( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  7943. <> 0 ));
  7944. exit;
  7945. end;
  7946. if aSign<>0 then
  7947. result := lt128( b.high, b.low, a.high, a.low )
  7948. else
  7949. result := lt128( a.high, a.low, b.high, b.low );
  7950. end;
  7951. {*----------------------------------------------------------------------------
  7952. | Returns 1 if the quadruple-precision floating-point value `a' is equal to
  7953. | the corresponding value `b', and 0 otherwise. The invalid exception is
  7954. | raised if either operand is a NaN. Otherwise, the comparison is performed
  7955. | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  7956. *----------------------------------------------------------------------------*}
  7957. function float128_eq_signaling(a: float128; b: float128): flag;
  7958. begin
  7959. if ( ( ( extractFloat128Exp( a ) = $7FFF )
  7960. and ( ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ))<>0 ) )
  7961. or ( ( extractFloat128Exp( b ) = $7FFF )
  7962. and ( (extractFloat128Frac0( b ) or extractFloat128Frac1( b ))<>0 ) )
  7963. ) then begin
  7964. float_raise( float_flag_invalid );
  7965. result := 0;
  7966. exit;
  7967. end;
  7968. result := ord(
  7969. ( a.low = b.low )
  7970. and ( ( a.high = b.high )
  7971. or ( ( a.low = 0 )
  7972. and ( bits64 ( ( a.high or b.high ) shl 1 ) = 0 ) )
  7973. ));
  7974. end;
  7975. {*----------------------------------------------------------------------------
  7976. | Returns 1 if the quadruple-precision floating-point value `a' is less than
  7977. | or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
  7978. | cause an exception. Otherwise, the comparison is performed according to the
  7979. | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  7980. *----------------------------------------------------------------------------*}
  7981. function float128_le_quiet(a: float128; b: float128): flag;
  7982. var
  7983. aSign, bSign: flag;
  7984. begin
  7985. if ( ( ( extractFloat128Exp( a ) = $7FFF )
  7986. and ( ( extractFloat128Frac0( a ) or extractFloat128Frac1( a ))<>0 ) )
  7987. or ( ( extractFloat128Exp( b ) = $7FFF )
  7988. and ( (extractFloat128Frac0( b ) or extractFloat128Frac1( b ))<>0 ) )
  7989. ) then begin
  7990. if ( (float128_is_signaling_nan( a )<>0)
  7991. or (float128_is_signaling_nan( b )<>0) ) then begin
  7992. float_raise( float_flag_invalid );
  7993. end;
  7994. result := 0;
  7995. exit;
  7996. end;
  7997. aSign := extractFloat128Sign( a );
  7998. bSign := extractFloat128Sign( b );
  7999. if ( aSign <> bSign ) then begin
  8000. result := ord(
  8001. (aSign<>0)
  8002. or ( ( ( bits64( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  8003. = 0 ));
  8004. exit;
  8005. end;
  8006. if aSign<>0 then
  8007. result := le128( b.high, b.low, a.high, a.low )
  8008. else
  8009. result := le128( a.high, a.low, b.high, b.low );
  8010. end;
  8011. {*----------------------------------------------------------------------------
  8012. | Returns 1 if the quadruple-precision floating-point value `a' is less than
  8013. | the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
  8014. | exception. Otherwise, the comparison is performed according to the IEC/IEEE
  8015. | Standard for Binary Floating-Point Arithmetic.
  8016. *----------------------------------------------------------------------------*}
  8017. function float128_lt_quiet(a: float128; b: float128): flag;
  8018. var
  8019. aSign, bSign: flag;
  8020. begin
  8021. if ( ( ( extractFloat128Exp( a ) = $7FFF )
  8022. and (( extractFloat128Frac0( a ) or extractFloat128Frac1( a ))<>0 ) )
  8023. or ( ( extractFloat128Exp( b ) = $7FFF )
  8024. and ( (extractFloat128Frac0( b ) or extractFloat128Frac1( b ))<>0 ) )
  8025. ) then begin
  8026. if ( (float128_is_signaling_nan( a )<>0)
  8027. or (float128_is_signaling_nan( b )<>0) ) then begin
  8028. float_raise( float_flag_invalid );
  8029. end;
  8030. result := 0;
  8031. exit;
  8032. end;
  8033. aSign := extractFloat128Sign( a );
  8034. bSign := extractFloat128Sign( b );
  8035. if ( aSign <> bSign ) then begin
  8036. result := ord(
  8037. (aSign<>0)
  8038. and ( ( ( bits64( ( a.high or b.high ) shl 1 ) ) or a.low or b.low )
  8039. <> 0 ));
  8040. exit;
  8041. end;
  8042. if aSign<>0 then
  8043. result:=lt128( b.high, b.low, a.high, a.low )
  8044. else
  8045. result:=lt128( a.high, a.low, b.high, b.low );
  8046. end;
  8047. {----------------------------------------------------------------------------
  8048. | Returns the result of converting the double-precision floating-point value
  8049. | `a' to the quadruple-precision floating-point format. The conversion is
  8050. | performed according to the IEC/IEEE Standard for Binary Floating-Point
  8051. | Arithmetic.
  8052. *----------------------------------------------------------------------------}
  8053. function float64_to_float128( a : float64) : float128;
  8054. var
  8055. aSign : flag;
  8056. aExp : int16;
  8057. aSig, zSig0, zSig1 : bits64;
  8058. begin
  8059. aSig := extractFloat64Frac( a );
  8060. aExp := extractFloat64Exp( a );
  8061. aSign := extractFloat64Sign( a );
  8062. if ( aExp = $7FF ) then begin
  8063. if ( aSig<>0 ) then
  8064. result:=commonNaNToFloat128( float64ToCommonNaN( a ) );
  8065. result:=packFloat128( aSign, $7FFF, 0, 0 );
  8066. exit;
  8067. end;
  8068. if ( aExp = 0 ) then begin
  8069. if ( aSig = 0 ) then
  8070. begin
  8071. result:=packFloat128( aSign, 0, 0, 0 );
  8072. exit;
  8073. end;
  8074. normalizeFloat64Subnormal( aSig, aExp, aSig );
  8075. dec(aExp);
  8076. end;
  8077. shift128Right( aSig, 0, 4, zSig0, zSig1 );
  8078. result:=packFloat128( aSign, aExp + $3C00, zSig0, zSig1 );
  8079. end;
  8080. {$endif FPC_SOFTFLOAT_FLOAT128}
  8081. {$endif not(defined(fpc_softfpu_interface))}
  8082. {$if not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}
  8083. end.
  8084. {$endif not(defined(fpc_softfpu_interface)) and not(defined(fpc_softfpu_implementation))}