cpupara.pas 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. Generates the argument location information for x86-64 target
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cpupara;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cpubase,cgbase,cgutils,
  23. symconst,symtype,symsym,symdef,
  24. parabase,paramgr;
  25. type
  { Parameter manager for the x86-64 target.  All public methods override
    the method of the same name in tparamanager; their bodies are in the
    implementation section of this unit. }
  tcpuparamanager = class(tparamanager)
  private
    { Internal worker that assigns locations to the parameters in "paras".
      The integer register counter, the mm register counter and the
      accumulated stack size are passed by reference so that the caller can
      keep them running across several invocations. }
    procedure create_paraloc_info_intern(
      p : tabstractprocdef;
      side: tcallercallee;
      paras:tparalist;
      var intparareg,mmparareg,parasize:longint;
      varargsparas: boolean);
  public
    { queries about how a parameter/result is passed }
    function param_use_paraloc(const cgpara:tcgpara):boolean;override;
    function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
    function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;

    { register sets that depend on the calling convention }
    function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
    function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
    function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
    function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
    function get_saved_registers_mm(calloption: tproccalloption):tcpuregisterarray;override;

    { location information for parameters and function results }
    function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
    function create_varargs_paraloc_info(
      p : tabstractprocdef;
      side: tcallercallee;
      varargspara:tvarargsparalist):longint;override;
    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
  end;
  43. implementation
  44. uses
  45. cutils,verbose,
  46. systems,
  47. globals,defutil,
  48. symtable,symutil,
  49. procinfo,cpupi,
  50. cgx86,cgobj,cgcpu;
  const
    { Register tables used for parameter passing.  Each array lists the
      super registers in the order given here; the code that indexes them
      is outside this declaration. }

    { default tables: 6 integer registers, 8 XMM registers }
    paraintsupregs : array[0..5] of tsuperregister = (
      RS_RDI,RS_RSI,RS_RDX,RS_RCX,RS_R8,RS_R9
    );
    parammsupregs : array[0..7] of tsuperregister = (
      RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,
      RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7
    );

    { win64 tables: 4 integer registers, 4 XMM registers }
    paraintsupregs_winx64 : array[0..3] of tsuperregister = (
      RS_RCX,RS_RDX,RS_R8,RS_R9
    );
    parammsupregs_winx64 : array[0..3] of tsuperregister = (
      RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3
    );

    { vectorcall table: 6 XMM registers }
    parammsupregs_vectorcall : array[0..5] of tsuperregister = (
      RS_XMM0,RS_XMM1,RS_XMM2,
      RS_XMM3,RS_XMM4,RS_XMM5
    );
  57. {
  58. The argument classification code largely comes from libffi:
  59. ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
  60. Copyright (c) 2008 Red Hat, Inc.
  61. x86-64 Foreign Function Interface
  62. Permission is hereby granted, free of charge, to any person obtaining
  63. a copy of this software and associated documentation files (the
  64. ``Software''), to deal in the Software without restriction, including
  65. without limitation the rights to use, copy, modify, merge, publish,
  66. distribute, sublicense, and/or sell copies of the Software, and to
  67. permit persons to whom the Software is furnished to do so, subject to
  68. the following conditions:
  69. The above copyright notice and this permission notice shall be included
  70. in all copies or substantial portions of the Software.
  71. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
  72. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  73. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  74. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  75. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  76. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  77. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  78. DEALINGS IN THE SOFTWARE.
  79. ----------------------------------------------------------------------- *)
  80. }
  const
    { Number of eightbyte slots in a tx64paraclasses array.

      16 slots (16*8 = 128 bytes) are needed so that a homogeneous vector
      aggregate of 4 YMMs (_m256) can be described under vectorcall. [Kit] }
    MAX_PARA_CLASSES = 16;
  type
    { Class assigned to one eightbyte of an argument.  The order of the
      enumerators is significant: X86_64_NO_CLASS must stay first so that
      a zero-filled record means "no class". }
    tx64paraclasstype = (
      X86_64_NO_CLASS,
      { integer classes; INTEGERSI is chosen when at most 4 bytes are used }
      X86_64_INTEGER_CLASS,
      X86_64_INTEGERSI_CLASS,
      { SSE classes; SF/DF mark a lone Single/Double, SSEUP an upper part
        of a vector }
      X86_64_SSE_CLASS,
      X86_64_SSESF_CLASS,
      X86_64_SSEDF_CLASS,
      X86_64_SSEUP_CLASS,
      { x87 classes }
      X86_64_X87_CLASS,
      X86_64_X87UP_CLASS,
      X86_64_COMPLEX_X87_CLASS,
      { passed in memory }
      X86_64_MEMORY_CLASS
    );

    { Classification of a single eightbyte: the class plus the type
      definition that represents the data stored in it. }
    tx64paraclass = record
      def: tdef;
      typ: tx64paraclasstype;
    end;

    { Classification of a complete argument, one entry per eightbyte. }
    tx64paraclasses = array[0..MAX_PARA_CLASSES-1] of tx64paraclass;
  99. { Win64-specific helper }
  { Returns true if an aggregate of "size" bytes is passed in a register
    on win64: only by-value parameters whose size is exactly 1, 2, 4 or 8
    bytes qualify. }
  function aggregate_in_registers_win64(varspez:tvarspez;size:longint):boolean;
    begin
      result:=false;
      { TODO: Temporary hack: vs_const parameters are always passed by reference for win64 }
      if varspez<>vs_value then
        exit;
      result:=size in [1,2,4,8];
    end;
  105. (* x86-64 register passing implementation. See x86-64 ABI for details. Goal
  106. of this code is to classify each 8bytes of incoming argument by the register
  107. class and assign registers accordingly. *)
  { Picks the definition that represents two defs sharing one eightbyte.

    A nil def loses against the other one.  Otherwise the larger def wins;
    for equal sizes def1 wins only if its alignment is strictly greater,
    in all remaining cases def2 is returned. }
  function classify_representative_def(def1, def2: tdef): tdef;
    var
      size1, size2: asizeint;
    begin
      if not assigned(def1) then
        exit(def2);
      if not assigned(def2) then
        exit(def1);

      size1:=def1.size;
      size2:=def2.size;
      if size1<>size2 then
        begin
          if size1>size2 then
            result:=def1
          else
            result:=def2;
          exit;
        end;

      { same size: the alignment decides, ties go to def2 }
      if def1.alignment>def2.alignment then
        result:=def1
      else
        result:=def2;
    end;
  130. (* Classify the argument of type TYPE and mode MODE.
  131. CLASSES will be filled by the register class used to pass each word
  132. of the operand. The number of words is returned. In case the parameter
  133. should be passed in memory, 0 is returned. As a special case for zero
  134. sized containers, classes[0] will be NO_CLASS and 1 is returned.
  135. real_size contains either def.size, or a value derived from
  136. def.bitpackedsize and the field offset denoting the number of bytes
  137. spanned by a bitpacked field
  138. See the x86-64 PS ABI for details.
  139. *)
  { Classifies one eightbyte as an integer class.

    def         : type of the data being classified
    size        : number of bytes used in this eightbyte (offset included)
    real_size   : number of bytes occupied by the data itself
    cl          : class entry to update (typ and def)
    byte_offset : offset of the data inside the eightbyte

    cl.typ becomes X86_64_INTEGERSI_CLASS if at most 4 bytes are used and
    X86_64_INTEGER_CLASS otherwise; cl.def is replaced by u32inttype resp.
    u64inttype unless the def found there is suitable (see the helpers). }
  procedure classify_single_integer_class(def: tdef; size,real_size: aint; var cl: tx64paraclass; byte_offset: aint);

    { the only def that is kept as is in a 32 bit slot is pasbool1 }
    function is_pasbool1_def(d: tdef): boolean;
      begin
        result:=assigned(d) and
                (d.typ=orddef) and
                (torddef(d).ordtype=pasbool1);
      end;

    { a def is kept in a 64 bit slot if it covers all used bytes and is
      either an ordinal/float/pointer/class reference or an implicit
      pointer type }
    function usable_as_64bit_def(d: tdef): boolean;
      begin
        result:=assigned(d) and
                (d.size>=size) and
                ((d.typ in [orddef,floatdef,pointerdef,classrefdef]) or
                 is_implicit_pointer_object_type(d) or
                 is_dynamicstring(d) or
                 is_dynamic_array(d));
      end;

    begin
      { data that starts the eightbyte and has a register size may become
        the representative def, provided its alignment is not weaker than
        the one of the def already stored }
      if (byte_offset=0) and (real_size in [1,2,4,8]) then
        if not assigned(cl.def) or (def.alignment>=cl.def.alignment) then
          cl.def:=def;

      if size>4 then
        begin
          cl.typ:=X86_64_INTEGER_CLASS;
          if not usable_as_64bit_def(cl.def) then
            cl.def:=u64inttype;
        end
      else
        begin
          cl.typ:=X86_64_INTEGERSI_CLASS;
          { The ABI does not require any sign/zero extension for parameters,
            except for _Bool (= Pascal boolean) to 8 bits. However, some
            compilers (clang) extend them to 32 bits anyway and rely on it
            -> also do it for compatibility when calling such code }
          if not is_pasbool1_def(cl.def) then
            cl.def:=u32inttype;
        end;
    end;
  { Classifies data of real_size bytes located at byte_offset as integer.

    The first eightbyte is always classified; if the data extends beyond
    byte 8 the second eightbyte is classified as well.  Returns the number
    of eightbytes used (1 or 2).  Data reaching beyond byte 16 triggers
    internalerror 2010021401. }
  function classify_as_integer_argument(def: tdef; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      end_offset: aint;
    begin
      end_offset:=byte_offset+real_size;
      classify_single_integer_class(def,end_offset,real_size,classes[0],byte_offset);
      result:=1;
      if end_offset>8 then
        begin
          { same data, seen relative to the start of the second eightbyte }
          classify_single_integer_class(def,end_offset-8,real_size,classes[1],byte_offset-8);
          if end_offset>16 then
            internalerror(2010021401);
          result:=2;
        end;
    end;
  187. (* Return the union class of CLASS1 and CLASS2.
  188. See the x86-64 PS ABI for details. *)
  (* Return the union class of CLASS1 and CLASS2.
     See the x86-64 PS ABI for details.

     The rules are tried in order; the first one that matches determines
     the result:
       1. equal classes          -> that class, with the representative def
       2. one is NO_CLASS        -> the other class
       3. one is MEMORY          -> MEMORY
       4. one is INTEGER(SI)     -> INTEGER (INTEGERSI is only kept when it
                                    is merged with SSESF)
       5. one is X87/X87UP/COMPLEX_X87 -> MEMORY
       6. otherwise              -> SSE with def s64floattype *)
  function merge_classes(class1, class2: tx64paraclass): tx64paraclass;

    const
      x87_classes = [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS];
      integer_classes = [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS];

    { Rule #4 helper: returns cl, widened to a full INTEGER eightbyte with
      def s64inttype if its def is smaller than 8 bytes. }
    function as_integer_eightbyte(const cl: tx64paraclass): tx64paraclass;
      begin
        result:=cl;
        if result.def.size<8 then
          begin
            result.typ:=X86_64_INTEGER_CLASS;
            result.def:=s64inttype;
          end;
      end;

    begin
      (* Rule #1: If both classes are equal, this is the resulting class. *)
      if (class1.typ=class2.typ) then
        begin
          result.typ:=class1.typ;
          result.def:=classify_representative_def(class1.def,class2.def);
          exit;
        end;

      (* Rule #2: If one of the classes is NO_CLASS, the resulting class is
         the other class. *)
      if (class1.typ=X86_64_NO_CLASS) then
        exit(class2);
      if (class2.typ=X86_64_NO_CLASS) then
        exit(class1);

      (* Rule #3: If one of the classes is MEMORY, the result is MEMORY. *)
      if (class1.typ=X86_64_MEMORY_CLASS) then
        exit(class1);
      if (class2.typ=X86_64_MEMORY_CLASS) then
        exit(class2);

      (* Rule #4: If one of the classes is INTEGER, the result is INTEGER. *)
      { 32 bit: an integer sharing 4 bytes with a Single stays 32 bit }
      if (class1.typ=X86_64_INTEGERSI_CLASS) and
         (class2.typ=X86_64_SSESF_CLASS) then
        exit(class1);
      if (class2.typ=X86_64_INTEGERSI_CLASS) and
         (class1.typ=X86_64_SSESF_CLASS) then
        exit(class2);
      { 64 bit }
      if (class1.typ in integer_classes) then
        exit(as_integer_eightbyte(class1));
      if (class2.typ in integer_classes) then
        exit(as_integer_eightbyte(class2));

      (* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
         MEMORY is used. *)
      if (class1.typ in x87_classes) then
        begin
          result:=class1;
          result.typ:=X86_64_MEMORY_CLASS;
          exit;
        end;
      if (class2.typ in x87_classes) then
        begin
          result:=class2;
          result.typ:=X86_64_MEMORY_CLASS;
          exit;
        end;

      (* Rule #6: Otherwise class SSE is used.
         Both fields of the result are set explicitly, so there is no need
         to look at (and dereference) the defs of the input classes. *)
      result.typ:=X86_64_SSE_CLASS;
      result.def:=s64floattype;
    end;
  { Forward declaration: the aggregate helpers below call classify_argument
    for every element, and classify_argument is implemented further down
    in this unit.  As used in this unit, it returns the number of
    eightbytes written to "classes", with 0 meaning "pass in memory". }
  function classify_argument(
    calloption: tproccalloption;
    def: tdef;
    parentdef: tdef;
    varspez: tvarspez;
    real_size: aint;
    var classes: tx64paraclasses;
    byte_offset: aint;
    round_to_8: Boolean): longint; forward;
  { Prepares the classification of an aggregate (record, array, ...).

    All entries of "classes" are reset to X86_64_NO_CLASS/nil, after which
    the number of eightbytes spanned by the aggregate is determined.

    calloption  : calling convention, selects the MS or the default rules
    def         : the aggregate
    parentdef   : enclosing aggregate, may be nil
    varspez     : how the parameter is passed (only used for the MS ABI)
    byte_offset : offset of the aggregate inside its parent
    words       : (out) number of eightbytes whose fields have to be
                  classified by the caller; 0 if there is nothing to merge
    classes     : (out) initialised class array

    Result: number of eightbytes, or 0 if the aggregate has to be passed in
    memory.  Note that under the MS ABI the result can be 1 while words is
    0 (aggregate passed in a single integer register, classes[0] is already
    final), and that a zero sized aggregate yields result 1 with classes[0]
    left as X86_64_NO_CLASS. }
  function init_aggregate_classification(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; byte_offset: aint; out words: longint; out classes: tx64paraclasses): longint;
    var
      i: longint;
    begin
      words:=0;
      { we'll be merging the classes elements with the subclasses
        elements, so initialise them first }
      for i:=low(classes) to high(classes) do
        begin
          classes[i].typ:=X86_64_NO_CLASS;
          classes[i].def:=nil;
        end;

      { win64 follows a different convention here }
      if x86_64_use_ms_abi(calloption) then
        begin
          if aggregate_in_registers_win64(varspez,def.size) then
            begin
              classes[0].typ:=X86_64_INTEGER_CLASS;
              classes[0].def:=def;
              result:=1;
            end
          else if (calloption = pocall_vectorcall) then
            begin
              words := (def.size+byte_offset mod 8+7) div 8;
              case words of
                0:
                  Exit(0);
                1..4:
                  { Aligned vector or array elements }
                  Result := words;
                else
                  { More than 4 eightbytes: only acceptable if the aggregate
                    is aligned to its own size, or if it is a field of a
                    parent that is aligned to (a multiple of) 16 bytes.
                    words cannot be 0 here, that case was handled above. }
                  if ((def.aggregatealignment mod (words shl 3)) = 0) or
                     (Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0)) then
                    Result := words
                  else
                    Result := 0;
              end;
            end
          else
            Result := 0;
          Exit;
        end;

      (* If the struct is larger than 32 bytes, pass it on the stack. *)
      if def.size > 32 then
        exit(0);

      { if a struct starts an offset not divisible by 8, it can span extra
        words }
      words:=(def.size+byte_offset mod 8+7) div 8;

      (* Zero sized arrays or structures are NO_CLASS. We return 0 to
         signal memory class, so handle it as special case. *)
      if (words=0) then
        begin
          classes[0].typ:=X86_64_NO_CLASS;
          classes[0].def:=def;
          exit(1);
        end;

      result:=words;
    end;
  { Classifies one element (field, array element) of an aggregate and
    merges its classes into the classes of the aggregate.

    The element is classified on its own into a scratch array by
    classify_argument; every resulting eightbyte is then merged into the
    slot of "classes" selected by new_byte_offset div 8.

    Returns 0 if the element has to be passed in memory, otherwise the
    index one past the last eightbyte touched by the element.  An element
    reaching beyond the end of "classes" triggers internalerror 2010053108. }
  function classify_aggregate_element(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; new_byte_offset: aint): longint;
    var
      element_classes: tx64paraclasses;
      slot,
      first_slot,
      element_words: longint;
    begin
      fillchar(element_classes,sizeof(element_classes),0);
      element_words:=classify_argument(calloption,def,parentdef,varspez,real_size,element_classes,new_byte_offset, True);
      result:=element_words;
      if element_words=0 then
        exit;

      first_slot:=new_byte_offset div 8;
      if first_slot+element_words-1>high(classes) then
        internalerror(2010053108);

      for slot:=first_slot to first_slot+element_words-1 do
        classes[slot]:=merge_classes(element_classes[slot-first_slot],classes[slot]);

      result:=element_words+first_slot;
    end;
  { Post-merger check and cleanup of the classes of an aggregate.

    calloption : calling convention (vectorcall gets an exception for
                 aggregates of 3 or 4 Doubles)
    def        : the aggregate
    words      : number of eightbytes of the aggregate
    classes    : merged classes of all elements, adjusted in place

    Returns words if the aggregate can be passed in registers and 0 if it
    has to be passed in memory. }
  function finalize_aggregate_classification(calloption: tproccalloption; def: tdef; words: longint; var classes: tx64paraclasses): longint;
    var
      idx, run_len, expected_len: longint;

{$ifndef llvm}
    { Scales a 32 bit location up to a complete eightbyte; every other
      class is left alone. }
    procedure widen_to_eightbyte(var cl: tx64paraclass);
      begin
        case cl.typ of
          X86_64_INTEGERSI_CLASS:
            begin
              cl.typ:=X86_64_INTEGER_CLASS;
              cl.def:=s64inttype;
            end;
          X86_64_SSESF_CLASS:
            begin
              cl.typ:=X86_64_SSE_CLASS;
              cl.def:=carraydef.getreusable_no_free(s32floattype,2);
            end;
          else
            ;
        end;
      end;
{$endif not llvm}

    begin
      { Workaround: It's not immediately possible to determine if a Double is
        by itself or is part of an aligned vector. If the latter, correct the
        class definitions here. [Kit] }
      if (classes[0].typ=X86_64_SSEDF_CLASS) and
         (classes[1].typ=X86_64_SSEUP_CLASS) then
        classes[0].typ:=X86_64_SSE_CLASS;

      if words>2 then
        begin
          { When size > 16 bytes, if the first one isn't
            X86_64_SSE_CLASS or any other ones aren't
            X86_64_SSEUP_CLASS, everything should be passed in
            memory... }
          if classes[0].typ<>X86_64_SSE_CLASS then
            begin
              { ... except if the calling convention is 'vectorcall', then
                check to see if we don't have an HFA of 3 or 4 Doubles }
              if (calloption<>pocall_vectorcall) or (words>4) then
                exit(0);
              for idx:=0 to words-1 do
                if classes[idx].typ<>X86_64_SSEDF_CLASS then
                  exit(0);
              exit(words);
            end;

          { The alignment is wrong for this vector size, hence it is unaligned }
          if (words shl 3)>def.aggregatealignment then
            exit(0);

          { run_len counts the eightbytes of the vector currently being
            scanned, expected_len is the length every vector must have }
          run_len:=1;
          expected_len:=words;
          for idx:=1 to words-1 do
            begin
              if classes[idx].typ=X86_64_SSEUP_CLASS then
                begin
                  inc(run_len);
                  continue;
                end;

              { Exceptional case. Check that we're not dealing an array of
                aligned vectors that is itself aligned to a stricter
                boundary (e.g. 4 XMM registers that can be merged into a
                single ZMM register). }

              { Easy case first - is it actually another SSE vector? }
              if classes[idx].typ<>X86_64_SSE_CLASS then
                exit(0);
              { If the length is not a power of two, then it is definitely
                not a valid vector }
              if (run_len and (run_len-1))<>0 then
                exit(0);
              { Mixture of XMMs and YMMs, for example, is not valid }
              if run_len>expected_len then
                exit(0);
              if (expected_len<words) and (run_len<>expected_len) then
                exit(0);

              classes[idx].typ:=X86_64_SSEUP_CLASS;
              expected_len:=run_len;
              run_len:=1;
            end;

          { Last vector is of a different size }
          if run_len<>expected_len then
            exit(0);

          if run_len>2 then
            begin
              { Cannot use 256-bit and 512-bit vectors if we're not using AVX }
              if not UseAVX then
                exit(0);
              { WARNING: There is currently no support for 256-bit and 512-bit
                aligned vectors, so if an aggregate contains more than two
                eightbyte words, it must be passed in memory. When 256-bit and
                512-bit vectors are fully supported, remove the following
                line. [Kit] }
              exit(0);
            end;
        end;

      (* Final merger cleanup. *)

      (* The first one must never be X86_64_SSEUP_CLASS or
         X86_64_X87UP_CLASS. *)
      if classes[0].typ in [X86_64_SSEUP_CLASS,X86_64_X87UP_CLASS] then
        internalerror(2010021402);

      for idx:=0 to words-1 do
        case classes[idx].typ of
          (* If one class is MEMORY, everything should be passed in
             memory. *)
          X86_64_MEMORY_CLASS:
            exit(0);

          (* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. *)
          X86_64_SSEUP_CLASS:
            if not (classes[idx-1].typ in [X86_64_SSE_CLASS,X86_64_SSEUP_CLASS]) then
              begin
                classes[idx].typ:=X86_64_SSE_CLASS;
                classes[idx].def:=carraydef.getreusable_no_free(s32floattype,2);
              end;

          (* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory. *)
          X86_64_X87UP_CLASS:
            if classes[idx-1].typ<>X86_64_X87_CLASS then
              exit(0);

          (* FPC addition: because we store an extended in 10 bytes, the
             X86_64_X87UP_CLASS can be replaced with e.g. INTEGER if an
             extended is followed by e.g. an array [0..5] of byte -> we also
             have to check whether each X86_64_X87_CLASS is followed by
             X86_64_X87UP_CLASS -- if not, pass in memory

             This cannot happen in the original ABI, because there
             sizeof(extended) = 16 and hence nothing can be merged with
             X86_64_X87UP_CLASS and change it into something else *)
          X86_64_X87_CLASS:
            if (idx=words-1) or
               (classes[idx+1].typ<>X86_64_X87UP_CLASS) then
              exit(0);

          else
            ;
        end;

{$ifndef llvm}
      { FIXME: in case a record contains empty padding space, e.g. a
        "single" field followed by a "double", then we have a problem
        because the cgpara helpers cannot figure out that they should
        skip 4 bytes after storing the single (LOC_MMREGISTER with size
        OS_F32) to memory before storing the double -> for now scale
        such locations always up to 64 bits, although this loads/stores
        some superfluous data }

      { 1) the first part is 32 bit while there is still a second part }
      if classes[1].typ<>X86_64_NO_CLASS then
        widen_to_eightbyte(classes[0]);
      { 2) the second part is 32 bit, but the total size is > 12 bytes }
      if def.size>12 then
        widen_to_eightbyte(classes[1]);
{$endif not llvm}

      result:=words;
    end;
  { Checks whether already classified eightbytes describe a homogeneous
    float aggregate (HFA) or homogeneous vector aggregate (HVA) and, if so,
    rewrites "classes" so that there is one entry per element.

    def     : the aggregate; treated as a TAbstractRecordDef when the first
              class is X86_64_SSEDF_CLASS
    words   : number of classified eightbytes, must not be 0
              (internalerror 2018013100 otherwise)
    classes : classification to inspect and update in place

    Returns the number of entries of "classes" that are valid afterwards,
    or 0 if the aggregate is not homogeneous (or too large).

    NOTE(review): the X86_64_SSE_CLASS branches cast classes[n].def to
    tarraydef without checking its type, i.e. they assume that an SSE class
    always carries an array def here -- TODO confirm for SSE classes that
    were produced by merge_classes. }
  function try_build_homogeneous_aggregate(def: tdef; words: longint; var classes: tx64paraclasses): longint;
    var
      idx, run_len, expected_len, vectors: longint;
      last_offset: aint;
      lastfield: TFieldVarSym;
      must_be_aligned: Boolean;

    { true if classes[first..last] are all X86_64_NO_CLASS; an empty range
      counts as unclassified }
    function all_unclassified(first, last: longint): boolean;
      var
        k: longint;
      begin
        result:=false;
        for k:=first to last do
          if classes[k].typ<>X86_64_NO_CLASS then
            exit;
        result:=true;
      end;

    { element type of the array def stored in classes[slot] }
    function element_of(slot: longint): tdef;
      begin
        result:=tdef(tarraydef(classes[slot].def).elementdef);
      end;

    begin
      { Should be at least 1 word at this point }
      if words=0 then
        InternalError(2018013100);

      case classes[0].typ of
        X86_64_SSESF_CLASS:
          begin
            { Should be an HFA of only a Single }
            if not all_unclassified(1,High(classes)) then
              Exit(0);
            result:=1;
          end;

        X86_64_SSEDF_CLASS:
          begin
            { Possibly an HFA of Doubles }
            if TAbstractRecordDef(def).symtable.symlist.count=0 then
              Exit(0);

            { Get the information and position on the last entry }
            lastfield:=TFieldVarSym(TAbstractRecordDef(def).symtable.symlist[TAbstractRecordDef(def).symtable.symlist.count-1]);
            if TAbstractRecordSymtable(TAbstractRecordDef(def).symtable).is_packed then
              begin
                last_offset:=lastfield.fieldoffset div 8;
                { our bitpacked fields are interpreted as always being
                  aligned, because unlike in C we don't have char:1, int:1
                  etc (so everything is basically a char:x) }
                must_be_aligned:=not (lastfield.vardef.typ in [orddef,enumdef]);
              end
            else
              begin
                last_offset:=lastfield.fieldoffset;
                must_be_aligned:=true;
              end;

            { If [..] an object [..] contains unaligned fields, it has class
              MEMORY }
            if must_be_aligned and
               (align(last_offset,lastfield.vardef.structalignment)<>last_offset) then
              Exit(0);

            { HFA too large }
            if words>4 then
              Exit(0);

            for idx:=1 to words-1 do
              if classes[idx].typ<>X86_64_SSEDF_CLASS then
                Exit(0);

            result:=words;
          end;

        X86_64_SSE_CLASS:
          { Determine the nature of the classes.
             - If the SSE is by itself, then it is an HFA consisting of 2 Singles.
             - If the SSE is followed by an SSESF, then it is an HFA consisting of 3 Singles.
             - If the SSE is followed by an SSE and nothing else, then it is an HFA consisting of 4 Singles.
             - If the SSE is followed by an SSE, but another class follows, then it is an HFA that is too large.
             - If the SSE is followed by an SSEUP, then it is an HVA of some kind. }
          case classes[1].typ of
            X86_64_NO_CLASS:
              begin
                { anything else behind it makes this a compound type }
                if not all_unclassified(2,words-1) then
                  Exit(0);

                { Split into 2 Singles again so they correctly fall into
                  separate XMM registers }
                classes[0].typ:=X86_64_SSESF_CLASS;
                classes[0].def:=element_of(0);
                classes[1]:=classes[0];
                result:=2;
              end;

            X86_64_SSESF_CLASS:
              begin
                { anything else behind it makes this a compound type }
                if not all_unclassified(2,words-1) then
                  Exit(0);

                { the lone Single moves to the third slot, the array of two
                  Singles is broken up into the first two slots }
                classes[2]:=classes[1];
                classes[0].typ:=X86_64_SSESF_CLASS;
                classes[0].def:=element_of(0);
                classes[1]:=classes[0];
                result:=3;
              end;

            X86_64_SSE_CLASS:
              begin
                { HFA too large (or not a true HFA) }
                if not all_unclassified(2,words-1) then
                  Exit(0);

                { break up both arrays; slot 1 has to be read before it is
                  overwritten }
                classes[0].typ:=X86_64_SSESF_CLASS;
                classes[0].def:=element_of(0);
                classes[2].typ:=X86_64_SSESF_CLASS;
                classes[2].def:=element_of(1);
                classes[1]:=classes[0];
                classes[3]:=classes[2];
                result:=4;
              end;

            X86_64_SSEUP_CLASS:
              begin
                { Determine vector size: the first vector already spans the
                  SSE and the SSEUP eightbyte }
                vectors:=1;
                run_len:=2;
                expected_len:=words;
                for idx:=2 to words-1 do
                  begin
                    if classes[idx].typ=X86_64_SSEUP_CLASS then
                      begin
                        Inc(run_len);
                        continue;
                      end;

                    if (classes[idx].typ<>X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
                       ((run_len and (run_len-1))<>0) or { If the length is not a power of two, then it is definitely not a valid aggregate }
                       (run_len>expected_len) or
                       ((expected_len<words) and (run_len<>expected_len)) { Mixture of XMMs and YMMs, for example, is not valid }
                    then
                      Exit(0);

                    Inc(vectors);
                    expected_len:=run_len;
                    run_len:=1;
                  end;

                { Last vector is of a different size }
                if run_len<>expected_len then
                  Exit(0);

                { HVA too large }
                if vectors>4 then
                  Exit(0);

                result:=words;
              end;

            else
              Exit(0);
          end;

        else
          Exit(0);
      end;
    end;
  { Classifies a record for passing in registers.

    Every normal field is classified at its byte position inside the record
    and merged into "classes" by classify_aggregate_element; the merged
    result is then post-processed by finalize_aggregate_classification.

    Returns the number of valid entries in "classes", or 0 when the record
    cannot be classified this way (e.g. it contains a misaligned field).
    Under vectorcall an extra attempt is made to treat the record as a
    homogeneous aggregate. }
  function classify_record(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      fieldsym: tfieldvarsym;
      recst: tabstractrecordsymtable;
      fieldsize,
      fieldstart: aint;
      idx,
      wordcount,
      subresult: longint;
      mustbealigned: boolean;
    begin
      result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,wordcount,classes);
      { nothing left to classify: keep whatever the initialisation decided }
      if wordcount=0 then
        exit;

      recst:=tabstractrecordsymtable(tabstractrecorddef(def).symtable);

      (* Merge the fields of the structure. *)
      for idx:=0 to recst.symlist.count-1 do
        begin
          if not is_normal_fieldvarsym(tsym(recst.symlist[idx])) then
            continue;
          fieldsym:=tfieldvarsym(recst.symlist[idx]);

          if recst.is_packed then
            begin
              { in a packed record the field offset is expressed in bits }
              fieldstart:=byte_offset+fieldsym.fieldoffset div 8;
              { bitpacked ordinals/enums are always treated as aligned,
                because unlike in C there is no char:1, int:1 etc. (so
                everything is basically a char:x) }
              mustbealigned:=not (fieldsym.vardef.typ in [orddef,enumdef]);
              if mustbealigned then
                fieldsize:=fieldsym.vardef.size
              else
                { number of bytes spanned by this bitpacked field }
                fieldsize:=((fieldsym.fieldoffset+fieldsym.vardef.packedbitsize+7) div 8)-(fieldsym.fieldoffset div 8);
            end
          else
            begin
              mustbealigned:=true;
              fieldstart:=byte_offset+fieldsym.fieldoffset;
              fieldsize:=fieldsym.vardef.size;
            end;

          { If [..] an object [..] contains unaligned fields, it has class
            MEMORY }
          if mustbealigned and
             (align(fieldstart,fieldsym.vardef.structalignment)<>fieldstart) then
            exit(0);

          if classify_aggregate_element(calloption,fieldsym.vardef,def,varspez,fieldsize,classes,fieldstart)=0 then
            exit(0);
        end;

      result:=finalize_aggregate_classification(calloption,def,wordcount,classes);

      { There is still one case where it might not have to be passed on the
        stack, and that's a homogeneous vector aggregate (HVA) or a
        homogeneous float aggregate (HFA) under vectorcall. }
      if calloption<>pocall_vectorcall then
        exit;

      case result of
        0:
          result:=try_build_homogeneous_aggregate(def,wordcount,classes);
        2:
          begin
            { An HFA with 3 or 4 Singles may have had pairs of Singles merged
              into a single SSE_CLASS, which must be split into separate
              SSESF_CLASS references for vectorcall; only
              try_build_homogeneous_aggregate performs that split. [Kit] }
            subresult:=try_build_homogeneous_aggregate(def,wordcount,classes);
            { on failure keep the 2 and handle the record type normally }
            if subresult<>0 then
              result:=subresult;
          end;
      end;
    end;
  { Classifies a normal (static) array for passing in registers.

    Each element is classified at its byte position inside the array and
    merged into "classes" by classify_aggregate_element; bitpacked arrays
    are walked bit-wise. Returns the number of valid entries in "classes",
    or 0 when the array cannot be classified this way. }
  function classify_normal_array(calloption: tproccalloption; def: tarraydef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      idx, total: aword;
      spansize,
      elemwidth,
      elemstart,
      bitpos: aint;
      wordcount: longint;
      bitpacked: boolean;
    begin
      spansize:=0;
      bitpos:=0;
      result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,wordcount,classes);
      if wordcount=0 then
        exit;

      bitpacked:=is_packed_array(def);
      if bitpacked then
        { element width in bits; the byte span is recomputed per element }
        elemwidth:=def.elepackedbitsize
      else
        begin
          { element width in bytes; the span is the same for every element }
          elemwidth:=def.elesize;
          spansize:=elemwidth;
        end;

      (* Merge the elements of the array. *)
      total:=def.elecount;
      idx:=0;
      repeat
        if bitpacked then
          begin
            { calculate the number of bytes spanned by this bitpacked
              element }
            spansize:=((bitpos+elemwidth+7) div 8)-(bitpos div 8);
            elemstart:=byte_offset+(elemwidth*idx) div 8;
            { bit offset of next element }
            inc(bitpos,elemwidth);
          end
        else
          begin
            elemstart:=byte_offset+idx*elemwidth;
            { If [..] an object [..] contains unaligned fields, it has class
              MEMORY }
            if align(elemstart,def.alignment)<>elemstart then
              exit(0);
          end;

        if classify_aggregate_element(calloption,def.elementdef,def,varspez,spansize,classes,elemstart)=0 then
          exit(0);
        inc(idx);
      until idx=total;

      result:=finalize_aggregate_classification(calloption,def,wordcount,classes);
    end;
  { Classifies a value of type "def" located at "byte_offset" and stores the
    outcome in "classes".

    Returns the number of entries of "classes" that were filled in, or 0 when
    the value cannot be classified into registers. When "round_to_8" is set,
    the offset handed on to the helper classifiers is reduced modulo 8; the
    Single/Double vector detection always looks at the unreduced
    "byte_offset". "parentdef" is the enclosing aggregate (nil at top level)
    and is used to detect elements of 16-byte aligned vectors. }
  function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint;
    var
      offs: aint;
      in_aligned_vector: boolean;
    begin
      offs:=byte_offset;
      if round_to_8 then
        offs:=byte_offset mod 8;

      case def.typ of
        orddef,
        enumdef,
        pointerdef,
        classrefdef:
          result:=classify_as_integer_argument(def,real_size,classes,offs);
        formaldef:
          result:=classify_as_integer_argument(voidpointertype,voidpointertype.size,classes,offs);
        floatdef:
          begin
            classes[0].def:=def;
            case tfloatdef(def).floattype of
              s32real:
                begin
                  in_aligned_vector:=Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0);
                  { The offset test uses byte_offset regardless of the
                    round_to_8 flag }
                  if (byte_offset mod 8) <> 0 then
                    begin
                      { upper half of an eightbyte: described as a pair of
                        Singles in either case }
                      classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
                      if in_aligned_vector then
                        { Fourth element of an aligned vector }
                        classes[0].typ:=X86_64_SSEUP_CLASS
                      else
                        { if we have e.g. a record with two successive "single"
                          fields, we need a 64 bit rather than a 32 bit load }
                        classes[0].typ:=X86_64_SSE_CLASS;
                    end
                  else if in_aligned_vector and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
                    { Third element of an aligned vector }
                    classes[0].typ:=X86_64_SSEUP_CLASS
                  else
                    classes[0].typ:=X86_64_SSESF_CLASS;
                  result:=1;
                end;
              s64real:
                begin
                  in_aligned_vector:=Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0);
                  if in_aligned_vector and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
                    { Aligned vector of type double }
                    classes[0].typ:=X86_64_SSEUP_CLASS
                  else
                    classes[0].typ:=X86_64_SSEDF_CLASS;
                  result:=1;
                end;
              s80real,
              sc80real:
                begin
                  classes[0].typ:=X86_64_X87_CLASS;
                  classes[1].typ:=X86_64_X87UP_CLASS;
                  classes[1].def:=def;
                  result:=2;
                end;
              s64comp,
              s64currency:
                begin
                  classes[0].typ:=X86_64_INTEGER_CLASS;
                  result:=1;
                end;
              s128real:
                begin
                  classes[0].typ:=X86_64_SSE_CLASS;
                  classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
                  classes[1].typ:=X86_64_SSEUP_CLASS;
                  classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
                  result:=2;
                end;
            end;
          end;
        recorddef:
          result:=classify_record(calloption,def,parentdef,varspez,classes,offs);
        objectdef:
          if is_object(def) then
            { pass by reference, like ppc and i386 }
            result:=0
          else
            { all kinds of pointer types: class, objcclass, interface, ... }
            result:=classify_as_integer_argument(def,voidpointertype.size,classes,offs);
        setdef:
          if is_smallset(def) then
            result:=classify_as_integer_argument(def,def.size,classes,offs)
          else
            result:=0;
        stringdef:
          if (tstringdef(def).stringtype in [st_shortstring,st_longstring]) then
            result:=0
          else
            result:=classify_as_integer_argument(def,def.size,classes,offs);
        arraydef:
          begin
            if is_dynamic_array(def) then
              { a dynamic array is treated like a pointer }
              result:=classify_as_integer_argument(def,voidpointertype.size,classes,offs)
            else if is_open_array(def) or
                    is_array_of_const(def) then
              { other special arrays are passed on the stack }
              result:=0
            else
              { normal array }
              result:=classify_normal_array(calloption,tarraydef(def),parentdef,varspez,classes,offs);
          end;
        filedef:
          { the file record is definitely too big }
          result:=0;
        procvardef:
          begin
            if (po_methodpointer in tprocvardef(def).procoptions) then
              begin
                { treat as TMethod record }
                def:=search_system_type('TMETHOD').typedef;
                result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,offs, False);
              end
            else
              { pointer }
              result:=classify_as_integer_argument(def,def.size,classes,offs);
          end;
        variantdef:
          begin
            { same as tvardata record }
            def:=search_system_type('TVARDATA').typedef;
            result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,offs, False);
          end;
        undefineddef:
          { how shall we know?
            since classify_argument is called during parsing, see tw27685.pp,
            we handle undefineddef here }
          result:=0;
        errordef:
          { an error message should already have been issued, so only avoid
            an internal error here }
          result:=0;
        else
          internalerror(2010021405);
      end;
    end;
  { Returns the size of a single element in the aggregate, or the entire
    vector, if "def" is a SIMD vector type or a homogeneous aggregate, and 0
    otherwise.

    The decision is derived from the classes that classify_argument produces
    for "def". Note that the classification is always requested with
    vs_value; the "varspez" parameter is not consulted. }
  function is_simd_vector_type_or_homogeneous_aggregate(calloption: tproccalloption; def: tdef; varspez: tvarspez): aint;
    var
      classcount,idx,curlen,vectors,reflen: longint;
      classes: tx64paraclasses;
    begin
      for idx := Low(classes) to High(classes) do
        begin
          classes[idx].typ := X86_64_NO_CLASS;
          classes[idx].def := nil;
        end;

      classcount := classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
      if classcount = 0 then
        Exit(0);

      case classes[0].typ of
        X86_64_SSESF_CLASS:
          { Only valid if the aggregate contains a lone Single }
          if (classcount = 1) and (calloption = pocall_vectorcall) then
            Result := 4
          else
            Result := 0;

        X86_64_SSEDF_CLASS:
          begin
            { several Doubles are only accepted under vectorcall }
            if (classcount > 1) and (calloption <> pocall_vectorcall) then
              Exit(0);

            { every remaining class has to be a Double as well }
            for idx := 1 to classcount - 1 do
              if classes[idx].typ <> X86_64_SSEDF_CLASS then
                Exit(0);

            if (def.size div 8) = classcount then
              Result := 8
            else
              { Wrong alignment or compound size }
              Result := 0;
          end;

        X86_64_SSE_CLASS:
          begin
            if classcount = 1 then
              begin
                { 2 Singles }
                if calloption = pocall_vectorcall then
                  Exit(4);
                Exit(0);
              end;

            if classes[1].typ = X86_64_SSESF_CLASS then
              begin
                { 3 Singles }
                if classcount = 2 then
                  Exit(4);
                Exit(0);
              end;

            { Walk the remaining classes: SSEUP extends the current vector,
              SSE starts a new one. All vectors must have the same length. }
            reflen := classcount * 2;
            curlen := 2;
            vectors := 1;
            for idx := 1 to classcount - 1 do
              case classes[idx].typ of
                X86_64_SSEUP_CLASS:
                  Inc(curlen, 2);
                X86_64_SSE_CLASS:
                  begin
                    if (reflen < classcount * 2) and (curlen <> reflen) then
                      { Different vector sizes }
                      Exit(0);
                    reflen := curlen;
                    curlen := 2;
                    Inc(vectors);
                  end;
                else
                  Exit(0);
              end;

            if curlen <> reflen then
              { Last vector has to be the same size }
              Exit(0);

            { Either an HFA with 4 Singles, or an HVA
              (or a lone SIMD vector if vectors = 1); aggregates that reach
              a count of 4 here are rejected }
            if vectors >= 4 then
              Exit(0);

            if (vectors > 1) and (calloption <> pocall_vectorcall) then
              Exit(0);

            if curlen = 2 then
              { Packed, unaligned array of Singles }
              Result := 4
            else
              { NOTE(review): curlen advances by 2 per eightbyte, so it looks
                like a count of 4-byte units; multiplying by 8 would then
                report twice the vector's byte size - confirm intended unit }
              Result := curlen * 8;
          end;

        else
          Exit(0);
      end;
    end;
  { Determines the parameter classes of a by-value parameter of type "def".

    On return "classes" describes the parameter: a single MEMORY class when
    classify_argument could not classify it, otherwise the classes that
    classify_argument produced, with every x87-related class replaced by
    MEMORY. More than four classes raises an internal error. }
  procedure getvalueparaloc(calloption: tproccalloption;varspez:tvarspez;def:tdef;var classes: tx64paraclasses);
    var
      valuesize: aint;
      idx: longint;
      classcount: longint;
    begin
      { init the classes array, because even if classify_argument inits only
        one element we copy both to loc1/loc2 in case "1" is returned }
      for idx:=low(classes) to high(classes) do
        begin
          classes[idx].typ:=X86_64_NO_CLASS;
          classes[idx].def:=nil;
        end;

      { def.size internalerrors for open arrays and dynamic arrays, since
        their size cannot be determined at compile-time.
        classify_argument does not look at the realsize argument for array
        cases, but we obviously do have to pass something... }
      if not is_special_array(def) then
        valuesize:=def.size
      else
        valuesize:=-1;

      classcount:=classify_argument(calloption,def,nil,varspez,valuesize,classes,0,False);

      if classcount=0 then
        begin
          classes[0].typ:=X86_64_MEMORY_CLASS;
          classes[0].def:=def;
        end
      else if (classcount>=1) and (classcount<=4) then
        begin
          { If the class is X87, X87UP or COMPLEX_X87, it is passed in memory }
          for idx:=0 to classcount-1 do
            if classes[idx].typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS] then
              classes[idx].typ:=X86_64_MEMORY_CLASS;
        end
      else
        { 8 can happen for _m512 vectors, but are not yet supported }
        internalerror(2010021501);
    end;
  { Returns true when a function result of type "def" has to be returned via
    a hidden parameter.

    Records and procvars are decided by classify_argument (a result of 0
    classes means "return in parameter"); every other type is left to the
    inherited implementation, unless handle_common_ret_in_param already
    settled the question. }
  function tcpuparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
    var
      classes: tx64paraclasses;
    begin
      if handle_common_ret_in_param(def,pd,result) then
        exit;

      if not (def.typ in [recorddef,procvardef]) then
        begin
          result:=inherited ret_in_param(def,pd);
          exit;
        end;

      { for records it depends on their contents and size; procvars are
        included to make sure 'procedure of object' is handled correctly }
      fillchar(classes,sizeof(classes),0);
      result:=classify_argument(pd.proccalloption,def,nil,vs_value,def.size,classes,0,False)=0;
    end;
  { Returns true when every location of "cgpara" is a LOC_REFERENCE, false as
    soon as one location of another kind is found. A parameter without any
    location raises an internal error. }
  function tcpuparamanager.param_use_paraloc(const cgpara:tcgpara):boolean;
    var
      cur : pcgparalocation;
    begin
      if not assigned(cgpara.location) then
        internalerror(200410102);

      { skip over the leading run of LOC_REFERENCE locations }
      cur:=cgpara.location;
      while assigned(cur) and (cur^.loc=LOC_REFERENCE) do
        cur:=cur^.next;

      { reaching the end of the list means that all of them matched }
      result:=not assigned(cur);
    end;
  { true if a parameter is too large to copy and only the address is pushed

    var, out and constref parameters always pass their address. For value
    and const parameters the answer depends on the type and on the calling
    convention; types not listed below are passed by value. }
  function tcpuparamanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
    var
      classes: tx64paraclasses;
    begin
      fillchar(classes,sizeof(classes),0);

      { var,out,constref always require address }
      if varspez in [vs_var,vs_out,vs_constref] then
        exit(true);

      { Only vs_const, vs_value here }
      result:=false;
      case def.typ of
        formaldef :
          result:=true;

        recorddef :
          begin
            if (calloption=pocall_mwpascal) and
               (varspez=vs_const) then
              { MetroWerks Pascal: const records always passed by reference
                (for Mac OS X interfaces) }
              result:=true
            else if x86_64_use_ms_abi(calloption) then
              begin
                { Win ABI depends on size to pass it in a register or not }
                result:=not aggregate_in_registers_win64(varspez,def.size);
                { "vectorcall" has the addition that it allows for aligned
                  SSE types }
                if result and (calloption = pocall_vectorcall) then
                  result:=(is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
              end
            else if ((varspez=vs_const) and
                     not(calloption in cdecl_pocalls)) or
                    (def.size=16) then
              { pass constant parameters that would be passed via memory by
                reference for non-cdecl/cppdecl, and make sure that the
                tmethod record (size=16) is passed the same way as a complex
                procvar }
              result:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False)=0
            else
              { SysV ABI always passes it as value parameter }
              result:=false;
          end;

        arraydef :
          begin
            if ((calloption in cdecl_pocalls) and
                is_array_of_const(def)) or
               is_dynamic_array(def) then
              { a cdecl array of const needs to be ignored and is therefore
                pushed as a value parameter with length 0 }
              result:=false
            else if (calloption = pocall_vectorcall) then
              { Pass all arrays by reference unless they are a valid, aligned
                SIMD type (arrays can't be homogeneous aggregates) }
              result:=(is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0)
            else
              { pass all arrays by reference to be compatible with C (passing
                an array by value (= copying it on the stack) does not exist,
                because an array is the same as a pointer there) }
              result:=true;
          end;

        objectdef :
          begin
            { don't treat objects like records, because we only know whether
              or not they'll have a VMT after the entire object is parsed
              -> if they are used as function result from one of their own
              methods, their size can still change after we've determined
              whether this function result should be returned by reference or
              by value }
            if is_object(def) then
              result:=true;
          end;

        variantdef,
        stringdef,
        procvardef,
        setdef :
          result:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False)=0;

        else
          ;
      end;
    end;
  { Returns the set of integer registers that a call with the given calling
    convention may overwrite. RSI and RDI are only part of the set when the
    convention does not use the MS ABI. }
  function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
    begin
      { registers that are volatile under both ABIs }
      result:=[RS_RAX,RS_RCX,RS_RDX,RS_R8,RS_R9,RS_R10,RS_R11];
      if not x86_64_use_ms_abi(calloption) then
        result:=result+[RS_RSI,RS_RDI];
    end;
  { Returns the set of XMM registers that a call with the given calling
    convention may overwrite: all of XMM0..XMM31 by default, but without
    XMM6..XMM15 when the convention uses the MS ABI. }
  function tcpuparamanager.get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;
    begin
      if not x86_64_use_ms_abi(calloption) then
        exit([RS_XMM0..RS_XMM15,RS_XMM16..RS_XMM31]);
      result:=[RS_XMM0..RS_XMM5,RS_XMM16..RS_XMM31];
    end;
  { Returns the set of x87 registers that a call may overwrite: always the
    full ST0..ST7 stack, independent of the calling convention. }
  function tcpuparamanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
    begin
      exit([RS_ST0..RS_ST7]);
    end;
  { Returns the integer registers that have to be preserved across calls.

    The list is selected by the code generator's use_ms_abi flag; the
    "calloption" parameter is not consulted here. }
  function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
    const
      win64_saved_std_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..7] of tsuperregister{$endif} = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP);
      others_saved_std_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..4] of tsuperregister{$endif} = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
    begin
      if not tcgx86_64(cg).use_ms_abi then
        result:=others_saved_std_regs
      else
        result:=win64_saved_std_regs;
    end;
  { Returns the XMM registers that have to be preserved across calls:
    XMM6..XMM15 when the code generator uses the MS ABI, and an empty array
    otherwise. The "calloption" parameter is not consulted here. }
  function tcpuparamanager.get_saved_registers_mm(calloption: tproccalloption):tcpuregisterarray;
    const
      win64_saved_xmm_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..9] of tsuperregister{$endif} = (RS_XMM6,RS_XMM7,
        RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
    begin
      if not tcgx86_64(cg).use_ms_abi then
        begin
          SetLength(result,0);
          exit;
        end;
      result:=win64_saved_xmm_regs;
    end;
  { Builds the location description of the function result of "p".

    Float results are handled directly (currency and comp are returned in an
    FPU register although they are integer class as parameters). Every other
    type is classified with classify_argument and each resulting class is
    mapped onto the next free integer or MM return register.

    Raises an internal error when the type cannot be returned in registers,
    when more than MAX_PARA_CLASSES classes are reported, or when an
    unexpected class combination is encountered. }
  function tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
    const
      intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
      mmretregs: array[0..1] of tregister = (NR_MM_RESULT_REG,NR_MM_RESULT_REG_HIGH);
      mmretregs_vectorcall: array[0..3] of tregister = (NR_XMM0,NR_XMM1,NR_XMM2,NR_XMM3);
    var
      classes: tx64paraclasses;
      i,j,
      numclasses: longint;
      intretregidx,
      mmretregidx: longint;
      retcgsize : tcgsize;
      paraloc : pcgparalocation;
    begin
      if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
        exit;

      { Return in FPU register? -> don't use classify_argument(), because
        currency and comp need special treatment here (they are integer class
        when passing as parameter, but LOC_FPUREGISTER as function result) }
      if result.def.typ=floatdef then
        begin
          paraloc:=result.add_location;
          paraloc^.def:=result.def;
          case tfloatdef(result.def).floattype of
            s32real:
              begin
                paraloc^.loc:=LOC_MMREGISTER;
                paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
                paraloc^.size:=OS_F32;
              end;
            s64real:
              begin
                paraloc^.loc:=LOC_MMREGISTER;
                paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
                paraloc^.size:=OS_F64;
              end;
            { the first two only exist on targets with an x87, on others
              they are replaced by int64 }
            s64currency,
            s64comp,
            s80real,
            sc80real:
              begin
                paraloc^.loc:=LOC_FPUREGISTER;
                paraloc^.register:=NR_FPU_RESULT_REG;
                paraloc^.size:=retcgsize;
              end;
            else
              internalerror(200405034);
          end;
        end
      else
        { Return in register }
        begin
          fillchar(classes,sizeof(classes),0);
          numclasses:=classify_argument(p.proccalloption,result.def,nil,vs_value,result.def.size,classes,0,False);
          { this would mean a memory return }
          if (numclasses=0) then
            begin
              { we got an error before, so we just skip all the return type generation }
              if result.def.typ=errordef then
                exit;
              internalerror(2010021502);
            end;

          if (numclasses > MAX_PARA_CLASSES) then
            internalerror(2010021503);

          intretregidx:=0;
          mmretregidx:=0;
          i := 0;
          { We can't use a for-loop here because the treatment of the SSEUP class requires skipping over i's }
          while i < numclasses do
            begin
              paraloc:=result.add_location;
              paraloc^.def:=classes[i].def;
              case classes[i].typ of
                X86_64_INTEGERSI_CLASS,
                X86_64_INTEGER_CLASS:
                  begin
                    paraloc^.loc:=LOC_REGISTER;
                    paraloc^.register:=intretregs[intretregidx];
                    if classes[i].typ=X86_64_INTEGER_CLASS then
                      begin
                        paraloc^.size:=OS_64;
                        if paraloc^.def.size<>8 then
                          paraloc^.def:=u64inttype;
                      end
                    else if result.intsize in [1,2,4] then
                      begin
                        { The ABI does not require sign/zero-extended function
                          results, but older versions of clang did so and
                          on Darwin current versions of clang keep doing so
                          for backward compatibility. On other platforms, it
                          doesn't and hence we don't either }
                        if (i=0) and
                           not(target_info.system in systems_darwin) and
                           (result.intsize in [1,2]) then
                          begin
                            paraloc^.size:=int_cgsize(result.intsize);
                            paraloc^.def:=cgsize_orddef(paraloc^.size);
                          end
                        else
                          paraloc^.size:=def_cgsize(paraloc^.def);
                      end
                    else
                      begin
                        paraloc^.size:=OS_32;
                        if paraloc^.def.size<>4 then
                          paraloc^.def:=u32inttype;
                      end;
                    setsubreg(paraloc^.register,cgsize2subreg(R_INTREGISTER,paraloc^.size));
                    inc(intretregidx);
                  end;
                X86_64_SSE_CLASS,
                X86_64_SSEUP_CLASS,
                X86_64_SSESF_CLASS,
                X86_64_SSEDF_CLASS:
                  begin
                    paraloc^.loc:=LOC_MMREGISTER;
                    if p.proccalloption = pocall_vectorcall then
                      paraloc^.register:=mmretregs_vectorcall[mmretregidx]
                    else
                      paraloc^.register:=mmretregs[mmretregidx];
                    case classes[i].typ of
                      X86_64_SSESF_CLASS:
                        begin
                          setsubreg(paraloc^.register,R_SUBMMS);
                          paraloc^.size:=OS_F32;
                        end;
                      X86_64_SSEDF_CLASS:
                        begin
                          setsubreg(paraloc^.register,R_SUBMMD);
                          paraloc^.size:=OS_F64;
                        end;
                      X86_64_SSE_CLASS:
                        begin
                          { Count the SSEUP classes that directly follow this
                            SSE class; together they form one vector. Only
                            indices below numclasses are inspected: the entry
                            at numclasses is not part of the classification
                            and lies outside the array when numclasses equals
                            MAX_PARA_CLASSES. }
                          j := 1;
                          if not (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
                            while i + j < numclasses do
                              begin
                                if classes[i+j].typ <> X86_64_SSEUP_CLASS then
                                  Break;
                                Inc(j);
                              end;

                          { j = MM word count }
                          Inc(i, j - 1);
                          case j of
                            1:
                              begin
                                setsubreg(paraloc^.register,R_SUBQ);
                                paraloc^.size:=OS_M64;
                              end;
                            2:
                              begin
                                setsubreg(paraloc^.register,R_SUBMMX);
                                paraloc^.size:=OS_M128;
                              end;
                            4:
                              begin
                                setsubreg(paraloc^.register,R_SUBMMY);
                                paraloc^.size:=OS_M256; { Currently unsupported }
                              end;
                            8:
                              begin
                                setsubreg(paraloc^.register,R_SUBMMZ);
                                paraloc^.size:=OS_M512; { Currently unsupported }
                              end;
                            else
                              InternalError(2018012901);
                          end;
                          paraloc^.def:=carraydef.getreusable_no_free_vector(paraloc^.def,j);
                        end;
                      else
                        { a stand-alone SSEUP class is only accepted for the
                          plain (non-vectorcall) MS ABI }
                        if (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
                          begin
                            setsubreg(paraloc^.register,R_SUBQ);
                            paraloc^.size:=OS_M64;
                          end
                        else
                          { Should not get here }
                          InternalError(2018012900);
                    end;
                    inc(mmretregidx);
                  end;
                X86_64_X87_CLASS:
                  begin
                    { must be followed by X86_64_X87UP_CLASS and that must be
                      the last class }
                    if (i<>(numclasses-2)) or
                       (classes[i+1].typ<>X86_64_X87UP_CLASS) then
                      internalerror(2014110401);
                    paraloc^.loc:=LOC_FPUREGISTER;
                    paraloc^.register:=NR_FPU_RESULT_REG;
                    paraloc^.size:=OS_F80;
                    { the X87UP class needs no location of its own }
                    break;
                  end;
                X86_64_NO_CLASS:
                  begin
                    { empty record/array }
                    if (i<>0) or
                       (numclasses<>1) then
                      internalerror(2010060302);
                    paraloc^.loc:=LOC_VOID;
                    paraloc^.def:=voidtype;
                  end;
                else
                  internalerror(2010021504);
              end;
              Inc(i);
            end;
        end;
    end;
  1468. procedure tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  1469. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  1470. var
  1471. hp : tparavarsym;
  1472. fdef,
  1473. paradef : tdef;
  1474. paraloc : pcgparalocation;
  1475. subreg : tsubregister;
  1476. pushaddr : boolean;
  1477. paracgsize : tcgsize;
  1478. { loc[2] onwards are only used for _m256 under vectorcall/SysV, and
  1479. homogeneous vector aggregates and homogeneous float aggreates under
  1480. the vectorcall calling convention. [Kit] }
  1481. loc : tx64paraclasses;
  1482. needintloc,
  1483. needmmloc,
  1484. paralen,
  1485. locidx,
  1486. i,j,
  1487. varalign,
  1488. paraalign : longint;
  1489. use_ms_abi : boolean;
  1490. begin
  1491. paraalign:=get_para_align(p.proccalloption);
  1492. use_ms_abi:=x86_64_use_ms_abi(p.proccalloption);
  1493. { Register parameters are assigned from left to right }
  1494. for i:=0 to paras.count-1 do
  1495. begin
  1496. hp:=tparavarsym(paras[i]);
  1497. paradef:=hp.vardef;
  1498. { on win64, if a record has only one field and that field is a
  1499. single or double, it has to be handled like a single/double }
  1500. if use_ms_abi and
  1501. ((paradef.typ=recorddef) {or
  1502. is_object(paradef)}) and
  1503. tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(fdef) and
  1504. (fdef.typ=floatdef) and
  1505. (tfloatdef(fdef).floattype in [s32real,s64real]) then
  1506. paradef:=fdef;
  1507. pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
  1508. if pushaddr then
  1509. begin
  1510. loc[0].typ:=X86_64_INTEGER_CLASS;
  1511. loc[1].typ:=X86_64_NO_CLASS;
  1512. paracgsize:=OS_ADDR;
  1513. paralen:=sizeof(pint);
  1514. paradef:=cpointerdef.getreusable_no_free(paradef);
  1515. loc[0].def:=paradef;
  1516. loc[1].def:=nil;
  1517. for j:=2 to high(loc) do
  1518. begin
  1519. loc[j].typ:=X86_64_NO_CLASS;
  1520. loc[j].def:=nil;
  1521. end;
  1522. end
  1523. else
  1524. begin
  1525. getvalueparaloc(p.proccalloption,hp.varspez,paradef,loc);
  1526. paralen:=push_size(hp.varspez,paradef,p.proccalloption);
  1527. if p.proccalloption = pocall_vectorcall then
  1528. begin
  1529. { TODO: Can this set of instructions be put into 'defutil' without it relying on the argument classification? [Kit] }
  1530. { The SIMD vector types have to be OS_M128 etc., not OS_128 etc.}
  1531. case is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,paradef,vs_value) of
  1532. 0:
  1533. { Not a vector or valid aggregate }
  1534. paracgsize:=def_cgsize(paradef);
  1535. 4:
  1536. paracgsize:=OS_F32;
  1537. 8:
  1538. paracgsize:=OS_F64;
  1539. 16:
  1540. paracgsize:=OS_M128;
  1541. 32:
  1542. paracgsize:=OS_M256;
  1543. 64:
  1544. paracgsize:=OS_M512;
  1545. else
  1546. InternalError(2018012910);
  1547. end;
  1548. end
  1549. else
  1550. paracgsize:=def_cgsize(paradef);
  1551. end;
  1552. { cheat for now, we should copy the value to an mm reg as well (FK) }
  1553. if varargsparas and
  1554. use_ms_abi and
  1555. (paradef.typ = floatdef) then
  1556. begin
  1557. loc[1].typ:=X86_64_NO_CLASS;
  1558. if paracgsize=OS_F64 then
  1559. begin
  1560. loc[0].typ:=X86_64_INTEGER_CLASS;
  1561. paracgsize:=OS_64;
  1562. paradef:=u64inttype;
  1563. end
  1564. else
  1565. begin
  1566. loc[0].typ:=X86_64_INTEGERSI_CLASS;
  1567. paracgsize:=OS_32;
  1568. paradef:=u32inttype;
  1569. end;
  1570. loc[0].def:=paradef;
  1571. end;
  1572. hp.paraloc[side].reset;
  1573. hp.paraloc[side].size:=paracgsize;
  1574. hp.paraloc[side].intsize:=paralen;
  1575. hp.paraloc[side].Alignment:=paraalign;
  1576. hp.paraloc[side].def:=paradef;
  1577. if paralen>0 then
  1578. begin
  1579. { Enough registers free? }
  1580. needintloc:=0;
  1581. needmmloc:=0;
  1582. for locidx:=low(loc) to high(loc) do
  1583. case loc[locidx].typ of
  1584. X86_64_INTEGER_CLASS,
  1585. X86_64_INTEGERSI_CLASS:
  1586. inc(needintloc);
  1587. { Note, do NOT include X86_64_SSEUP_CLASS because this links with
  1588. X86_64_SSE_CLASS and we only need one register, not two. [Kit] }
  1589. X86_64_SSE_CLASS,
  1590. X86_64_SSESF_CLASS,
  1591. X86_64_SSEDF_CLASS:
  1592. inc(needmmloc);
  1593. else
  1594. ;
  1595. end;
  1596. { the "-1" is because we can also use the current register }
  1597. if (use_ms_abi and
  1598. ((intparareg+needintloc-1 > high(paraintsupregs_winx64)) or
  1599. ((p.proccalloption = pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_vectorcall))) or
  1600. ((p.proccalloption <> pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_winx64))))) or
  1601. (not use_ms_abi and
  1602. ((intparareg+needintloc-1 > high(paraintsupregs)) or
  1603. (mmparareg+needmmloc-1 > high(parammsupregs)))) then
  1604. begin
  1605. { If there are no registers available for any
  1606. eightbyte of an argument, the whole argument is
  1607. passed on the stack. }
  1608. loc[low(loc)].typ:=X86_64_MEMORY_CLASS;
  1609. loc[low(loc)].def:=paradef;
  1610. for locidx:=succ(low(loc)) to high(loc) do
  1611. loc[locidx].typ:=X86_64_NO_CLASS;
  1612. end;
  1613. locidx:=0;
  1614. while (paralen>0) and
  1615. (locidx<=high(loc)) and
  1616. (loc[locidx].typ<>X86_64_NO_CLASS) do
  1617. begin
  1618. { Allocate }
  1619. case loc[locidx].typ of
  1620. X86_64_INTEGER_CLASS,
  1621. X86_64_INTEGERSI_CLASS:
  1622. begin
  1623. paraloc:=hp.paraloc[side].add_location;
  1624. paraloc^.loc:=LOC_REGISTER;
  1625. paraloc^.def:=loc[locidx].def;
  1626. if (paracgsize=OS_NO) or ((locidx<high(loc)) and (loc[locidx+1].typ<>X86_64_NO_CLASS)) then
  1627. begin
  1628. if loc[locidx].typ=X86_64_INTEGER_CLASS then
  1629. begin
  1630. paraloc^.size:=OS_INT;
  1631. paraloc^.def:=u64inttype;
  1632. subreg:=R_SUBWHOLE;
  1633. end
  1634. else
  1635. begin
  1636. paraloc^.size:=OS_32;
  1637. paraloc^.def:=u32inttype;
  1638. subreg:=R_SUBD;
  1639. end;
  1640. end
  1641. else
  1642. begin
  1643. { some compilers sign/zero-extend on the callerside,
  1644. others don't. To be compatible with both, FPC
  1645. extends on the callerside, and assumes no
  1646. extension has been performed on the calleeside.
  1647. This is less efficient, but the alternative is
  1648. occasional crashes when calling code generated
  1649. by certain other compilers, or being called from
  1650. code generated by other compilers.
  1651. Exception: Darwin, since everyone there needs to
  1652. be compatible with the system compiler clang
  1653. (which extends on the caller side).
  1654. Exception: if the call is not external, then we can follow the ABI as FPC
  1655. generated code follows the ABI
  1656. Not for LLVM, since there the zero/signext
  1657. attributes by definition only apply to the
  1658. caller side }
  1659. {$ifndef LLVM}
  1660. if not(target_info.system in systems_darwin) and
  1661. ((side=calleeside) or (([po_weakexternal,po_external]*p.procoptions)=[])) and
  1662. (hp.paraloc[side].intsize in [1,2]) then
  1663. begin
  1664. paraloc^.def:=hp.paraloc[side].def
  1665. end;
  1666. {$endif not LLVM}
  1667. paraloc^.size:=def_cgsize(paraloc^.def);
  1668. { s64comp is pushed in an int register }
  1669. if paraloc^.size=OS_C64 then
  1670. begin
  1671. paraloc^.size:=OS_64;
  1672. paraloc^.def:=u64inttype;
  1673. end;
  1674. subreg:=cgsize2subreg(R_INTREGISTER,paraloc^.size);
  1675. end;
  1676. { winx64 uses different registers }
  1677. if use_ms_abi then
  1678. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs_winx64[intparareg],subreg)
  1679. else
  1680. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs[intparareg],subreg);
  1681. { matching mm register must be skipped }
  1682. if use_ms_abi then
  1683. inc(mmparareg);
  1684. inc(intparareg);
  1685. dec(paralen,tcgsize2size[paraloc^.size]);
  1686. end;
  1687. X86_64_SSE_CLASS,
  1688. X86_64_SSESF_CLASS,
  1689. X86_64_SSEDF_CLASS:
  1690. begin
  1691. paraloc:=hp.paraloc[side].add_location;
  1692. paraloc^.loc:=LOC_MMREGISTER;
  1693. paraloc^.def:=loc[locidx].def;
  1694. case loc[locidx].typ of
  1695. X86_64_SSESF_CLASS:
  1696. begin
  1697. subreg:=R_SUBMMS;
  1698. paraloc^.size:=OS_F32;
  1699. end;
  1700. X86_64_SSEDF_CLASS:
  1701. begin
  1702. subreg:=R_SUBMMD;
  1703. paraloc^.size:=OS_F64;
  1704. end;
  1705. X86_64_SSE_CLASS:
  1706. begin
  1707. subreg:=R_SUBQ;
  1708. paraloc^.size:=OS_M64;
  1709. j := 1;
  1710. if not (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1711. while locidx + j <= high(loc) do
  1712. begin
  1713. if loc[locidx+j].typ <> X86_64_SSEUP_CLASS then
  1714. Break;
  1715. Inc(j);
  1716. end;
  1717. { j = MM word count }
  1718. Inc(locidx, j - 1);
  1719. case j of
  1720. 1:
  1721. begin
  1722. subreg:=R_SUBQ;
  1723. paraloc^.size:=OS_M64;
  1724. end;
  1725. 2:
  1726. begin
  1727. subreg:=R_SUBMMX;
  1728. paraloc^.size:=OS_M128;
  1729. end;
  1730. 4:
  1731. begin
  1732. subreg:=R_SUBMMY;
  1733. paraloc^.size:=OS_M256; { Currently unsupported }
  1734. end;
  1735. 8:
  1736. begin
  1737. subreg:=R_SUBMMZ;
  1738. paraloc^.size:=OS_M512; { Currently unsupported }
  1739. end;
  1740. else
  1741. InternalError(2018012903);
  1742. end;
  1743. paraloc^.def:=carraydef.getreusable_no_free_vector(paraloc^.def,j);
  1744. end;
  1745. else
  1746. if (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1747. begin
  1748. subreg:=R_SUBQ;
  1749. paraloc^.size:=OS_M64;
  1750. end
  1751. else
  1752. { Should not get here }
  1753. InternalError(2018012902);
  1754. end;
  1755. { winx64 uses different registers }
  1756. if use_ms_abi then
  1757. begin
  1758. if p.proccalloption = pocall_vectorcall then
  1759. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_vectorcall[mmparareg],subreg)
  1760. else
  1761. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_winx64[mmparareg],subreg);
  1762. end
  1763. else
  1764. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs[mmparareg],subreg);
  1765. { matching int register must be skipped }
  1766. if use_ms_abi then
  1767. inc(intparareg);
  1768. inc(mmparareg);
  1769. dec(paralen,tcgsize2size[paraloc^.size]);
  1770. end;
  1771. X86_64_MEMORY_CLASS :
  1772. begin
  1773. paraloc:=hp.paraloc[side].add_location;
  1774. paraloc^.loc:=LOC_REFERENCE;
  1775. paraloc^.def:=loc[locidx].def;
  1776. {Hack alert!!! We should modify int_cgsize to handle OS_128,
  1777. however, since int_cgsize is called in many places in the
  1778. compiler where only a few can already handle OS_128, fixing it
  1779. properly is out of the question to release 2.2.0 in time. (DM)}
  1780. if paracgsize=OS_128 then
  1781. if paralen=8 then
  1782. paraloc^.size:=OS_64
  1783. else if paralen=16 then
  1784. paraloc^.size:=OS_128
  1785. else
  1786. internalerror(200707143)
  1787. else if paracgsize in [OS_F32,OS_F64,OS_F80,OS_F128] then
  1788. paraloc^.size:=int_float_cgsize(paralen)
  1789. else
  1790. paraloc^.size:=int_cgsize(paralen);
  1791. if side=callerside then
  1792. paraloc^.reference.index:=NR_STACK_POINTER_REG
  1793. else
  1794. paraloc^.reference.index:=NR_FRAME_POINTER_REG;
  1795. varalign:=used_align(size_2_align(paralen),paraalign,paraalign);
  1796. paraloc^.reference.offset:=parasize;
  1797. parasize:=align(parasize+paralen,varalign);
  1798. paralen:=0;
  1799. end;
  1800. else
  1801. internalerror(2010053113);
  1802. end;
  1803. inc(locidx);
  1804. end;
  1805. end
  1806. else
  1807. begin
  1808. paraloc:=hp.paraloc[side].add_location;
  1809. paraloc^.loc:=LOC_VOID;
  1810. paraloc^.def:=paradef;
  1811. end;
  1812. end;
  1813. { Register parameters are assigned from left-to-right, but the
  1814. offsets on the stack are right-to-left. There is no need
  1815. to reverse the offset, only adapt the calleeside with the
  1816. start offset of the first param on the stack }
  1817. if side=calleeside then
  1818. begin
  1819. for i:=0 to paras.count-1 do
  1820. begin
  1821. hp:=tparavarsym(paras[i]);
  1822. paraloc:=hp.paraloc[side].location;
  1823. while paraloc<>nil do
  1824. begin
  1825. with paraloc^ do
  1826. if (loc=LOC_REFERENCE) then
  1827. inc(reference.offset,target_info.first_parm_offset);
  1828. paraloc:=paraloc^.next;
  1829. end;
  1830. end;
  1831. end;
  1832. end;
  1833. function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
  1834. var
  1835. intparareg,mmparareg,
  1836. parasize : longint;
  1837. begin
  1838. intparareg:=0;
  1839. mmparareg:=0;
  1840. if x86_64_use_ms_abi(p.proccalloption) then
  1841. parasize:=4*8
  1842. else
  1843. parasize:=0;
  1844. { calculate the registers for the normal parameters }
  1845. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1846. { append the varargs }
  1847. if assigned(varargspara) then
  1848. begin
  1849. if side=callerside then
  1850. create_paraloc_info_intern(p,side,varargspara,intparareg,mmparareg,parasize,true)
  1851. else
  1852. internalerror(2019021917);
  1853. { store used no. of SSE registers, that needs to be passed in %AL }
  1854. varargspara.mmregsused:=mmparareg;
  1855. end;
  1856. create_funcretloc_info(p,side);
  1857. result:=parasize;
  1858. end;
  1859. function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
  1860. var
  1861. intparareg,mmparareg,
  1862. parasize : longint;
  1863. begin
  1864. intparareg:=0;
  1865. mmparareg:=0;
  1866. if x86_64_use_ms_abi(p.proccalloption) then
  1867. parasize:=4*8
  1868. else
  1869. parasize:=0;
  1870. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1871. { Create Function result paraloc }
  1872. create_funcretloc_info(p,side);
  1873. { We need to return the size allocated on the stack }
  1874. result:=parasize;
  1875. end;
  1876. begin
  1877. paramanager:=tcpuparamanager.create;
  1878. end.