cpupara.pas 79 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. Generates the argument location information for x86-64 target
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cpupara;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cpubase,cgbase,cgutils,
  23. symconst,symtype,symsym,symdef,
  24. parabase,paramgr;
  25. type
  26. tcpuparamanager = class(tparamanager)
  27. private
  28. procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  29. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  30. public
  31. function param_use_paraloc(const cgpara:tcgpara):boolean;override;
  32. function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
  33. function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
  34. function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
  35. function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
  36. function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
  37. function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
  38. function get_saved_registers_mm(calloption: tproccalloption):tcpuregisterarray;override;
  39. function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
  40. function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
  41. function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
  42. end;
  43. implementation
  44. uses
  45. cutils,verbose,
  46. systems,
  47. defutil,
  48. symtable,
  49. cpupi,
  50. cgx86,cgobj,cgcpu;
  51. const
  52. paraintsupregs : array[0..5] of tsuperregister = (RS_RDI,RS_RSI,RS_RDX,RS_RCX,RS_R8,RS_R9);
  53. parammsupregs : array[0..7] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7);
  54. paraintsupregs_winx64 : array[0..3] of tsuperregister = (RS_RCX,RS_RDX,RS_R8,RS_R9);
  55. parammsupregs_winx64 : array[0..3] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3);
  56. parammsupregs_vectorcall : array[0..5] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5);
  57. {
  58. The argument classification code largely comes from libffi:
  59. ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <[email protected]>
  60. Copyright (c) 2008 Red Hat, Inc.
  61. x86-64 Foreign Function Interface
  62. Permission is hereby granted, free of charge, to any person obtaining
  63. a copy of this software and associated documentation files (the
  64. ``Software''), to deal in the Software without restriction, including
  65. without limitation the rights to use, copy, modify, merge, publish,
  66. distribute, sublicense, and/or sell copies of the Software, and to
  67. permit persons to whom the Software is furnished to do so, subject to
  68. the following conditions:
  69. The above copyright notice and this permission notice shall be included
  70. in all copies or substantial portions of the Software.
  71. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
  72. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  73. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  74. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  75. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  76. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  77. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  78. DEALINGS IN THE SOFTWARE.
  79. ----------------------------------------------------------------------- *)
  80. }
  81. const
  82. { This many classes are required in order to support 4 YMMs (_m256) in a
  83. homogeneous vector aggregate under vectorcall. [Kit] }
  84. MAX_PARA_CLASSES = 16;
  85. type
  86. tx64paraclasstype = (
  87. X86_64_NO_CLASS,
  88. X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS,
  89. X86_64_SSE_CLASS,X86_64_SSESF_CLASS,X86_64_SSEDF_CLASS,X86_64_SSEUP_CLASS,
  90. X86_64_X87_CLASS,X86_64_X87UP_CLASS,
  91. X86_64_COMPLEX_X87_CLASS,
  92. X86_64_MEMORY_CLASS
  93. );
  94. tx64paraclass = record
  95. def: tdef;
  96. typ: tx64paraclasstype;
  97. end;
  98. tx64paraclasses = array[0..MAX_PARA_CLASSES-1] of tx64paraclass;
  99. { Win64-specific helper }
  100. function aggregate_in_registers_win64(varspez:tvarspez;size:longint):boolean;
  101. begin
  102. { TODO: Temporary hack: vs_const parameters are always passed by reference for win64}
  103. result:=(varspez=vs_value) and (size in [1,2,4,8])
  104. end;
  105. (* x86-64 register passing implementation. See x86-64 ABI for details. Goal
  106. of this code is to classify each 8bytes of incoming argument by the register
  107. class and assign registers accordingly. *)
  108. function classify_representative_def(def1, def2: tdef): tdef;
  109. var
  110. def1size, def2size: asizeint;
  111. begin
  112. if not assigned(def1) then
  113. result:=def2
  114. else if not assigned(def2) then
  115. result:=def1
  116. else
  117. begin
  118. def1size:=def1.size;
  119. def2size:=def2.size;
  120. if def1size>def2size then
  121. result:=def1
  122. else if def2size>def1size then
  123. result:=def2
  124. else if def1.alignment>def2.alignment then
  125. result:=def1
  126. else
  127. result:=def2;
  128. end;
  129. end;
  130. (* Classify the argument of type TYPE and mode MODE.
  131. CLASSES will be filled by the register class used to pass each word
  132. of the operand. The number of words is returned. In case the parameter
  133. should be passed in memory, 0 is returned. As a special case for zero
  134. sized containers, classes[0] will be NO_CLASS and 1 is returned.
  135. real_size contains either def.size, or a value derived from
  136. def.bitpackedsize and the field offset denoting the number of bytes
  137. spanned by a bitpacked field
  138. See the x86-64 PS ABI for details.
  139. *)
  140. procedure classify_single_integer_class(def: tdef; size,real_size: aint; var cl: tx64paraclass; byte_offset: aint);
  141. begin
  142. if (byte_offset=0) and
  143. (real_size in [1,2,4,8]) and
  144. (not assigned(cl.def) or
  145. (def.alignment>=cl.def.alignment)) then
  146. cl.def:=def;
  147. if size<=4 then
  148. begin
  149. cl.typ:=X86_64_INTEGERSI_CLASS;
  150. { gcc/clang sign/zero-extend all values to 32 bits, except for
  151. _Bool (= Pascal boolean), which is only zero-extended to 8 bits
  152. as per the x86-64 ABI -> do the same }
  153. if not assigned(cl.def) or
  154. not is_pasbool(cl.def) or
  155. (torddef(cl.def).ordtype<>pasbool1) then
  156. cl.def:=u32inttype;
  157. end
  158. else
  159. begin
  160. cl.typ:=X86_64_INTEGER_CLASS;
  161. if not assigned(cl.def) or
  162. (cl.def.size<size) or
  163. (not(cl.def.typ in [orddef,floatdef,pointerdef,classrefdef]) and
  164. not is_implicit_pointer_object_type(cl.def) and
  165. not is_dynamicstring(cl.def) and
  166. not is_dynamic_array(cl.def)) then
  167. cl.def:=u64inttype;
  168. end;
  169. end;
  170. function classify_as_integer_argument(def: tdef; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
  171. var
  172. size: aint;
  173. begin
  174. size:=byte_offset+real_size;
  175. classify_single_integer_class(def,size,real_size,classes[0],byte_offset);
  176. if size<=8 then
  177. result:=1
  178. else
  179. begin
  180. classify_single_integer_class(def,size-8,real_size,classes[1],byte_offset-8);
  181. if size>16 then
  182. internalerror(2010021401);
  183. result:=2;
  184. end
  185. end;
  186. (* Return the union class of CLASS1 and CLASS2.
  187. See the x86-64 PS ABI for details. *)
  188. function merge_classes(class1, class2: tx64paraclass): tx64paraclass;
  189. begin
  190. (* Rule #1: If both classes are equal, this is the resulting class. *)
  191. if (class1.typ=class2.typ) then
  192. begin
  193. result.typ:=class1.typ;
  194. result.def:=classify_representative_def(class1.def,class2.def);
  195. exit;
  196. end;
  197. (* Rule #2: If one of the classes is NO_CLASS, the resulting class is
  198. the other class. *)
  199. if (class1.typ=X86_64_NO_CLASS) then
  200. exit(class2);
  201. if (class2.typ=X86_64_NO_CLASS) then
  202. exit(class1);
  203. (* Rule #3: If one of the classes is MEMORY, the result is MEMORY. *)
  204. if (class1.typ=X86_64_MEMORY_CLASS) then
  205. exit(class1)
  206. else if (class2.typ=X86_64_MEMORY_CLASS) then
  207. exit(class2);
  208. (* Rule #4: If one of the classes is INTEGER, the result is INTEGER. *)
  209. { 32 bit }
  210. if ((class1.typ=X86_64_INTEGERSI_CLASS) and
  211. (class2.typ=X86_64_SSESF_CLASS)) then
  212. exit(class1)
  213. else if ((class2.typ=X86_64_INTEGERSI_CLASS) and
  214. (class1.typ=X86_64_SSESF_CLASS)) then
  215. exit(class2);
  216. { 64 bit }
  217. if (class1.typ in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  218. begin
  219. result:=class1;
  220. if result.def.size<8 then
  221. begin
  222. result.typ:=X86_64_INTEGER_CLASS;
  223. result.def:=s64inttype;
  224. end;
  225. exit
  226. end
  227. else if (class2.typ in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  228. begin
  229. result:=class2;
  230. if result.def.size<8 then
  231. begin
  232. result.typ:=X86_64_INTEGER_CLASS;
  233. result.def:=s64inttype;
  234. end;
  235. exit
  236. end;
  237. (* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
  238. MEMORY is used. *)
  239. if (class1.typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  240. begin
  241. result:=class1;
  242. result.typ:=X86_64_MEMORY_CLASS;
  243. exit;
  244. end
  245. else if (class2.typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  246. begin
  247. result:=class2;
  248. result.typ:=X86_64_MEMORY_CLASS;
  249. exit;
  250. end;
  251. (* Rule #6: Otherwise class SSE is used. *)
  252. if class1.def.size>class2.def.size then
  253. result:=class1
  254. else
  255. result:=class2;
  256. result.typ:=X86_64_SSE_CLASS;
  257. result.def:=s64floattype;
  258. end;
  259. function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint; forward;
  260. function init_aggregate_classification(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; byte_offset: aint; out words: longint; out classes: tx64paraclasses): longint;
  261. var
  262. i: longint;
  263. begin
  264. words:=0;
  265. { we'll be merging the classes elements with the subclasses
  266. elements, so initialise them first }
  267. for i:=low(classes) to high(classes) do
  268. begin
  269. classes[i].typ:=X86_64_NO_CLASS;
  270. classes[i].def:=nil;
  271. end;
  272. { win64 follows a different convention here }
  273. if x86_64_use_ms_abi(calloption) then
  274. begin
  275. if aggregate_in_registers_win64(varspez,def.size) then
  276. begin
  277. classes[0].typ:=X86_64_INTEGER_CLASS;
  278. classes[0].def:=def;
  279. result:=1;
  280. end
  281. else if (calloption = pocall_vectorcall) then
  282. begin
  283. words := (def.size+byte_offset mod 8+7) div 8;
  284. case words of
  285. 0:
  286. Exit(0);
  287. 1..4:
  288. { Aligned vector or array elements }
  289. Result := words;
  290. else
  291. if ((def.aggregatealignment mod (words shl 3)) = 0) or
  292. Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0)
  293. then
  294. begin
  295. { Field of aligned vector type }
  296. if words = 0 then
  297. begin
  298. classes[0].typ:=X86_64_NO_CLASS;
  299. classes[0].def:=def;
  300. Result := 1;
  301. end
  302. else
  303. Result := words;
  304. end
  305. else
  306. Result := 0;
  307. end;
  308. end
  309. else
  310. Result := 0;
  311. Exit;
  312. end;
  313. (* If the struct is larger than 32 bytes, pass it on the stack. *)
  314. if def.size > 32 then
  315. exit(0);
  316. { if a struct starts an offset not divisible by 8, it can span extra
  317. words }
  318. words:=(def.size+byte_offset mod 8+7) div 8;
  319. (* Zero sized arrays or structures are NO_CLASS. We return 0 to
  320. signal memory class, so handle it as special case. *)
  321. if (words=0) then
  322. begin
  323. classes[0].typ:=X86_64_NO_CLASS;
  324. classes[0].def:=def;
  325. exit(1);
  326. end;
  327. result:=words;
  328. end;
  329. function classify_aggregate_element(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; new_byte_offset: aint): longint;
  330. var
  331. subclasses: tx64paraclasses;
  332. i,
  333. pos: longint;
  334. begin
  335. fillchar(subclasses,sizeof(subclasses),0);
  336. result:=classify_argument(calloption,def,parentdef,varspez,real_size,subclasses,new_byte_offset, True);
  337. if (result=0) then
  338. exit;
  339. pos:=new_byte_offset div 8;
  340. if result-1+pos>high(classes) then
  341. internalerror(2010053108);
  342. for i:=0 to result-1 do
  343. begin
  344. classes[i+pos] :=
  345. merge_classes(subclasses[i],classes[i+pos]);
  346. end;
  347. inc(result,pos);
  348. end;
  349. function finalize_aggregate_classification(calloption: tproccalloption; def: tdef; words: longint; var classes: tx64paraclasses): longint;
  350. var
  351. i, vecsize, maxvecsize: longint;
  352. begin
  353. { Workaround: It's not immediately possible to determine if a Double is
  354. by itself or is part of an aligned vector. If the latter, correct the
  355. class definitions here. [Kit] }
  356. if (classes[0].typ = X86_64_SSEDF_CLASS) and (classes[1].typ = X86_64_SSEUP_CLASS) then
  357. classes[0].typ := X86_64_SSE_CLASS;
  358. if (words>2) then
  359. begin
  360. { When size > 16 bytes, if the first one isn't
  361. X86_64_SSE_CLASS or any other ones aren't
  362. X86_64_SSEUP_CLASS, everything should be passed in
  363. memory... }
  364. if (classes[0].typ<>X86_64_SSE_CLASS) then
  365. begin
  366. { ... except if the calling convention is 'vectorcall', then
  367. check to see if we don't have an HFA of 3 or 4 Doubles }
  368. if (calloption <> pocall_vectorcall) or (words > 4) then
  369. Exit(0);
  370. for i := 0 to words - 1 do
  371. if classes[i].typ <> X86_64_SSEDF_CLASS then
  372. Exit(0);
  373. Exit(words);
  374. end;
  375. if ((words shl 3) > def.aggregatealignment) then
  376. { The alignment is wrong for this vector size, hence it is unaligned }
  377. Exit(0);
  378. vecsize := 1;
  379. maxvecsize := words;
  380. for i:=1 to words-1 do
  381. if (classes[i].typ=X86_64_SSEUP_CLASS) then
  382. Inc(vecsize)
  383. else
  384. begin
  385. { Exceptional case. Check that we're not dealing an array of
  386. aligned vectors that is itself aligned to a stricter
  387. boundary (e.g. 4 XMM registers that can be merged into a
  388. single ZMM register). }
  389. if
  390. (classes[i].typ <> X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
  391. ((vecsize and (vecsize - 1)) <> 0) or { If vecsize is not a power of two, then it is definitely not a valid vector }
  392. (vecsize > maxvecsize) or ((maxvecsize < words) and (vecsize <> maxvecsize)) { Mixture of XMMs and YMMs, for example, is not valid }
  393. then
  394. Exit(0);
  395. classes[i].typ := X86_64_SSEUP_CLASS;
  396. maxvecsize := vecsize;
  397. vecsize := 1;
  398. end;
  399. if vecsize <> maxvecsize then
  400. { Last vector is of a different size }
  401. Exit(0);
  402. if vecsize > 2 then
  403. begin
  404. { Cannot use 256-bit and 512-bit vectors if we're not using AVX }
  405. if not UseAVX then
  406. Exit(0);
  407. { WARNING: There is currently no support for 256-bit and 512-bit
  408. aligned vectors, so if an aggregate contains more than two
  409. eightbyte words, it must be passed in memory. When 256-bit and
  410. 512-bit vectors are fully supported, remove the following
  411. line. [Kit] }
  412. Exit(0);
  413. end;
  414. end;
  415. (* Final merger cleanup. *)
  416. (* The first one must never be X86_64_SSEUP_CLASS or
  417. X86_64_X87UP_CLASS. *)
  418. if (classes[0].typ=X86_64_SSEUP_CLASS) or
  419. (classes[0].typ=X86_64_X87UP_CLASS) then
  420. internalerror(2010021402);
  421. for i:=0 to words-1 do
  422. begin
  423. (* If one class is MEMORY, everything should be passed in
  424. memory. *)
  425. if (classes[i].typ=X86_64_MEMORY_CLASS) then
  426. exit(0);
  427. (* The X86_64_SSEUP_CLASS should be always preceded by
  428. X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. *)
  429. if (classes[i].typ=X86_64_SSEUP_CLASS) and
  430. (classes[i-1].typ<>X86_64_SSE_CLASS) and
  431. (classes[i-1].typ<>X86_64_SSEUP_CLASS) then
  432. begin
  433. classes[i].typ:=X86_64_SSE_CLASS;
  434. classes[i].def:=carraydef.getreusable_no_free(s32floattype,2);
  435. end;
  436. (* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
  437. everything should be passed in memory. *)
  438. if (classes[i].typ=X86_64_X87UP_CLASS) and
  439. (classes[i-1].typ<>X86_64_X87_CLASS) then
  440. exit(0);
  441. (* FPC addition: because we store an extended in 10 bytes, the
  442. X86_64_X87UP_CLASS can be replaced with e.g. INTEGER if an
  443. extended is followed by e.g. an array [0..5] of byte -> we also
  444. have to check whether each X86_64_X87_CLASS is followed by
  445. X86_64_X87UP_CLASS -- if not, pass in memory
  446. This cannot happen in the original ABI, because there
  447. sizeof(extended) = 16 and hence nothing can be merged with
  448. X86_64_X87UP_CLASS and change it into something else *)
  449. if (classes[i].typ=X86_64_X87_CLASS) and
  450. ((i=(words-1)) or
  451. (classes[i+1].typ<>X86_64_X87UP_CLASS)) then
  452. exit(0);
  453. end;
  454. {$ifndef llvm}
  455. { FIXME: in case a record contains empty padding space, e.g. a
  456. "single" field followed by a "double", then we have a problem
  457. because the cgpara helpers cannot figure out that they should
  458. skip 4 bytes after storing the single (LOC_MMREGISTER with size
  459. OS_F32) to memory before storing the double -> for now scale
  460. such locations always up to 64 bits, although this loads/stores
  461. some superfluous data }
  462. { 1) the first part is 32 bit while there is still a second part }
  463. if (classes[1].typ<>X86_64_NO_CLASS) then
  464. case classes[0].typ of
  465. X86_64_INTEGERSI_CLASS:
  466. begin
  467. classes[0].typ:=X86_64_INTEGER_CLASS;
  468. classes[0].def:=s64inttype;
  469. end;
  470. X86_64_SSESF_CLASS:
  471. begin
  472. classes[0].typ:=X86_64_SSE_CLASS;
  473. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  474. end;
  475. else
  476. ;
  477. end;
  478. { 2) the second part is 32 bit, but the total size is > 12 bytes }
  479. if (def.size>12) then
  480. case classes[1].typ of
  481. X86_64_INTEGERSI_CLASS:
  482. begin
  483. classes[1].typ:=X86_64_INTEGER_CLASS;
  484. classes[1].def:=s64inttype;
  485. end;
  486. X86_64_SSESF_CLASS:
  487. begin
  488. classes[1].typ:=X86_64_SSE_CLASS;
  489. classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
  490. end;
  491. else
  492. ;
  493. end;
  494. {$endif not llvm}
  495. result:=words;
  496. end;
  497. function try_build_homogeneous_aggregate(def: tdef; words: longint; var classes: tx64paraclasses): longint;
  498. var
  499. i, vecsize, maxvecsize, veccount: longint;
  500. {size, }byte_offset: aint;
  501. vs: TFieldVarSym;
  502. checkalignment: Boolean;
  503. begin
  504. if (words = 0) then
  505. { Should be at least 1 word at this point }
  506. InternalError(2018013100);
  507. case classes[0].typ of
  508. X86_64_SSESF_CLASS:
  509. begin
  510. { Should be an HFA of only a Single }
  511. for i := 1 to High(classes) do
  512. if classes[i].typ <> X86_64_NO_CLASS then
  513. Exit(0);
  514. result := 1;
  515. end;
  516. X86_64_SSEDF_CLASS:
  517. begin
  518. { Possibly an HFA of Doubles }
  519. if TAbstractRecordDef(def).symtable.symlist.count = 0 then
  520. Exit(0);
  521. { Get the information and position on the last entry }
  522. vs:=TFieldVarSym(TAbstractRecordDef(def).symtable.symlist[TAbstractRecordDef(def).symtable.symlist.count - 1]);
  523. //size:=vs.vardef.size;
  524. checkalignment:=true;
  525. if not TAbstractRecordSymtable(TAbstractRecordDef(def).symtable).is_packed then
  526. begin
  527. byte_offset:=vs.fieldoffset;
  528. //size:=vs.vardef.size;
  529. end
  530. else
  531. begin
  532. byte_offset:=vs.fieldoffset div 8;
  533. if (vs.vardef.typ in [orddef,enumdef]) then
  534. begin
  535. { calculate the number of bytes spanned by
  536. this bitpacked field }
  537. //size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8);
  538. { our bitpacked fields are interpreted as always being
  539. aligned, because unlike in C we don't have char:1, int:1
  540. etc (so everything is basically a char:x) }
  541. checkalignment:=false;
  542. end
  543. else
  544. ;//size:=vs.vardef.size;
  545. end;
  546. { If [..] an object [..] contains unaligned fields, it has class
  547. MEMORY }
  548. if checkalignment and
  549. (align(byte_offset,vs.vardef.structalignment)<>byte_offset) then
  550. begin
  551. result:=0;
  552. exit;
  553. end;
  554. if words > 4 then
  555. { HFA too large }
  556. Exit(0);
  557. for i := 1 to words - 1 do
  558. if classes[i].typ <> X86_64_SSEDF_CLASS then
  559. Exit(0);
  560. result := words;
  561. end;
  562. X86_64_SSE_CLASS:
  563. begin
  564. { Determine the nature of the classes.
  565. - If the SSE is by itself, then it is an HFA consisting of 2 Singles.
  566. - If the SSE is followed by an SSESF, then it is an HFA consisting of 3 Singles.
  567. - If the SSE is followed by an SSE and nothing else, then it is an HFA consisting of 4 Singles.
  568. - If the SSE is followed by an SSE, but another class follows, then it is an HFA that is too large.
  569. - If the SSE is followed by an SSEUP, then it is an HVA of some kind.
  570. }
  571. case classes[1].typ of
  572. X86_64_NO_CLASS:
  573. begin
  574. for i := 2 to words - 1 do
  575. if classes[i].typ <> X86_64_NO_CLASS then
  576. { Compound type }
  577. Exit(0);
  578. { Split into 2 Singles again so they correctly fall into separate XMM registers }
  579. classes[0].typ := X86_64_SSESF_CLASS;
  580. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the array }
  581. classes[1].typ := X86_64_SSESF_CLASS;
  582. classes[1].def := classes[0].def;
  583. result := 2;
  584. end;
  585. X86_64_SSESF_CLASS:
  586. begin
  587. for i := 2 to words - 1 do
  588. if classes[i].typ <> X86_64_NO_CLASS then
  589. { Compound type }
  590. Exit(0);
  591. classes[2].typ := X86_64_SSESF_CLASS;
  592. classes[2].def := classes[1].def; { Transfer class 1 to class 2 }
  593. classes[0].typ := X86_64_SSESF_CLASS;
  594. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the array }
  595. classes[1].typ := X86_64_SSESF_CLASS;
  596. classes[1].def := classes[0].def;
  597. result := 3;
  598. end;
  599. X86_64_SSE_CLASS:
  600. begin
  601. for i := 2 to words - 1 do
  602. if classes[i].typ <> X86_64_NO_CLASS then
  603. { HFA too large (or not a true HFA) }
  604. Exit(0);
  605. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the arrays }
  606. classes[2].def := tdef(tarraydef(classes[1].def).elementdef);
  607. classes[1].def := classes[0].def;
  608. classes[3].def := classes[2].def;
  609. classes[0].typ := X86_64_SSESF_CLASS;
  610. classes[1].typ := X86_64_SSESF_CLASS;
  611. classes[2].typ := X86_64_SSESF_CLASS;
  612. classes[3].typ := X86_64_SSESF_CLASS;
  613. result := 4;
  614. end;
  615. X86_64_SSEUP_CLASS:
  616. begin
  617. { Determine vector size }
  618. veccount := 1;
  619. vecsize := 2;
  620. maxvecsize := words;
  621. for i := 2 to words - 1 do
  622. if (classes[i].typ=X86_64_SSEUP_CLASS) then
  623. Inc(vecsize)
  624. else
  625. begin
  626. if
  627. (classes[i].typ <> X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
  628. ((vecsize and (vecsize - 1)) <> 0) or { If vecsize is not a power of two, then it is definitely not a valid aggregate }
  629. (vecsize > maxvecsize) or ((maxvecsize < words) and (vecsize <> maxvecsize)) { Mixture of XMMs and YMMs, for example, is not valid }
  630. then
  631. Exit(0);
  632. Inc(veccount);
  633. maxvecsize := vecsize;
  634. vecsize := 1;
  635. end;
  636. if vecsize <> maxvecsize then
  637. { Last vector is of a different size }
  638. Exit(0);
  639. if veccount > 4 then
  640. { HVA too large }
  641. Exit(0);
  642. Result := words;
  643. end;
  644. else
  645. Exit(0);
  646. end;
  647. end;
  648. else
  649. Exit(0);
  650. end;
  651. end;
  652. function classify_record(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  653. var
  654. vs: tfieldvarsym;
  655. size,
  656. new_byte_offset: aint;
  657. i,
  658. words,
  659. num: longint;
  660. checkalignment: boolean;
  661. begin
  662. result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,words,classes);
  663. if (words=0) then
  664. exit;
  665. (* Merge the fields of the structure. *)
  666. for i:=0 to tabstractrecorddef(def).symtable.symlist.count-1 do
  667. begin
  668. if tsym(tabstractrecorddef(def).symtable.symlist[i]).typ<>fieldvarsym then
  669. continue;
  670. vs:=tfieldvarsym(tabstractrecorddef(def).symtable.symlist[i]);
  671. checkalignment:=true;
  672. if not tabstractrecordsymtable(tabstractrecorddef(def).symtable).is_packed then
  673. begin
  674. new_byte_offset:=byte_offset+vs.fieldoffset;
  675. size:=vs.vardef.size;
  676. end
  677. else
  678. begin
  679. new_byte_offset:=byte_offset+vs.fieldoffset div 8;
  680. if (vs.vardef.typ in [orddef,enumdef]) then
  681. begin
  682. { calculate the number of bytes spanned by
  683. this bitpacked field }
  684. size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8);
  685. { our bitpacked fields are interpreted as always being
  686. aligned, because unlike in C we don't have char:1, int:1
  687. etc (so everything is basically a char:x) }
  688. checkalignment:=false;
  689. end
  690. else
  691. size:=vs.vardef.size;
  692. end;
  693. { If [..] an object [..] contains unaligned fields, it has class
  694. MEMORY }
  695. if checkalignment and
  696. (align(new_byte_offset,vs.vardef.structalignment)<>new_byte_offset) then
  697. begin
  698. result:=0;
  699. exit;
  700. end;
  701. num:=classify_aggregate_element(calloption,vs.vardef,def,varspez,size,classes,new_byte_offset);
  702. if (num=0) then
  703. exit(0);
  704. end;
  705. result:=finalize_aggregate_classification(calloption,def,words,classes);
  706. { There is still one case where it might not have to be passed on the
  707. stack, and that's a homogeneous vector aggregate (HVA) or a
  708. homogeneous float aggregate (HFA) under vectorcall. }
  709. if (calloption = pocall_vectorcall) then
  710. begin
  711. if (result = 0) then
  712. result := try_build_homogeneous_aggregate(def,words,classes)
  713. else
  714. { If we're dealing with an HFA that has 3 or 4 Singles, pairs of
  715. Singles may be merged into a single SSE_CLASS, which must be
  716. split into separate SSESF_CLASS references for vectorcall; this
  717. is only performed in "try_build_homogeneous_aggregate" and not
  718. elsewhere, so accommodate for this exceptional case. [Kit] }
  719. if (result = 2) then
  720. begin
  721. num := try_build_homogeneous_aggregate(def,words,classes);
  722. if num <> 0 then
  723. { If it's equal to zero, just pass 2 and handle the record
  724. type normally }
  725. result := num;
  726. end;
  727. end;
  728. end;
  729. function classify_normal_array(calloption: tproccalloption; def: tarraydef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  730. var
  731. i, elecount: aword;
  732. size,
  733. elesize,
  734. new_byte_offset,
  735. bitoffset: aint;
  736. words,
  737. num: longint;
  738. isbitpacked: boolean;
  739. begin
  740. size:=0;
  741. bitoffset:=0;
  742. result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,words,classes);
  743. if (words=0) then
  744. exit;
  745. isbitpacked:=is_packed_array(def);
  746. if not isbitpacked then
  747. begin
  748. elesize:=def.elesize;
  749. size:=elesize;
  750. end
  751. else
  752. begin
  753. elesize:=def.elepackedbitsize;
  754. bitoffset:=0;
  755. end;
  756. (* Merge the elements of the array. *)
  757. i:=0;
  758. elecount:=def.elecount;
  759. repeat
  760. if not isbitpacked then
  761. begin
  762. { size does not change }
  763. new_byte_offset:=byte_offset+i*elesize;
  764. { If [..] an object [..] contains unaligned fields, it has class
  765. MEMORY }
  766. if align(new_byte_offset,def.alignment)<>new_byte_offset then
  767. begin
  768. result:=0;
  769. exit;
  770. end;
  771. end
  772. else
  773. begin
  774. { calculate the number of bytes spanned by this bitpacked
  775. element }
  776. size:=((bitoffset+elesize+7) div 8)-(bitoffset div 8);
  777. new_byte_offset:=byte_offset+(elesize*i) div 8;
  778. { bit offset of next element }
  779. inc(bitoffset,elesize);
  780. end;
  781. num:=classify_aggregate_element(calloption,def.elementdef,def,varspez,size,classes,new_byte_offset);
  782. if (num=0) then
  783. exit(0);
  784. inc(i);
  785. until (i=elecount);
  786. result:=finalize_aggregate_classification(calloption,def,words,classes);
  787. end;
  788. function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint;
  789. var
  790. rounded_offset: aint;
  791. begin
  792. if round_to_8 then
  793. rounded_offset := byte_offset mod 8
  794. else
  795. rounded_offset := byte_offset;
  796. case def.typ of
  797. orddef,
  798. enumdef,
  799. pointerdef,
  800. classrefdef:
  801. result:=classify_as_integer_argument(def,real_size,classes,rounded_offset);
  802. formaldef:
  803. result:=classify_as_integer_argument(voidpointertype,voidpointertype.size,classes,rounded_offset);
  804. floatdef:
  805. begin
  806. classes[0].def:=def;
  807. case tfloatdef(def).floattype of
  808. s32real:
  809. begin
  810. if (byte_offset mod 8) = 0 then { Check regardless of the round_to_8 flag }
  811. begin
  812. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
  813. { Third element of an aligned vector }
  814. classes[0].typ:=X86_64_SSEUP_CLASS
  815. else
  816. classes[0].typ:=X86_64_SSESF_CLASS
  817. end
  818. else
  819. begin
  820. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) then
  821. { Fourth element of an aligned vector }
  822. classes[0].typ:=X86_64_SSEUP_CLASS
  823. else
  824. { if we have e.g. a record with two successive "single"
  825. fields, we need a 64 bit rather than a 32 bit load }
  826. classes[0].typ:=X86_64_SSE_CLASS;
  827. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  828. end;
  829. result:=1;
  830. end;
  831. s64real:
  832. begin
  833. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
  834. { Aligned vector of type double }
  835. classes[0].typ:=X86_64_SSEUP_CLASS
  836. else
  837. classes[0].typ:=X86_64_SSEDF_CLASS;
  838. result:=1;
  839. end;
  840. s80real,
  841. sc80real:
  842. begin
  843. classes[0].typ:=X86_64_X87_CLASS;
  844. classes[1].typ:=X86_64_X87UP_CLASS;
  845. classes[1].def:=def;
  846. result:=2;
  847. end;
  848. s64comp,
  849. s64currency:
  850. begin
  851. classes[0].typ:=X86_64_INTEGER_CLASS;
  852. result:=1;
  853. end;
  854. s128real:
  855. begin
  856. classes[0].typ:=X86_64_SSE_CLASS;
  857. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  858. classes[1].typ:=X86_64_SSEUP_CLASS;
  859. classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
  860. result:=2;
  861. end;
  862. end;
  863. end;
  864. recorddef:
  865. result:=classify_record(calloption,def,parentdef,varspez,classes,rounded_offset);
  866. objectdef:
  867. begin
  868. if is_object(def) then
  869. { pass by reference, like ppc and i386 }
  870. result:=0
  871. else
  872. { all kinds of pointer types: class, objcclass, interface, ... }
  873. result:=classify_as_integer_argument(def,voidpointertype.size,classes,rounded_offset);
  874. end;
  875. setdef:
  876. begin
  877. if is_smallset(def) then
  878. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset)
  879. else
  880. result:=0;
  881. end;
  882. stringdef:
  883. begin
  884. if (tstringdef(def).stringtype in [st_shortstring,st_longstring]) then
  885. result:=0
  886. else
  887. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset);
  888. end;
  889. arraydef:
  890. begin
  891. { a dynamic array is treated like a pointer }
  892. if is_dynamic_array(def) then
  893. result:=classify_as_integer_argument(def,voidpointertype.size,classes,rounded_offset)
  894. { other special arrays are passed on the stack }
  895. else if is_open_array(def) or
  896. is_array_of_const(def) then
  897. result:=0
  898. else
  899. { normal array }
  900. result:=classify_normal_array(calloption,tarraydef(def),parentdef,varspez,classes,rounded_offset);
  901. end;
  902. { the file record is definitely too big }
  903. filedef:
  904. result:=0;
  905. procvardef:
  906. begin
  907. if (po_methodpointer in tprocvardef(def).procoptions) then
  908. begin
  909. { treat as TMethod record }
  910. def:=search_system_type('TMETHOD').typedef;
  911. result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,rounded_offset, False);
  912. end
  913. else
  914. { pointer }
  915. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset);
  916. end;
  917. variantdef:
  918. begin
  919. { same as tvardata record }
  920. def:=search_system_type('TVARDATA').typedef;
  921. result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,rounded_offset, False);
  922. end;
  923. undefineddef:
  924. { show shall we know?
  925. since classify_argument is called during parsing, see tw27685.pp,
  926. we handle undefineddef here }
  927. result:=0;
  928. errordef:
  929. { error message should have been thrown already before, so avoid only
  930. an internal error }
  931. result:=0;
  932. else
  933. internalerror(2010021405);
  934. end;
  935. end;
  936. { Returns the size of a single element in the aggregate, or the entire vector, if it is one of these types, 0 otherwise }
  937. function is_simd_vector_type_or_homogeneous_aggregate(calloption: tproccalloption; def: tdef; varspez: tvarspez): aint;
  938. var
  939. numclasses,i,vecsize,veccount,maxvecsize:longint;
  940. classes: tx64paraclasses;
  941. firstclass: tx64paraclasstype;
  942. begin
  943. for i := Low(classes) to High(classes) do
  944. begin
  945. classes[i].typ := X86_64_NO_CLASS;
  946. classes[i].def := nil;
  947. end;
  948. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  949. if numclasses = 0 then
  950. Exit(0);
  951. firstclass := classes[0].typ;
  952. case firstclass of
  953. X86_64_SSESF_CLASS: { Only valid if the aggregate contains a lone Single }
  954. begin
  955. if (numclasses = 1) and (calloption = pocall_vectorcall) then
  956. Result := 4
  957. else
  958. Result := 0;
  959. Exit;
  960. end;
  961. X86_64_SSEDF_CLASS:
  962. begin
  963. if (numclasses > 1) and (calloption <> pocall_vectorcall) then
  964. Result := 0
  965. else
  966. begin
  967. for i := 1 to numclasses - 1 do
  968. if classes[i].typ <> X86_64_SSEDF_CLASS then
  969. begin
  970. Result := 0;
  971. Exit;
  972. end;
  973. if (def.size div 8) <> numclasses then
  974. { Wrong alignment or compound size }
  975. Result := 0
  976. else
  977. Result := 8;
  978. end;
  979. end;
  980. X86_64_SSE_CLASS:
  981. begin
  982. maxvecsize := numclasses * 2;
  983. if numclasses = 1 then
  984. begin
  985. { 2 Singles }
  986. if calloption = pocall_vectorcall then
  987. Result := 4
  988. else
  989. Result := 0;
  990. Exit;
  991. end;
  992. if classes[1].typ = X86_64_SSESF_CLASS then
  993. begin
  994. { 3 Singles }
  995. if numclasses <> 2 then
  996. Result := 0
  997. else
  998. Result := 4;
  999. Exit;
  1000. end;
  1001. vecsize := 2;
  1002. veccount := 1;
  1003. for i := 1 to numclasses - 1 do
  1004. case classes[i].typ of
  1005. X86_64_SSEUP_CLASS:
  1006. Inc(vecsize, 2);
  1007. X86_64_SSE_CLASS:
  1008. begin
  1009. if (maxvecsize < numclasses * 2) and (vecsize <> maxvecsize) then
  1010. { Different vector sizes }
  1011. Exit(0);
  1012. maxvecsize := vecsize;
  1013. vecsize := 2;
  1014. Inc(veccount);
  1015. end;
  1016. else
  1017. Exit(0);
  1018. end;
  1019. if vecsize <> maxvecsize then
  1020. { Last vector has to be the same size }
  1021. Exit(0);
  1022. { Either an HFA with 4 Singles, or an HVA with up to 4 vectors
  1023. (or a lone SIMD vector if veccount = 1) }
  1024. if (veccount < 4) then
  1025. begin
  1026. if (veccount > 1) and (calloption <> pocall_vectorcall) then
  1027. Result := 0
  1028. else
  1029. if vecsize = 2 then
  1030. { Packed, unaligned array of Singles }
  1031. Result := 4
  1032. else
  1033. Result := vecsize * 8
  1034. end
  1035. else
  1036. Result := 0;
  1037. end;
  1038. else
  1039. Exit(0);
  1040. end;
  1041. end;
  1042. procedure getvalueparaloc(calloption: tproccalloption;varspez:tvarspez;def:tdef;var classes: tx64paraclasses);
  1043. var
  1044. size: aint;
  1045. i: longint;
  1046. numclasses: longint;
  1047. begin
  1048. { init the classes array, because even if classify_argument inits only
  1049. one element we copy both to loc1/loc2 in case "1" is returned }
  1050. for i:=low(classes) to high(classes) do
  1051. begin
  1052. classes[i].typ:=X86_64_NO_CLASS;
  1053. classes[i].def:=nil;
  1054. end;
  1055. { def.size internalerrors for open arrays and dynamic arrays, since
  1056. their size cannot be determined at compile-time.
  1057. classify_argument does not look at the realsize argument for arrays
  1058. cases, but we obviously do have to pass something... }
  1059. if is_special_array(def) then
  1060. size:=-1
  1061. else
  1062. size:=def.size;
  1063. numclasses:=classify_argument(calloption,def,nil,varspez,size,classes,0,False);
  1064. case numclasses of
  1065. 0:
  1066. begin
  1067. classes[0].typ:=X86_64_MEMORY_CLASS;
  1068. classes[0].def:=def;
  1069. end;
  1070. 1..4:
  1071. begin
  1072. { If the class is X87, X87UP or COMPLEX_X87, it is passed in memory }
  1073. for i := 0 to numclasses - 1 do
  1074. begin
  1075. if classes[i].typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS] then
  1076. classes[i].typ:=X86_64_MEMORY_CLASS;
  1077. end;
  1078. end;
  1079. else
  1080. { 8 can happen for _m512 vectors, but are not yet supported }
  1081. internalerror(2010021501);
  1082. end;
  1083. end;
  1084. function tcpuparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
  1085. var
  1086. classes: tx64paraclasses;
  1087. numclasses: longint;
  1088. begin
  1089. if handle_common_ret_in_param(def,pd,result) then
  1090. exit;
  1091. fillchar(classes,sizeof(classes),0);
  1092. case def.typ of
  1093. { for records it depends on their contents and size }
  1094. recorddef,
  1095. { make sure we handle 'procedure of object' correctly }
  1096. procvardef:
  1097. begin
  1098. numclasses:=classify_argument(pd.proccalloption,def,nil,vs_value,def.size,classes,0,False);
  1099. result:=(numclasses=0);
  1100. end;
  1101. else
  1102. result:=inherited ret_in_param(def,pd);
  1103. end;
  1104. end;
  1105. function tcpuparamanager.param_use_paraloc(const cgpara:tcgpara):boolean;
  1106. var
  1107. paraloc : pcgparalocation;
  1108. begin
  1109. if not assigned(cgpara.location) then
  1110. internalerror(200410102);
  1111. result:=true;
  1112. { All locations are LOC_REFERENCE }
  1113. paraloc:=cgpara.location;
  1114. while assigned(paraloc) do
  1115. begin
  1116. if (paraloc^.loc<>LOC_REFERENCE) then
  1117. begin
  1118. result:=false;
  1119. exit;
  1120. end;
  1121. paraloc:=paraloc^.next;
  1122. end;
  1123. end;
  1124. { true if a parameter is too large to copy and only the address is pushed }
  1125. function tcpuparamanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
  1126. var
  1127. classes: tx64paraclasses;
  1128. numclasses: longint;
  1129. begin
  1130. fillchar(classes,sizeof(classes),0);
  1131. result:=false;
  1132. { var,out,constref always require address }
  1133. if varspez in [vs_var,vs_out,vs_constref] then
  1134. begin
  1135. result:=true;
  1136. exit;
  1137. end;
  1138. { Only vs_const, vs_value here }
  1139. case def.typ of
  1140. formaldef :
  1141. result:=true;
  1142. recorddef :
  1143. begin
  1144. { MetroWerks Pascal: const records always passed by reference
  1145. (for Mac OS X interfaces) }
  1146. if (calloption=pocall_mwpascal) and
  1147. (varspez=vs_const) then
  1148. result:=true
  1149. { Win ABI depends on size to pass it in a register or not }
  1150. else if x86_64_use_ms_abi(calloption) then
  1151. begin
  1152. if calloption = pocall_vectorcall then
  1153. begin
  1154. { "vectorcall" has the addition that it allows for aligned SSE types }
  1155. result :=
  1156. not aggregate_in_registers_win64(varspez,def.size) and
  1157. (is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
  1158. end
  1159. else
  1160. result:=not aggregate_in_registers_win64(varspez,def.size)
  1161. end
  1162. { pass constant parameters that would be passed via memory by
  1163. reference for non-cdecl/cppdecl, and make sure that the tmethod
  1164. record (size=16) is passed the same way as a complex procvar }
  1165. else if ((varspez=vs_const) and
  1166. not(calloption in cdecl_pocalls)) or
  1167. (def.size=16) then
  1168. begin
  1169. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  1170. result:=numclasses=0;
  1171. end
  1172. else
  1173. { SysV ABI always passes it as value parameter }
  1174. result:=false;
  1175. end;
  1176. arraydef :
  1177. begin
  1178. { cdecl array of const need to be ignored and therefor be puhsed
  1179. as value parameter with length 0 }
  1180. if ((calloption in cdecl_pocalls) and
  1181. is_array_of_const(def)) or
  1182. is_dynamic_array(def) then
  1183. result:=false
  1184. else if (calloption = pocall_vectorcall) then
  1185. begin
  1186. { Pass all arrays by reference unless they are a valid, aligned SIMD type (arrays can't be homogeneous aggregates) }
  1187. result := (is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
  1188. end
  1189. else
  1190. { pass all arrays by reference to be compatible with C (passing
  1191. an array by value (= copying it on the stack) does not exist,
  1192. because an array is the same as a pointer there }
  1193. result:=true
  1194. end;
  1195. objectdef :
  1196. begin
  1197. { don't treat objects like records, because we only know wheter
  1198. or not they'll have a VMT after the entire object is parsed
  1199. -> if they are used as function result from one of their own
  1200. methods, their size can still change after we've determined
  1201. whether this function result should be returned by reference or
  1202. by value }
  1203. if is_object(def) then
  1204. result:=true;
  1205. end;
  1206. variantdef,
  1207. stringdef,
  1208. procvardef,
  1209. setdef :
  1210. begin
  1211. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  1212. result:=numclasses=0;
  1213. end;
  1214. else
  1215. ;
  1216. end;
  1217. end;
  1218. function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
  1219. begin
  1220. if x86_64_use_ms_abi(calloption) then
  1221. result:=[RS_RAX,RS_RCX,RS_RDX,RS_R8,RS_R9,RS_R10,RS_R11]
  1222. else
  1223. result:=[RS_RAX,RS_RCX,RS_RDX,RS_RSI,RS_RDI,RS_R8,RS_R9,RS_R10,RS_R11];
  1224. end;
  1225. function tcpuparamanager.get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;
  1226. begin
  1227. if x86_64_use_ms_abi(calloption) then
  1228. result:=[RS_XMM0..RS_XMM5]
  1229. else
  1230. result:=[RS_XMM0..RS_XMM15];
  1231. end;
  1232. function tcpuparamanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
  1233. begin
  1234. result:=[RS_ST0..RS_ST7];
  1235. end;
  1236. function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
  1237. const
  1238. win64_saved_std_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..7] of tsuperregister{$endif} = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP);
  1239. others_saved_std_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..4] of tsuperregister{$endif} = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
  1240. begin
  1241. if tcgx86_64(cg).use_ms_abi then
  1242. result:=win64_saved_std_regs
  1243. else
  1244. result:=others_saved_std_regs;
  1245. end;
  1246. function tcpuparamanager.get_saved_registers_mm(calloption: tproccalloption):tcpuregisterarray;
  1247. const
  1248. win64_saved_xmm_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..9] of tsuperregister{$endif} = (RS_XMM6,RS_XMM7,
  1249. RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
  1250. begin
  1251. if tcgx86_64(cg).use_ms_abi then
  1252. result:=win64_saved_xmm_regs
  1253. else
  1254. SetLength(result,0);
  1255. end;
  1256. function tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
  1257. const
  1258. intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
  1259. mmretregs: array[0..1] of tregister = (NR_MM_RESULT_REG,NR_MM_RESULT_REG_HIGH);
  1260. mmretregs_vectorcall: array[0..3] of tregister = (NR_XMM0,NR_XMM1,NR_XMM2,NR_XMM3);
  1261. var
  1262. classes: tx64paraclasses;
  1263. i,j,
  1264. numclasses: longint;
  1265. intretregidx,
  1266. mmretregidx: longint;
  1267. retcgsize : tcgsize;
  1268. paraloc : pcgparalocation;
  1269. begin
  1270. if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
  1271. exit;
  1272. { Return in FPU register? -> don't use classify_argument(), because
  1273. currency and comp need special treatment here (they are integer class
  1274. when passing as parameter, but LOC_FPUREGISTER as function result) }
  1275. if result.def.typ=floatdef then
  1276. begin
  1277. paraloc:=result.add_location;
  1278. paraloc^.def:=result.def;
  1279. case tfloatdef(result.def).floattype of
  1280. s32real:
  1281. begin
  1282. paraloc^.loc:=LOC_MMREGISTER;
  1283. paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
  1284. paraloc^.size:=OS_F32;
  1285. end;
  1286. s64real:
  1287. begin
  1288. paraloc^.loc:=LOC_MMREGISTER;
  1289. paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
  1290. paraloc^.size:=OS_F64;
  1291. end;
  1292. { the first two only exist on targets with an x87, on others
  1293. they are replace by int64 }
  1294. s64currency,
  1295. s64comp,
  1296. s80real,
  1297. sc80real:
  1298. begin
  1299. paraloc^.loc:=LOC_FPUREGISTER;
  1300. paraloc^.register:=NR_FPU_RESULT_REG;
  1301. paraloc^.size:=retcgsize;
  1302. end;
  1303. else
  1304. internalerror(200405034);
  1305. end;
  1306. end
  1307. else
  1308. { Return in register }
  1309. begin
  1310. fillchar(classes,sizeof(classes),0);
  1311. numclasses:=classify_argument(p.proccalloption,result.def,nil,vs_value,result.def.size,classes,0,False);
  1312. { this would mean a memory return }
  1313. if (numclasses=0) then
  1314. internalerror(2010021502);
  1315. if (numclasses > MAX_PARA_CLASSES) then
  1316. internalerror(2010021503);
  1317. intretregidx:=0;
  1318. mmretregidx:=0;
  1319. i := 0;
  1320. { We can't use a for-loop here because the treatment of the SSEUP class requires skipping over i's }
  1321. while i < numclasses do
  1322. begin
  1323. paraloc:=result.add_location;
  1324. paraloc^.def:=classes[i].def;
  1325. case classes[i].typ of
  1326. X86_64_INTEGERSI_CLASS,
  1327. X86_64_INTEGER_CLASS:
  1328. begin
  1329. paraloc^.loc:=LOC_REGISTER;
  1330. paraloc^.register:=intretregs[intretregidx];
  1331. if classes[i].typ=X86_64_INTEGER_CLASS then
  1332. begin
  1333. paraloc^.size:=OS_64;
  1334. if paraloc^.def.size<>8 then
  1335. paraloc^.def:=u64inttype;
  1336. end
  1337. else if result.intsize in [1,2,4] then
  1338. begin
  1339. paraloc^.size:=def_cgsize(paraloc^.def);
  1340. end
  1341. else
  1342. begin
  1343. paraloc^.size:=OS_32;
  1344. if paraloc^.def.size<>4 then
  1345. paraloc^.def:=u32inttype;
  1346. end;
  1347. setsubreg(paraloc^.register,cgsize2subreg(R_INTREGISTER,paraloc^.size));
  1348. inc(intretregidx);
  1349. end;
  1350. X86_64_SSE_CLASS,
  1351. X86_64_SSEUP_CLASS,
  1352. X86_64_SSESF_CLASS,
  1353. X86_64_SSEDF_CLASS:
  1354. begin
  1355. paraloc^.loc:=LOC_MMREGISTER;
  1356. if p.proccalloption = pocall_vectorcall then
  1357. paraloc^.register:=mmretregs_vectorcall[mmretregidx]
  1358. else
  1359. paraloc^.register:=mmretregs[mmretregidx];
  1360. case classes[i].typ of
  1361. X86_64_SSESF_CLASS:
  1362. begin
  1363. setsubreg(paraloc^.register,R_SUBMMS);
  1364. paraloc^.size:=OS_F32;
  1365. end;
  1366. X86_64_SSEDF_CLASS:
  1367. begin
  1368. setsubreg(paraloc^.register,R_SUBMMD);
  1369. paraloc^.size:=OS_F64;
  1370. end;
  1371. X86_64_SSE_CLASS:
  1372. begin
  1373. j := 1;
  1374. if not (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
  1375. while i + j <= numclasses do
  1376. begin
  1377. if classes[i+j].typ <> X86_64_SSEUP_CLASS then
  1378. Break;
  1379. Inc(j);
  1380. end;
  1381. { j = MM word count }
  1382. Inc(i, j - 1);
  1383. case j of
  1384. 1:
  1385. begin
  1386. setsubreg(paraloc^.register,R_SUBQ);
  1387. paraloc^.size:=OS_M64;
  1388. end;
  1389. 2:
  1390. begin
  1391. setsubreg(paraloc^.register,R_SUBMMX);
  1392. paraloc^.size:=OS_M128;
  1393. end;
  1394. 4:
  1395. begin
  1396. setsubreg(paraloc^.register,R_SUBMMY);
  1397. paraloc^.size:=OS_M256; { Currently unsupported }
  1398. end;
  1399. 8:
  1400. begin
  1401. setsubreg(paraloc^.register,R_SUBMMZ);
  1402. paraloc^.size:=OS_M512; { Currently unsupported }
  1403. end;
  1404. else
  1405. InternalError(2018012901);
  1406. end;
  1407. end;
  1408. else
  1409. if (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
  1410. begin
  1411. setsubreg(paraloc^.register,R_SUBQ);
  1412. paraloc^.size:=OS_M64;
  1413. end
  1414. else
  1415. { Should not get here }
  1416. InternalError(2018012900);
  1417. end;
  1418. inc(mmretregidx);
  1419. end;
  1420. X86_64_X87_CLASS:
  1421. begin
  1422. { must be followed by X86_64_X87UP_CLASS and that must be
  1423. the last class }
  1424. if (i<>(numclasses-2)) or
  1425. (classes[i+1].typ<>X86_64_X87UP_CLASS) then
  1426. internalerror(2014110401);
  1427. paraloc^.loc:=LOC_FPUREGISTER;
  1428. paraloc^.register:=NR_FPU_RESULT_REG;
  1429. paraloc^.size:=OS_F80;
  1430. break;
  1431. end;
  1432. X86_64_NO_CLASS:
  1433. begin
  1434. { empty record/array }
  1435. if (i<>0) or
  1436. (numclasses<>1) then
  1437. internalerror(2010060302);
  1438. paraloc^.loc:=LOC_VOID;
  1439. paraloc^.def:=voidtype;
  1440. end;
  1441. else
  1442. internalerror(2010021504);
  1443. end;
  1444. Inc(i);
  1445. end;
  1446. end;
  1447. end;
  1448. procedure tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  1449. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  1450. var
  1451. hp : tparavarsym;
  1452. fdef,
  1453. paradef : tdef;
  1454. paraloc : pcgparalocation;
  1455. subreg : tsubregister;
  1456. pushaddr : boolean;
  1457. paracgsize : tcgsize;
  1458. { loc[2] onwards are only used for _m256 under vectorcall/SysV, and
  1459. homogeneous vector aggregates and homogeneous float aggreates under
  1460. the vectorcall calling convention. [Kit] }
  1461. loc : tx64paraclasses;
  1462. needintloc,
  1463. needmmloc,
  1464. paralen,
  1465. locidx,
  1466. i,j,
  1467. varalign,
  1468. paraalign : longint;
  1469. use_ms_abi : boolean;
  1470. begin
  1471. paraalign:=get_para_align(p.proccalloption);
  1472. use_ms_abi:=x86_64_use_ms_abi(p.proccalloption);
  1473. { Register parameters are assigned from left to right }
  1474. for i:=0 to paras.count-1 do
  1475. begin
  1476. hp:=tparavarsym(paras[i]);
  1477. paradef:=hp.vardef;
  1478. { on win64, if a record has only one field and that field is a
  1479. single or double, it has to be handled like a single/double }
  1480. if use_ms_abi and
  1481. ((paradef.typ=recorddef) {or
  1482. is_object(paradef)}) and
  1483. tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(fdef) and
  1484. (fdef.typ=floatdef) and
  1485. (tfloatdef(fdef).floattype in [s32real,s64real]) then
  1486. paradef:=fdef;
  1487. pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
  1488. if pushaddr then
  1489. begin
  1490. loc[0].typ:=X86_64_INTEGER_CLASS;
  1491. loc[1].typ:=X86_64_NO_CLASS;
  1492. paracgsize:=OS_ADDR;
  1493. paralen:=sizeof(pint);
  1494. paradef:=cpointerdef.getreusable_no_free(paradef);
  1495. loc[0].def:=paradef;
  1496. loc[1].def:=nil;
  1497. for j:=2 to high(loc) do
  1498. begin
  1499. loc[j].typ:=X86_64_NO_CLASS;
  1500. loc[j].def:=nil;
  1501. end;
  1502. end
  1503. else
  1504. begin
  1505. getvalueparaloc(p.proccalloption,hp.varspez,paradef,loc);
  1506. paralen:=push_size(hp.varspez,paradef,p.proccalloption);
  1507. if p.proccalloption = pocall_vectorcall then
  1508. begin
  1509. { TODO: Can this set of instructions be put into 'defutil' without it relying on the argument classification? [Kit] }
  1510. { The SIMD vector types have to be OS_M128 etc., not OS_128 etc.}
  1511. case is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,paradef,vs_value) of
  1512. 0:
  1513. { Not a vector or valid aggregate }
  1514. paracgsize:=def_cgsize(paradef);
  1515. 4:
  1516. paracgsize:=OS_F32;
  1517. 8:
  1518. paracgsize:=OS_F64;
  1519. 16:
  1520. paracgsize:=OS_M128;
  1521. 32:
  1522. paracgsize:=OS_M256;
  1523. 64:
  1524. paracgsize:=OS_M512;
  1525. else
  1526. InternalError(2018012910);
  1527. end;
  1528. end
  1529. else
  1530. paracgsize:=def_cgsize(paradef);
  1531. end;
  1532. { cheat for now, we should copy the value to an mm reg as well (FK) }
  1533. if varargsparas and
  1534. use_ms_abi and
  1535. (paradef.typ = floatdef) then
  1536. begin
  1537. loc[1].typ:=X86_64_NO_CLASS;
  1538. if paracgsize=OS_F64 then
  1539. begin
  1540. loc[0].typ:=X86_64_INTEGER_CLASS;
  1541. paracgsize:=OS_64;
  1542. paradef:=u64inttype;
  1543. end
  1544. else
  1545. begin
  1546. loc[0].typ:=X86_64_INTEGERSI_CLASS;
  1547. paracgsize:=OS_32;
  1548. paradef:=u32inttype;
  1549. end;
  1550. loc[0].def:=paradef;
  1551. end;
  1552. hp.paraloc[side].reset;
  1553. hp.paraloc[side].size:=paracgsize;
  1554. hp.paraloc[side].intsize:=paralen;
  1555. hp.paraloc[side].Alignment:=paraalign;
  1556. hp.paraloc[side].def:=paradef;
  1557. if paralen>0 then
  1558. begin
  1559. { Enough registers free? }
  1560. needintloc:=0;
  1561. needmmloc:=0;
  1562. for locidx:=low(loc) to high(loc) do
  1563. case loc[locidx].typ of
  1564. X86_64_INTEGER_CLASS,
  1565. X86_64_INTEGERSI_CLASS:
  1566. inc(needintloc);
  1567. { Note, do NOT include X86_64_SSEUP_CLASS because this links with
  1568. X86_64_SSE_CLASS and we only need one register, not two. [Kit] }
  1569. X86_64_SSE_CLASS,
  1570. X86_64_SSESF_CLASS,
  1571. X86_64_SSEDF_CLASS:
  1572. inc(needmmloc);
  1573. else
  1574. ;
  1575. end;
  1576. { the "-1" is because we can also use the current register }
  1577. if (use_ms_abi and
  1578. ((intparareg+needintloc-1 > high(paraintsupregs_winx64)) or
  1579. ((p.proccalloption = pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_vectorcall))) or
  1580. ((p.proccalloption <> pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_winx64))))) or
  1581. (not use_ms_abi and
  1582. ((intparareg+needintloc-1 > high(paraintsupregs)) or
  1583. (mmparareg+needmmloc-1 > high(parammsupregs)))) then
  1584. begin
  1585. { If there are no registers available for any
  1586. eightbyte of an argument, the whole argument is
  1587. passed on the stack. }
  1588. loc[low(loc)].typ:=X86_64_MEMORY_CLASS;
  1589. loc[low(loc)].def:=paradef;
  1590. for locidx:=succ(low(loc)) to high(loc) do
  1591. loc[locidx].typ:=X86_64_NO_CLASS;
  1592. end;
  1593. locidx:=0;
  1594. while (paralen>0) and
  1595. (locidx<=high(loc)) and
  1596. (loc[locidx].typ<>X86_64_NO_CLASS) do
  1597. begin
  1598. { Allocate }
  1599. case loc[locidx].typ of
  1600. X86_64_INTEGER_CLASS,
  1601. X86_64_INTEGERSI_CLASS:
  1602. begin
  1603. paraloc:=hp.paraloc[side].add_location;
  1604. paraloc^.loc:=LOC_REGISTER;
  1605. paraloc^.def:=loc[locidx].def;
  1606. if (paracgsize=OS_NO) or ((locidx<high(loc)) and (loc[locidx+1].typ<>X86_64_NO_CLASS)) then
  1607. begin
  1608. if loc[locidx].typ=X86_64_INTEGER_CLASS then
  1609. begin
  1610. paraloc^.size:=OS_INT;
  1611. paraloc^.def:=u64inttype;
  1612. subreg:=R_SUBWHOLE;
  1613. end
  1614. else
  1615. begin
  1616. paraloc^.size:=OS_32;
  1617. paraloc^.def:=u32inttype;
  1618. subreg:=R_SUBD;
  1619. end;
  1620. end
  1621. else
  1622. begin
  1623. paraloc^.size:=def_cgsize(paraloc^.def);
  1624. { s64comp is pushed in an int register }
  1625. if paraloc^.size=OS_C64 then
  1626. begin
  1627. paraloc^.size:=OS_64;
  1628. paraloc^.def:=u64inttype;
  1629. end;
  1630. subreg:=cgsize2subreg(R_INTREGISTER,paraloc^.size);
  1631. end;
  1632. { winx64 uses different registers }
  1633. if use_ms_abi then
  1634. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs_winx64[intparareg],subreg)
  1635. else
  1636. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs[intparareg],subreg);
  1637. { matching mm register must be skipped }
  1638. if use_ms_abi then
  1639. inc(mmparareg);
  1640. inc(intparareg);
  1641. dec(paralen,tcgsize2size[paraloc^.size]);
  1642. end;
  1643. X86_64_SSE_CLASS,
  1644. X86_64_SSESF_CLASS,
  1645. X86_64_SSEDF_CLASS:
  1646. begin
  1647. paraloc:=hp.paraloc[side].add_location;
  1648. paraloc^.loc:=LOC_MMREGISTER;
  1649. paraloc^.def:=loc[locidx].def;
  1650. case loc[locidx].typ of
  1651. X86_64_SSESF_CLASS:
  1652. begin
  1653. subreg:=R_SUBMMS;
  1654. paraloc^.size:=OS_F32;
  1655. end;
  1656. X86_64_SSEDF_CLASS:
  1657. begin
  1658. subreg:=R_SUBMMD;
  1659. paraloc^.size:=OS_F64;
  1660. end;
  1661. X86_64_SSE_CLASS:
  1662. begin
  1663. subreg:=R_SUBQ;
  1664. paraloc^.size:=OS_M64;
  1665. j := 1;
  1666. if not (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1667. while locidx + j <= high(loc) do
  1668. begin
  1669. if loc[locidx+j].typ <> X86_64_SSEUP_CLASS then
  1670. Break;
  1671. Inc(j);
  1672. end;
  1673. { j = MM word count }
  1674. Inc(locidx, j - 1);
  1675. case j of
  1676. 1:
  1677. begin
  1678. subreg:=R_SUBQ;
  1679. paraloc^.size:=OS_M64;
  1680. end;
  1681. 2:
  1682. begin
  1683. subreg:=R_SUBMMX;
  1684. paraloc^.size:=OS_M128;
  1685. end;
  1686. 4:
  1687. begin
  1688. subreg:=R_SUBMMY;
  1689. paraloc^.size:=OS_M256; { Currently unsupported }
  1690. end;
  1691. 8:
  1692. begin
  1693. subreg:=R_SUBMMZ;
  1694. paraloc^.size:=OS_M512; { Currently unsupported }
  1695. end;
  1696. else
  1697. InternalError(2018012903);
  1698. end;
  1699. end;
  1700. else
  1701. if (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1702. begin
  1703. subreg:=R_SUBQ;
  1704. paraloc^.size:=OS_M64;
  1705. end
  1706. else
  1707. { Should not get here }
  1708. InternalError(2018012902);
  1709. end;
  1710. { winx64 uses different registers }
  1711. if use_ms_abi then
  1712. begin
  1713. if p.proccalloption = pocall_vectorcall then
  1714. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_vectorcall[mmparareg],subreg)
  1715. else
  1716. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_winx64[mmparareg],subreg);
  1717. end
  1718. else
  1719. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs[mmparareg],subreg);
  1720. { matching int register must be skipped }
  1721. if use_ms_abi then
  1722. inc(intparareg);
  1723. inc(mmparareg);
  1724. dec(paralen,tcgsize2size[paraloc^.size]);
  1725. end;
  1726. X86_64_MEMORY_CLASS :
  1727. begin
  1728. paraloc:=hp.paraloc[side].add_location;
  1729. paraloc^.loc:=LOC_REFERENCE;
  1730. paraloc^.def:=loc[locidx].def;
  1731. {Hack alert!!! We should modify int_cgsize to handle OS_128,
  1732. however, since int_cgsize is called in many places in the
  1733. compiler where only a few can already handle OS_128, fixing it
  1734. properly is out of the question to release 2.2.0 in time. (DM)}
  1735. if paracgsize=OS_128 then
  1736. if paralen=8 then
  1737. paraloc^.size:=OS_64
  1738. else if paralen=16 then
  1739. paraloc^.size:=OS_128
  1740. else
  1741. internalerror(200707143)
  1742. else if paracgsize in [OS_F32,OS_F64,OS_F80,OS_F128] then
  1743. paraloc^.size:=int_float_cgsize(paralen)
  1744. else
  1745. paraloc^.size:=int_cgsize(paralen);
  1746. if side=callerside then
  1747. paraloc^.reference.index:=NR_STACK_POINTER_REG
  1748. else
  1749. paraloc^.reference.index:=NR_FRAME_POINTER_REG;
  1750. varalign:=used_align(size_2_align(paralen),paraalign,paraalign);
  1751. paraloc^.reference.offset:=parasize;
  1752. parasize:=align(parasize+paralen,varalign);
  1753. paralen:=0;
  1754. end;
  1755. else
  1756. internalerror(2010053113);
  1757. end;
  1758. inc(locidx);
  1759. end;
  1760. end
  1761. else
  1762. begin
  1763. paraloc:=hp.paraloc[side].add_location;
  1764. paraloc^.loc:=LOC_VOID;
  1765. paraloc^.def:=paradef;
  1766. end;
  1767. end;
  1768. { Register parameters are assigned from left-to-right, but the
  1769. offsets on the stack are right-to-left. There is no need
  1770. to reverse the offset, only adapt the calleeside with the
  1771. start offset of the first param on the stack }
  1772. if side=calleeside then
  1773. begin
  1774. for i:=0 to paras.count-1 do
  1775. begin
  1776. hp:=tparavarsym(paras[i]);
  1777. paraloc:=hp.paraloc[side].location;
  1778. while paraloc<>nil do
  1779. begin
  1780. with paraloc^ do
  1781. if (loc=LOC_REFERENCE) then
  1782. inc(reference.offset,target_info.first_parm_offset);
  1783. paraloc:=paraloc^.next;
  1784. end;
  1785. end;
  1786. end;
  1787. end;
  1788. function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
  1789. var
  1790. intparareg,mmparareg,
  1791. parasize : longint;
  1792. begin
  1793. intparareg:=0;
  1794. mmparareg:=0;
  1795. if x86_64_use_ms_abi(p.proccalloption) then
  1796. parasize:=4*8
  1797. else
  1798. parasize:=0;
  1799. { calculate the registers for the normal parameters }
  1800. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1801. { append the varargs }
  1802. if assigned(varargspara) then
  1803. begin
  1804. if side=callerside then
  1805. create_paraloc_info_intern(p,side,varargspara,intparareg,mmparareg,parasize,true)
  1806. else
  1807. internalerror(2019021917);
  1808. { store used no. of SSE registers, that needs to be passed in %AL }
  1809. varargspara.mmregsused:=mmparareg;
  1810. end;
  1811. create_funcretloc_info(p,side);
  1812. result:=parasize;
  1813. end;
  1814. function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
  1815. var
  1816. intparareg,mmparareg,
  1817. parasize : longint;
  1818. begin
  1819. intparareg:=0;
  1820. mmparareg:=0;
  1821. if x86_64_use_ms_abi(p.proccalloption) then
  1822. parasize:=4*8
  1823. else
  1824. parasize:=0;
  1825. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1826. { Create Function result paraloc }
  1827. create_funcretloc_info(p,side);
  1828. { We need to return the size allocated on the stack }
  1829. result:=parasize;
  1830. end;
  1831. begin
  1832. paramanager:=tcpuparamanager.create;
  1833. end.