cpupara.pas 80 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. Generates the argument location information for x86-64 target
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cpupara;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cpubase,cgbase,cgutils,
  23. symconst,symtype,symsym,symdef,
  24. parabase,paramgr;
  25. type
  26. tcpuparamanager = class(tparamanager)
  27. private
  28. procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  29. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  30. public
  31. function param_use_paraloc(const cgpara:tcgpara):boolean;override;
  32. function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
  33. function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
  34. function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
  35. function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
  36. function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
  37. function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
  38. function get_saved_registers_mm(calloption: tproccalloption):tcpuregisterarray;override;
  39. function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
  40. function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
  41. function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
  42. end;
  43. implementation
  44. uses
  45. cutils,verbose,
  46. systems,
  47. globals,defutil,
  48. symtable,symutil,
  49. cpupi,
  50. cgx86,cgobj,cgcpu;
  51. const
  52. paraintsupregs : array[0..5] of tsuperregister = (RS_RDI,RS_RSI,RS_RDX,RS_RCX,RS_R8,RS_R9);
  53. parammsupregs : array[0..7] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7);
  54. paraintsupregs_winx64 : array[0..3] of tsuperregister = (RS_RCX,RS_RDX,RS_R8,RS_R9);
  55. parammsupregs_winx64 : array[0..3] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3);
  56. parammsupregs_vectorcall : array[0..5] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5);
  57. {
  58. The argument classification code largely comes from libffi:
  59. ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <[email protected]>
  60. Copyright (c) 2008 Red Hat, Inc.
  61. x86-64 Foreign Function Interface
  62. Permission is hereby granted, free of charge, to any person obtaining
  63. a copy of this software and associated documentation files (the
  64. ``Software''), to deal in the Software without restriction, including
  65. without limitation the rights to use, copy, modify, merge, publish,
  66. distribute, sublicense, and/or sell copies of the Software, and to
  67. permit persons to whom the Software is furnished to do so, subject to
  68. the following conditions:
  69. The above copyright notice and this permission notice shall be included
  70. in all copies or substantial portions of the Software.
  71. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
  72. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  73. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  74. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  75. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  76. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  77. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  78. DEALINGS IN THE SOFTWARE.
  79. ----------------------------------------------------------------------- *)
  80. }
  81. const
  82. { This many classes are required in order to support 4 YMMs (_m256) in a
  83. homogeneous vector aggregate under vectorcall. [Kit] }
  84. MAX_PARA_CLASSES = 16;
  85. type
  86. tx64paraclasstype = (
  87. X86_64_NO_CLASS,
  88. X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS,
  89. X86_64_SSE_CLASS,X86_64_SSESF_CLASS,X86_64_SSEDF_CLASS,X86_64_SSEUP_CLASS,
  90. X86_64_X87_CLASS,X86_64_X87UP_CLASS,
  91. X86_64_COMPLEX_X87_CLASS,
  92. X86_64_MEMORY_CLASS
  93. );
  94. tx64paraclass = record
  95. def: tdef;
  96. typ: tx64paraclasstype;
  97. end;
  98. tx64paraclasses = array[0..MAX_PARA_CLASSES-1] of tx64paraclass;
  99. { Win64-specific helper }
  100. function aggregate_in_registers_win64(varspez:tvarspez;size:longint):boolean;
  101. begin
  102. { TODO: Temporary hack: vs_const parameters are always passed by reference for win64}
  103. result:=(varspez=vs_value) and (size in [1,2,4,8])
  104. end;
  105. (* x86-64 register passing implementation. See x86-64 ABI for details. Goal
  106. of this code is to classify each 8bytes of incoming argument by the register
  107. class and assign registers accordingly. *)
  108. function classify_representative_def(def1, def2: tdef): tdef;
  109. var
  110. def1size, def2size: asizeint;
  111. begin
  112. if not assigned(def1) then
  113. result:=def2
  114. else if not assigned(def2) then
  115. result:=def1
  116. else
  117. begin
  118. def1size:=def1.size;
  119. def2size:=def2.size;
  120. if def1size>def2size then
  121. result:=def1
  122. else if def2size>def1size then
  123. result:=def2
  124. else if def1.alignment>def2.alignment then
  125. result:=def1
  126. else
  127. result:=def2;
  128. end;
  129. end;
  130. (* Classify the argument of type TYPE and mode MODE.
  131. CLASSES will be filled by the register class used to pass each word
  132. of the operand. The number of words is returned. In case the parameter
  133. should be passed in memory, 0 is returned. As a special case for zero
  134. sized containers, classes[0] will be NO_CLASS and 1 is returned.
  135. real_size contains either def.size, or a value derived from
  136. def.bitpackedsize and the field offset denoting the number of bytes
  137. spanned by a bitpacked field
  138. See the x86-64 PS ABI for details.
  139. *)
  140. procedure classify_single_integer_class(def: tdef; size,real_size: aint; var cl: tx64paraclass; byte_offset: aint);
  141. begin
  142. if (byte_offset=0) and
  143. (real_size in [1,2,4,8]) and
  144. (not assigned(cl.def) or
  145. (def.alignment>=cl.def.alignment)) then
  146. cl.def:=def;
  147. if size<=4 then
  148. begin
  149. cl.typ:=X86_64_INTEGERSI_CLASS;
  150. { gcc/clang sign/zero-extend all values to 32 bits, except for
  151. _Bool (= Pascal boolean), which is only zero-extended to 8 bits
  152. as per the x86-64 ABI -> do the same
  153. some testing showed, that this is not true for 8 bit values:
  154. in case of an 8 bit value, it is not zero/sign extended }
  155. if not assigned(cl.def) or
  156. not(cl.def.typ=orddef) or
  157. not(torddef(cl.def).ordtype in [uchar,u8bit,s8bit,pasbool1]) then
  158. cl.def:=u32inttype;
  159. end
  160. else
  161. begin
  162. cl.typ:=X86_64_INTEGER_CLASS;
  163. if not assigned(cl.def) or
  164. (cl.def.size<size) or
  165. (not(cl.def.typ in [orddef,floatdef,pointerdef,classrefdef]) and
  166. not is_implicit_pointer_object_type(cl.def) and
  167. not is_dynamicstring(cl.def) and
  168. not is_dynamic_array(cl.def)) then
  169. cl.def:=u64inttype;
  170. end;
  171. end;
  172. function classify_as_integer_argument(def: tdef; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
  173. var
  174. size: aint;
  175. begin
  176. size:=byte_offset+real_size;
  177. classify_single_integer_class(def,size,real_size,classes[0],byte_offset);
  178. if size<=8 then
  179. result:=1
  180. else
  181. begin
  182. classify_single_integer_class(def,size-8,real_size,classes[1],byte_offset-8);
  183. if size>16 then
  184. internalerror(2010021401);
  185. result:=2;
  186. end
  187. end;
  188. (* Return the union class of CLASS1 and CLASS2.
  189. See the x86-64 PS ABI for details. *)
  190. function merge_classes(class1, class2: tx64paraclass): tx64paraclass;
  191. begin
  192. (* Rule #1: If both classes are equal, this is the resulting class. *)
  193. if (class1.typ=class2.typ) then
  194. begin
  195. result.typ:=class1.typ;
  196. result.def:=classify_representative_def(class1.def,class2.def);
  197. exit;
  198. end;
  199. (* Rule #2: If one of the classes is NO_CLASS, the resulting class is
  200. the other class. *)
  201. if (class1.typ=X86_64_NO_CLASS) then
  202. exit(class2);
  203. if (class2.typ=X86_64_NO_CLASS) then
  204. exit(class1);
  205. (* Rule #3: If one of the classes is MEMORY, the result is MEMORY. *)
  206. if (class1.typ=X86_64_MEMORY_CLASS) then
  207. exit(class1)
  208. else if (class2.typ=X86_64_MEMORY_CLASS) then
  209. exit(class2);
  210. (* Rule #4: If one of the classes is INTEGER, the result is INTEGER. *)
  211. { 32 bit }
  212. if ((class1.typ=X86_64_INTEGERSI_CLASS) and
  213. (class2.typ=X86_64_SSESF_CLASS)) then
  214. exit(class1)
  215. else if ((class2.typ=X86_64_INTEGERSI_CLASS) and
  216. (class1.typ=X86_64_SSESF_CLASS)) then
  217. exit(class2);
  218. { 64 bit }
  219. if (class1.typ in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  220. begin
  221. result:=class1;
  222. if result.def.size<8 then
  223. begin
  224. result.typ:=X86_64_INTEGER_CLASS;
  225. result.def:=s64inttype;
  226. end;
  227. exit
  228. end
  229. else if (class2.typ in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  230. begin
  231. result:=class2;
  232. if result.def.size<8 then
  233. begin
  234. result.typ:=X86_64_INTEGER_CLASS;
  235. result.def:=s64inttype;
  236. end;
  237. exit
  238. end;
  239. (* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
  240. MEMORY is used. *)
  241. if (class1.typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  242. begin
  243. result:=class1;
  244. result.typ:=X86_64_MEMORY_CLASS;
  245. exit;
  246. end
  247. else if (class2.typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  248. begin
  249. result:=class2;
  250. result.typ:=X86_64_MEMORY_CLASS;
  251. exit;
  252. end;
  253. (* Rule #6: Otherwise class SSE is used. *)
  254. if class1.def.size>class2.def.size then
  255. result:=class1
  256. else
  257. result:=class2;
  258. result.typ:=X86_64_SSE_CLASS;
  259. result.def:=s64floattype;
  260. end;
  261. function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint; forward;
  262. function init_aggregate_classification(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; byte_offset: aint; out words: longint; out classes: tx64paraclasses): longint;
  263. var
  264. i: longint;
  265. begin
  266. words:=0;
  267. { we'll be merging the classes elements with the subclasses
  268. elements, so initialise them first }
  269. for i:=low(classes) to high(classes) do
  270. begin
  271. classes[i].typ:=X86_64_NO_CLASS;
  272. classes[i].def:=nil;
  273. end;
  274. { win64 follows a different convention here }
  275. if x86_64_use_ms_abi(calloption) then
  276. begin
  277. if aggregate_in_registers_win64(varspez,def.size) then
  278. begin
  279. classes[0].typ:=X86_64_INTEGER_CLASS;
  280. classes[0].def:=def;
  281. result:=1;
  282. end
  283. else if (calloption = pocall_vectorcall) then
  284. begin
  285. words := (def.size+byte_offset mod 8+7) div 8;
  286. case words of
  287. 0:
  288. Exit(0);
  289. 1..4:
  290. { Aligned vector or array elements }
  291. Result := words;
  292. else
  293. if ((def.aggregatealignment mod (words shl 3)) = 0) or
  294. Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0)
  295. then
  296. begin
  297. { Field of aligned vector type }
  298. if words = 0 then
  299. begin
  300. classes[0].typ:=X86_64_NO_CLASS;
  301. classes[0].def:=def;
  302. Result := 1;
  303. end
  304. else
  305. Result := words;
  306. end
  307. else
  308. Result := 0;
  309. end;
  310. end
  311. else
  312. Result := 0;
  313. Exit;
  314. end;
  315. (* If the struct is larger than 32 bytes, pass it on the stack. *)
  316. if def.size > 32 then
  317. exit(0);
  318. { if a struct starts an offset not divisible by 8, it can span extra
  319. words }
  320. words:=(def.size+byte_offset mod 8+7) div 8;
  321. (* Zero sized arrays or structures are NO_CLASS. We return 0 to
  322. signal memory class, so handle it as special case. *)
  323. if (words=0) then
  324. begin
  325. classes[0].typ:=X86_64_NO_CLASS;
  326. classes[0].def:=def;
  327. exit(1);
  328. end;
  329. result:=words;
  330. end;
  331. function classify_aggregate_element(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; new_byte_offset: aint): longint;
  332. var
  333. subclasses: tx64paraclasses;
  334. i,
  335. pos: longint;
  336. begin
  337. fillchar(subclasses,sizeof(subclasses),0);
  338. result:=classify_argument(calloption,def,parentdef,varspez,real_size,subclasses,new_byte_offset, True);
  339. if (result=0) then
  340. exit;
  341. pos:=new_byte_offset div 8;
  342. if result-1+pos>high(classes) then
  343. internalerror(2010053108);
  344. for i:=0 to result-1 do
  345. begin
  346. classes[i+pos] :=
  347. merge_classes(subclasses[i],classes[i+pos]);
  348. end;
  349. inc(result,pos);
  350. end;
  351. function finalize_aggregate_classification(calloption: tproccalloption; def: tdef; words: longint; var classes: tx64paraclasses): longint;
  352. var
  353. i, vecsize, maxvecsize: longint;
  354. begin
  355. { Workaround: It's not immediately possible to determine if a Double is
  356. by itself or is part of an aligned vector. If the latter, correct the
  357. class definitions here. [Kit] }
  358. if (classes[0].typ = X86_64_SSEDF_CLASS) and (classes[1].typ = X86_64_SSEUP_CLASS) then
  359. classes[0].typ := X86_64_SSE_CLASS;
  360. if (words>2) then
  361. begin
  362. { When size > 16 bytes, if the first one isn't
  363. X86_64_SSE_CLASS or any other ones aren't
  364. X86_64_SSEUP_CLASS, everything should be passed in
  365. memory... }
  366. if (classes[0].typ<>X86_64_SSE_CLASS) then
  367. begin
  368. { ... except if the calling convention is 'vectorcall', then
  369. check to see if we don't have an HFA of 3 or 4 Doubles }
  370. if (calloption <> pocall_vectorcall) or (words > 4) then
  371. Exit(0);
  372. for i := 0 to words - 1 do
  373. if classes[i].typ <> X86_64_SSEDF_CLASS then
  374. Exit(0);
  375. Exit(words);
  376. end;
  377. if ((words shl 3) > def.aggregatealignment) then
  378. { The alignment is wrong for this vector size, hence it is unaligned }
  379. Exit(0);
  380. vecsize := 1;
  381. maxvecsize := words;
  382. for i:=1 to words-1 do
  383. if (classes[i].typ=X86_64_SSEUP_CLASS) then
  384. Inc(vecsize)
  385. else
  386. begin
  387. { Exceptional case. Check that we're not dealing an array of
  388. aligned vectors that is itself aligned to a stricter
  389. boundary (e.g. 4 XMM registers that can be merged into a
  390. single ZMM register). }
  391. if
  392. (classes[i].typ <> X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
  393. ((vecsize and (vecsize - 1)) <> 0) or { If vecsize is not a power of two, then it is definitely not a valid vector }
  394. (vecsize > maxvecsize) or ((maxvecsize < words) and (vecsize <> maxvecsize)) { Mixture of XMMs and YMMs, for example, is not valid }
  395. then
  396. Exit(0);
  397. classes[i].typ := X86_64_SSEUP_CLASS;
  398. maxvecsize := vecsize;
  399. vecsize := 1;
  400. end;
  401. if vecsize <> maxvecsize then
  402. { Last vector is of a different size }
  403. Exit(0);
  404. if vecsize > 2 then
  405. begin
  406. { Cannot use 256-bit and 512-bit vectors if we're not using AVX }
  407. if not UseAVX then
  408. Exit(0);
  409. { WARNING: There is currently no support for 256-bit and 512-bit
  410. aligned vectors, so if an aggregate contains more than two
  411. eightbyte words, it must be passed in memory. When 256-bit and
  412. 512-bit vectors are fully supported, remove the following
  413. line. [Kit] }
  414. Exit(0);
  415. end;
  416. end;
  417. (* Final merger cleanup. *)
  418. (* The first one must never be X86_64_SSEUP_CLASS or
  419. X86_64_X87UP_CLASS. *)
  420. if (classes[0].typ=X86_64_SSEUP_CLASS) or
  421. (classes[0].typ=X86_64_X87UP_CLASS) then
  422. internalerror(2010021402);
  423. for i:=0 to words-1 do
  424. begin
  425. (* If one class is MEMORY, everything should be passed in
  426. memory. *)
  427. if (classes[i].typ=X86_64_MEMORY_CLASS) then
  428. exit(0);
  429. (* The X86_64_SSEUP_CLASS should be always preceded by
  430. X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. *)
  431. if (classes[i].typ=X86_64_SSEUP_CLASS) and
  432. (classes[i-1].typ<>X86_64_SSE_CLASS) and
  433. (classes[i-1].typ<>X86_64_SSEUP_CLASS) then
  434. begin
  435. classes[i].typ:=X86_64_SSE_CLASS;
  436. classes[i].def:=carraydef.getreusable_no_free(s32floattype,2);
  437. end;
  438. (* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
  439. everything should be passed in memory. *)
  440. if (classes[i].typ=X86_64_X87UP_CLASS) and
  441. (classes[i-1].typ<>X86_64_X87_CLASS) then
  442. exit(0);
  443. (* FPC addition: because we store an extended in 10 bytes, the
  444. X86_64_X87UP_CLASS can be replaced with e.g. INTEGER if an
  445. extended is followed by e.g. an array [0..5] of byte -> we also
  446. have to check whether each X86_64_X87_CLASS is followed by
  447. X86_64_X87UP_CLASS -- if not, pass in memory
  448. This cannot happen in the original ABI, because there
  449. sizeof(extended) = 16 and hence nothing can be merged with
  450. X86_64_X87UP_CLASS and change it into something else *)
  451. if (classes[i].typ=X86_64_X87_CLASS) and
  452. ((i=(words-1)) or
  453. (classes[i+1].typ<>X86_64_X87UP_CLASS)) then
  454. exit(0);
  455. end;
  456. {$ifndef llvm}
  457. { FIXME: in case a record contains empty padding space, e.g. a
  458. "single" field followed by a "double", then we have a problem
  459. because the cgpara helpers cannot figure out that they should
  460. skip 4 bytes after storing the single (LOC_MMREGISTER with size
  461. OS_F32) to memory before storing the double -> for now scale
  462. such locations always up to 64 bits, although this loads/stores
  463. some superfluous data }
  464. { 1) the first part is 32 bit while there is still a second part }
  465. if (classes[1].typ<>X86_64_NO_CLASS) then
  466. case classes[0].typ of
  467. X86_64_INTEGERSI_CLASS:
  468. begin
  469. classes[0].typ:=X86_64_INTEGER_CLASS;
  470. classes[0].def:=s64inttype;
  471. end;
  472. X86_64_SSESF_CLASS:
  473. begin
  474. classes[0].typ:=X86_64_SSE_CLASS;
  475. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  476. end;
  477. else
  478. ;
  479. end;
  480. { 2) the second part is 32 bit, but the total size is > 12 bytes }
  481. if (def.size>12) then
  482. case classes[1].typ of
  483. X86_64_INTEGERSI_CLASS:
  484. begin
  485. classes[1].typ:=X86_64_INTEGER_CLASS;
  486. classes[1].def:=s64inttype;
  487. end;
  488. X86_64_SSESF_CLASS:
  489. begin
  490. classes[1].typ:=X86_64_SSE_CLASS;
  491. classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
  492. end;
  493. else
  494. ;
  495. end;
  496. {$endif not llvm}
  497. result:=words;
  498. end;
  499. function try_build_homogeneous_aggregate(def: tdef; words: longint; var classes: tx64paraclasses): longint;
  500. var
  501. i, vecsize, maxvecsize, veccount: longint;
  502. {size, }byte_offset: aint;
  503. vs: TFieldVarSym;
  504. checkalignment: Boolean;
  505. begin
  506. if (words = 0) then
  507. { Should be at least 1 word at this point }
  508. InternalError(2018013100);
  509. case classes[0].typ of
  510. X86_64_SSESF_CLASS:
  511. begin
  512. { Should be an HFA of only a Single }
  513. for i := 1 to High(classes) do
  514. if classes[i].typ <> X86_64_NO_CLASS then
  515. Exit(0);
  516. result := 1;
  517. end;
  518. X86_64_SSEDF_CLASS:
  519. begin
  520. { Possibly an HFA of Doubles }
  521. if TAbstractRecordDef(def).symtable.symlist.count = 0 then
  522. Exit(0);
  523. { Get the information and position on the last entry }
  524. vs:=TFieldVarSym(TAbstractRecordDef(def).symtable.symlist[TAbstractRecordDef(def).symtable.symlist.count - 1]);
  525. //size:=vs.vardef.size;
  526. checkalignment:=true;
  527. if not TAbstractRecordSymtable(TAbstractRecordDef(def).symtable).is_packed then
  528. begin
  529. byte_offset:=vs.fieldoffset;
  530. //size:=vs.vardef.size;
  531. end
  532. else
  533. begin
  534. byte_offset:=vs.fieldoffset div 8;
  535. if (vs.vardef.typ in [orddef,enumdef]) then
  536. begin
  537. { calculate the number of bytes spanned by
  538. this bitpacked field }
  539. //size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8);
  540. { our bitpacked fields are interpreted as always being
  541. aligned, because unlike in C we don't have char:1, int:1
  542. etc (so everything is basically a char:x) }
  543. checkalignment:=false;
  544. end
  545. else
  546. ;//size:=vs.vardef.size;
  547. end;
  548. { If [..] an object [..] contains unaligned fields, it has class
  549. MEMORY }
  550. if checkalignment and
  551. (align(byte_offset,vs.vardef.structalignment)<>byte_offset) then
  552. begin
  553. result:=0;
  554. exit;
  555. end;
  556. if words > 4 then
  557. { HFA too large }
  558. Exit(0);
  559. for i := 1 to words - 1 do
  560. if classes[i].typ <> X86_64_SSEDF_CLASS then
  561. Exit(0);
  562. result := words;
  563. end;
  564. X86_64_SSE_CLASS:
  565. begin
  566. { Determine the nature of the classes.
  567. - If the SSE is by itself, then it is an HFA consisting of 2 Singles.
  568. - If the SSE is followed by an SSESF, then it is an HFA consisting of 3 Singles.
  569. - If the SSE is followed by an SSE and nothing else, then it is an HFA consisting of 4 Singles.
  570. - If the SSE is followed by an SSE, but another class follows, then it is an HFA that is too large.
  571. - If the SSE is followed by an SSEUP, then it is an HVA of some kind.
  572. }
  573. case classes[1].typ of
  574. X86_64_NO_CLASS:
  575. begin
  576. for i := 2 to words - 1 do
  577. if classes[i].typ <> X86_64_NO_CLASS then
  578. { Compound type }
  579. Exit(0);
  580. { Split into 2 Singles again so they correctly fall into separate XMM registers }
  581. classes[0].typ := X86_64_SSESF_CLASS;
  582. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the array }
  583. classes[1].typ := X86_64_SSESF_CLASS;
  584. classes[1].def := classes[0].def;
  585. result := 2;
  586. end;
  587. X86_64_SSESF_CLASS:
  588. begin
  589. for i := 2 to words - 1 do
  590. if classes[i].typ <> X86_64_NO_CLASS then
  591. { Compound type }
  592. Exit(0);
  593. classes[2].typ := X86_64_SSESF_CLASS;
  594. classes[2].def := classes[1].def; { Transfer class 1 to class 2 }
  595. classes[0].typ := X86_64_SSESF_CLASS;
  596. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the array }
  597. classes[1].typ := X86_64_SSESF_CLASS;
  598. classes[1].def := classes[0].def;
  599. result := 3;
  600. end;
  601. X86_64_SSE_CLASS:
  602. begin
  603. for i := 2 to words - 1 do
  604. if classes[i].typ <> X86_64_NO_CLASS then
  605. { HFA too large (or not a true HFA) }
  606. Exit(0);
  607. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the arrays }
  608. classes[2].def := tdef(tarraydef(classes[1].def).elementdef);
  609. classes[1].def := classes[0].def;
  610. classes[3].def := classes[2].def;
  611. classes[0].typ := X86_64_SSESF_CLASS;
  612. classes[1].typ := X86_64_SSESF_CLASS;
  613. classes[2].typ := X86_64_SSESF_CLASS;
  614. classes[3].typ := X86_64_SSESF_CLASS;
  615. result := 4;
  616. end;
  617. X86_64_SSEUP_CLASS:
  618. begin
  619. { Determine vector size }
  620. veccount := 1;
  621. vecsize := 2;
  622. maxvecsize := words;
  623. for i := 2 to words - 1 do
  624. if (classes[i].typ=X86_64_SSEUP_CLASS) then
  625. Inc(vecsize)
  626. else
  627. begin
  628. if
  629. (classes[i].typ <> X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
  630. ((vecsize and (vecsize - 1)) <> 0) or { If vecsize is not a power of two, then it is definitely not a valid aggregate }
  631. (vecsize > maxvecsize) or ((maxvecsize < words) and (vecsize <> maxvecsize)) { Mixture of XMMs and YMMs, for example, is not valid }
  632. then
  633. Exit(0);
  634. Inc(veccount);
  635. maxvecsize := vecsize;
  636. vecsize := 1;
  637. end;
  638. if vecsize <> maxvecsize then
  639. { Last vector is of a different size }
  640. Exit(0);
  641. if veccount > 4 then
  642. { HVA too large }
  643. Exit(0);
  644. Result := words;
  645. end;
  646. else
  647. Exit(0);
  648. end;
  649. end;
  650. else
  651. Exit(0);
  652. end;
  653. end;
  654. function classify_record(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  655. var
  656. vs: tfieldvarsym;
  657. size,
  658. new_byte_offset: aint;
  659. i,
  660. words,
  661. num: longint;
  662. checkalignment: boolean;
  663. begin
  664. result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,words,classes);
  665. if (words=0) then
  666. exit;
  667. (* Merge the fields of the structure. *)
  668. for i:=0 to tabstractrecorddef(def).symtable.symlist.count-1 do
  669. begin
  670. if not is_normal_fieldvarsym(tsym(tabstractrecorddef(def).symtable.symlist[i])) then
  671. continue;
  672. vs:=tfieldvarsym(tabstractrecorddef(def).symtable.symlist[i]);
  673. checkalignment:=true;
  674. if not tabstractrecordsymtable(tabstractrecorddef(def).symtable).is_packed then
  675. begin
  676. new_byte_offset:=byte_offset+vs.fieldoffset;
  677. size:=vs.vardef.size;
  678. end
  679. else
  680. begin
  681. new_byte_offset:=byte_offset+vs.fieldoffset div 8;
  682. if (vs.vardef.typ in [orddef,enumdef]) then
  683. begin
  684. { calculate the number of bytes spanned by
  685. this bitpacked field }
  686. size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8);
  687. { our bitpacked fields are interpreted as always being
  688. aligned, because unlike in C we don't have char:1, int:1
  689. etc (so everything is basically a char:x) }
  690. checkalignment:=false;
  691. end
  692. else
  693. size:=vs.vardef.size;
  694. end;
  695. { If [..] an object [..] contains unaligned fields, it has class
  696. MEMORY }
  697. if checkalignment and
  698. (align(new_byte_offset,vs.vardef.structalignment)<>new_byte_offset) then
  699. begin
  700. result:=0;
  701. exit;
  702. end;
  703. num:=classify_aggregate_element(calloption,vs.vardef,def,varspez,size,classes,new_byte_offset);
  704. if (num=0) then
  705. exit(0);
  706. end;
  707. result:=finalize_aggregate_classification(calloption,def,words,classes);
  708. { There is still one case where it might not have to be passed on the
  709. stack, and that's a homogeneous vector aggregate (HVA) or a
  710. homogeneous float aggregate (HFA) under vectorcall. }
  711. if (calloption = pocall_vectorcall) then
  712. begin
  713. if (result = 0) then
  714. result := try_build_homogeneous_aggregate(def,words,classes)
  715. else
  716. { If we're dealing with an HFA that has 3 or 4 Singles, pairs of
  717. Singles may be merged into a single SSE_CLASS, which must be
  718. split into separate SSESF_CLASS references for vectorcall; this
  719. is only performed in "try_build_homogeneous_aggregate" and not
  720. elsewhere, so accommodate for this exceptional case. [Kit] }
  721. if (result = 2) then
  722. begin
  723. num := try_build_homogeneous_aggregate(def,words,classes);
  724. if num <> 0 then
  725. { If it's equal to zero, just pass 2 and handle the record
  726. type normally }
  727. result := num;
  728. end;
  729. end;
  730. end;
  731. function classify_normal_array(calloption: tproccalloption; def: tarraydef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  732. var
  733. i, elecount: aword;
  734. size,
  735. elesize,
  736. new_byte_offset,
  737. bitoffset: aint;
  738. words,
  739. num: longint;
  740. isbitpacked: boolean;
  741. begin
  742. size:=0;
  743. bitoffset:=0;
  744. result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,words,classes);
  745. if (words=0) then
  746. exit;
  747. isbitpacked:=is_packed_array(def);
  748. if not isbitpacked then
  749. begin
  750. elesize:=def.elesize;
  751. size:=elesize;
  752. end
  753. else
  754. begin
  755. elesize:=def.elepackedbitsize;
  756. bitoffset:=0;
  757. end;
  758. (* Merge the elements of the array. *)
  759. i:=0;
  760. elecount:=def.elecount;
  761. repeat
  762. if not isbitpacked then
  763. begin
  764. { size does not change }
  765. new_byte_offset:=byte_offset+i*elesize;
  766. { If [..] an object [..] contains unaligned fields, it has class
  767. MEMORY }
  768. if align(new_byte_offset,def.alignment)<>new_byte_offset then
  769. begin
  770. result:=0;
  771. exit;
  772. end;
  773. end
  774. else
  775. begin
  776. { calculate the number of bytes spanned by this bitpacked
  777. element }
  778. size:=((bitoffset+elesize+7) div 8)-(bitoffset div 8);
  779. new_byte_offset:=byte_offset+(elesize*i) div 8;
  780. { bit offset of next element }
  781. inc(bitoffset,elesize);
  782. end;
  783. num:=classify_aggregate_element(calloption,def.elementdef,def,varspez,size,classes,new_byte_offset);
  784. if (num=0) then
  785. exit(0);
  786. inc(i);
  787. until (i=elecount);
  788. result:=finalize_aggregate_classification(calloption,def,words,classes);
  789. end;
  790. function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint;
  791. var
  792. rounded_offset: aint;
  793. begin
  794. if round_to_8 then
  795. rounded_offset := byte_offset mod 8
  796. else
  797. rounded_offset := byte_offset;
  798. case def.typ of
  799. orddef,
  800. enumdef,
  801. pointerdef,
  802. classrefdef:
  803. result:=classify_as_integer_argument(def,real_size,classes,rounded_offset);
  804. formaldef:
  805. result:=classify_as_integer_argument(voidpointertype,voidpointertype.size,classes,rounded_offset);
  806. floatdef:
  807. begin
  808. classes[0].def:=def;
  809. case tfloatdef(def).floattype of
  810. s32real:
  811. begin
  812. if (byte_offset mod 8) = 0 then { Check regardless of the round_to_8 flag }
  813. begin
  814. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
  815. { Third element of an aligned vector }
  816. classes[0].typ:=X86_64_SSEUP_CLASS
  817. else
  818. classes[0].typ:=X86_64_SSESF_CLASS
  819. end
  820. else
  821. begin
  822. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) then
  823. { Fourth element of an aligned vector }
  824. classes[0].typ:=X86_64_SSEUP_CLASS
  825. else
  826. { if we have e.g. a record with two successive "single"
  827. fields, we need a 64 bit rather than a 32 bit load }
  828. classes[0].typ:=X86_64_SSE_CLASS;
  829. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  830. end;
  831. result:=1;
  832. end;
  833. s64real:
  834. begin
  835. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
  836. { Aligned vector of type double }
  837. classes[0].typ:=X86_64_SSEUP_CLASS
  838. else
  839. classes[0].typ:=X86_64_SSEDF_CLASS;
  840. result:=1;
  841. end;
  842. s80real,
  843. sc80real:
  844. begin
  845. classes[0].typ:=X86_64_X87_CLASS;
  846. classes[1].typ:=X86_64_X87UP_CLASS;
  847. classes[1].def:=def;
  848. result:=2;
  849. end;
  850. s64comp,
  851. s64currency:
  852. begin
  853. classes[0].typ:=X86_64_INTEGER_CLASS;
  854. result:=1;
  855. end;
  856. s128real:
  857. begin
  858. classes[0].typ:=X86_64_SSE_CLASS;
  859. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  860. classes[1].typ:=X86_64_SSEUP_CLASS;
  861. classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
  862. result:=2;
  863. end;
  864. end;
  865. end;
  866. recorddef:
  867. result:=classify_record(calloption,def,parentdef,varspez,classes,rounded_offset);
  868. objectdef:
  869. begin
  870. if is_object(def) then
  871. { pass by reference, like ppc and i386 }
  872. result:=0
  873. else
  874. { all kinds of pointer types: class, objcclass, interface, ... }
  875. result:=classify_as_integer_argument(def,voidpointertype.size,classes,rounded_offset);
  876. end;
  877. setdef:
  878. begin
  879. if is_smallset(def) then
  880. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset)
  881. else
  882. result:=0;
  883. end;
  884. stringdef:
  885. begin
  886. if (tstringdef(def).stringtype in [st_shortstring,st_longstring]) then
  887. result:=0
  888. else
  889. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset);
  890. end;
  891. arraydef:
  892. begin
  893. { a dynamic array is treated like a pointer }
  894. if is_dynamic_array(def) then
  895. result:=classify_as_integer_argument(def,voidpointertype.size,classes,rounded_offset)
  896. { other special arrays are passed on the stack }
  897. else if is_open_array(def) or
  898. is_array_of_const(def) then
  899. result:=0
  900. else
  901. { normal array }
  902. result:=classify_normal_array(calloption,tarraydef(def),parentdef,varspez,classes,rounded_offset);
  903. end;
  904. { the file record is definitely too big }
  905. filedef:
  906. result:=0;
  907. procvardef:
  908. begin
  909. if (po_methodpointer in tprocvardef(def).procoptions) then
  910. begin
  911. { treat as TMethod record }
  912. def:=search_system_type('TMETHOD').typedef;
  913. result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,rounded_offset, False);
  914. end
  915. else
  916. { pointer }
  917. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset);
  918. end;
  919. variantdef:
  920. begin
  921. { same as tvardata record }
  922. def:=search_system_type('TVARDATA').typedef;
  923. result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,rounded_offset, False);
  924. end;
  925. undefineddef:
  926. { show shall we know?
  927. since classify_argument is called during parsing, see tw27685.pp,
  928. we handle undefineddef here }
  929. result:=0;
  930. errordef:
  931. { error message should have been thrown already before, so avoid only
  932. an internal error }
  933. result:=0;
  934. else
  935. internalerror(2010021405);
  936. end;
  937. end;
  938. { Returns the size of a single element in the aggregate, or the entire vector, if it is one of these types, 0 otherwise }
  939. function is_simd_vector_type_or_homogeneous_aggregate(calloption: tproccalloption; def: tdef; varspez: tvarspez): aint;
  940. var
  941. numclasses,i,vecsize,veccount,maxvecsize:longint;
  942. classes: tx64paraclasses;
  943. firstclass: tx64paraclasstype;
  944. begin
  945. for i := Low(classes) to High(classes) do
  946. begin
  947. classes[i].typ := X86_64_NO_CLASS;
  948. classes[i].def := nil;
  949. end;
  950. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  951. if numclasses = 0 then
  952. Exit(0);
  953. firstclass := classes[0].typ;
  954. case firstclass of
  955. X86_64_SSESF_CLASS: { Only valid if the aggregate contains a lone Single }
  956. begin
  957. if (numclasses = 1) and (calloption = pocall_vectorcall) then
  958. Result := 4
  959. else
  960. Result := 0;
  961. Exit;
  962. end;
  963. X86_64_SSEDF_CLASS:
  964. begin
  965. if (numclasses > 1) and (calloption <> pocall_vectorcall) then
  966. Result := 0
  967. else
  968. begin
  969. for i := 1 to numclasses - 1 do
  970. if classes[i].typ <> X86_64_SSEDF_CLASS then
  971. begin
  972. Result := 0;
  973. Exit;
  974. end;
  975. if (def.size div 8) <> numclasses then
  976. { Wrong alignment or compound size }
  977. Result := 0
  978. else
  979. Result := 8;
  980. end;
  981. end;
  982. X86_64_SSE_CLASS:
  983. begin
  984. maxvecsize := numclasses * 2;
  985. if numclasses = 1 then
  986. begin
  987. { 2 Singles }
  988. if calloption = pocall_vectorcall then
  989. Result := 4
  990. else
  991. Result := 0;
  992. Exit;
  993. end;
  994. if classes[1].typ = X86_64_SSESF_CLASS then
  995. begin
  996. { 3 Singles }
  997. if numclasses <> 2 then
  998. Result := 0
  999. else
  1000. Result := 4;
  1001. Exit;
  1002. end;
  1003. vecsize := 2;
  1004. veccount := 1;
  1005. for i := 1 to numclasses - 1 do
  1006. case classes[i].typ of
  1007. X86_64_SSEUP_CLASS:
  1008. Inc(vecsize, 2);
  1009. X86_64_SSE_CLASS:
  1010. begin
  1011. if (maxvecsize < numclasses * 2) and (vecsize <> maxvecsize) then
  1012. { Different vector sizes }
  1013. Exit(0);
  1014. maxvecsize := vecsize;
  1015. vecsize := 2;
  1016. Inc(veccount);
  1017. end;
  1018. else
  1019. Exit(0);
  1020. end;
  1021. if vecsize <> maxvecsize then
  1022. { Last vector has to be the same size }
  1023. Exit(0);
  1024. { Either an HFA with 4 Singles, or an HVA with up to 4 vectors
  1025. (or a lone SIMD vector if veccount = 1) }
  1026. if (veccount < 4) then
  1027. begin
  1028. if (veccount > 1) and (calloption <> pocall_vectorcall) then
  1029. Result := 0
  1030. else
  1031. if vecsize = 2 then
  1032. { Packed, unaligned array of Singles }
  1033. Result := 4
  1034. else
  1035. Result := vecsize * 8
  1036. end
  1037. else
  1038. Result := 0;
  1039. end;
  1040. else
  1041. Exit(0);
  1042. end;
  1043. end;
  1044. procedure getvalueparaloc(calloption: tproccalloption;varspez:tvarspez;def:tdef;var classes: tx64paraclasses);
  1045. var
  1046. size: aint;
  1047. i: longint;
  1048. numclasses: longint;
  1049. begin
  1050. { init the classes array, because even if classify_argument inits only
  1051. one element we copy both to loc1/loc2 in case "1" is returned }
  1052. for i:=low(classes) to high(classes) do
  1053. begin
  1054. classes[i].typ:=X86_64_NO_CLASS;
  1055. classes[i].def:=nil;
  1056. end;
  1057. { def.size internalerrors for open arrays and dynamic arrays, since
  1058. their size cannot be determined at compile-time.
  1059. classify_argument does not look at the realsize argument for arrays
  1060. cases, but we obviously do have to pass something... }
  1061. if is_special_array(def) then
  1062. size:=-1
  1063. else
  1064. size:=def.size;
  1065. numclasses:=classify_argument(calloption,def,nil,varspez,size,classes,0,False);
  1066. case numclasses of
  1067. 0:
  1068. begin
  1069. classes[0].typ:=X86_64_MEMORY_CLASS;
  1070. classes[0].def:=def;
  1071. end;
  1072. 1..4:
  1073. begin
  1074. { If the class is X87, X87UP or COMPLEX_X87, it is passed in memory }
  1075. for i := 0 to numclasses - 1 do
  1076. begin
  1077. if classes[i].typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS] then
  1078. classes[i].typ:=X86_64_MEMORY_CLASS;
  1079. end;
  1080. end;
  1081. else
  1082. { 8 can happen for _m512 vectors, but are not yet supported }
  1083. internalerror(2010021501);
  1084. end;
  1085. end;
  1086. function tcpuparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
  1087. var
  1088. classes: tx64paraclasses;
  1089. numclasses: longint;
  1090. begin
  1091. if handle_common_ret_in_param(def,pd,result) then
  1092. exit;
  1093. fillchar(classes,sizeof(classes),0);
  1094. case def.typ of
  1095. { for records it depends on their contents and size }
  1096. recorddef,
  1097. { make sure we handle 'procedure of object' correctly }
  1098. procvardef:
  1099. begin
  1100. numclasses:=classify_argument(pd.proccalloption,def,nil,vs_value,def.size,classes,0,False);
  1101. result:=(numclasses=0);
  1102. end;
  1103. else
  1104. result:=inherited ret_in_param(def,pd);
  1105. end;
  1106. end;
  1107. function tcpuparamanager.param_use_paraloc(const cgpara:tcgpara):boolean;
  1108. var
  1109. paraloc : pcgparalocation;
  1110. begin
  1111. if not assigned(cgpara.location) then
  1112. internalerror(200410102);
  1113. result:=true;
  1114. { All locations are LOC_REFERENCE }
  1115. paraloc:=cgpara.location;
  1116. while assigned(paraloc) do
  1117. begin
  1118. if (paraloc^.loc<>LOC_REFERENCE) then
  1119. begin
  1120. result:=false;
  1121. exit;
  1122. end;
  1123. paraloc:=paraloc^.next;
  1124. end;
  1125. end;
  1126. { true if a parameter is too large to copy and only the address is pushed }
  1127. function tcpuparamanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
  1128. var
  1129. classes: tx64paraclasses;
  1130. numclasses: longint;
  1131. begin
  1132. fillchar(classes,sizeof(classes),0);
  1133. result:=false;
  1134. { var,out,constref always require address }
  1135. if varspez in [vs_var,vs_out,vs_constref] then
  1136. begin
  1137. result:=true;
  1138. exit;
  1139. end;
  1140. { Only vs_const, vs_value here }
  1141. case def.typ of
  1142. formaldef :
  1143. result:=true;
  1144. recorddef :
  1145. begin
  1146. { MetroWerks Pascal: const records always passed by reference
  1147. (for Mac OS X interfaces) }
  1148. if (calloption=pocall_mwpascal) and
  1149. (varspez=vs_const) then
  1150. result:=true
  1151. { Win ABI depends on size to pass it in a register or not }
  1152. else if x86_64_use_ms_abi(calloption) then
  1153. begin
  1154. if calloption = pocall_vectorcall then
  1155. begin
  1156. { "vectorcall" has the addition that it allows for aligned SSE types }
  1157. result :=
  1158. not aggregate_in_registers_win64(varspez,def.size) and
  1159. (is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
  1160. end
  1161. else
  1162. result:=not aggregate_in_registers_win64(varspez,def.size)
  1163. end
  1164. { pass constant parameters that would be passed via memory by
  1165. reference for non-cdecl/cppdecl, and make sure that the tmethod
  1166. record (size=16) is passed the same way as a complex procvar }
  1167. else if ((varspez=vs_const) and
  1168. not(calloption in cdecl_pocalls)) or
  1169. (def.size=16) then
  1170. begin
  1171. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  1172. result:=numclasses=0;
  1173. end
  1174. else
  1175. { SysV ABI always passes it as value parameter }
  1176. result:=false;
  1177. end;
  1178. arraydef :
  1179. begin
  1180. { cdecl array of const need to be ignored and therefor be puhsed
  1181. as value parameter with length 0 }
  1182. if ((calloption in cdecl_pocalls) and
  1183. is_array_of_const(def)) or
  1184. is_dynamic_array(def) then
  1185. result:=false
  1186. else if (calloption = pocall_vectorcall) then
  1187. begin
  1188. { Pass all arrays by reference unless they are a valid, aligned SIMD type (arrays can't be homogeneous aggregates) }
  1189. result := (is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
  1190. end
  1191. else
  1192. { pass all arrays by reference to be compatible with C (passing
  1193. an array by value (= copying it on the stack) does not exist,
  1194. because an array is the same as a pointer there }
  1195. result:=true
  1196. end;
  1197. objectdef :
  1198. begin
  1199. { don't treat objects like records, because we only know wheter
  1200. or not they'll have a VMT after the entire object is parsed
  1201. -> if they are used as function result from one of their own
  1202. methods, their size can still change after we've determined
  1203. whether this function result should be returned by reference or
  1204. by value }
  1205. if is_object(def) then
  1206. result:=true;
  1207. end;
  1208. variantdef,
  1209. stringdef,
  1210. procvardef,
  1211. setdef :
  1212. begin
  1213. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  1214. result:=numclasses=0;
  1215. end;
  1216. else
  1217. ;
  1218. end;
  1219. end;
  1220. function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
  1221. begin
  1222. if x86_64_use_ms_abi(calloption) then
  1223. result:=[RS_RAX,RS_RCX,RS_RDX,RS_R8,RS_R9,RS_R10,RS_R11]
  1224. else
  1225. result:=[RS_RAX,RS_RCX,RS_RDX,RS_RSI,RS_RDI,RS_R8,RS_R9,RS_R10,RS_R11];
  1226. end;
  1227. function tcpuparamanager.get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;
  1228. begin
  1229. if x86_64_use_ms_abi(calloption) then
  1230. result:=[RS_XMM0..RS_XMM5,RS_XMM16..RS_XMM31]
  1231. else
  1232. result:=[RS_XMM0..RS_XMM15,RS_XMM16..RS_XMM31];
  1233. end;
  1234. function tcpuparamanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
  1235. begin
  1236. result:=[RS_ST0..RS_ST7];
  1237. end;
  1238. function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
  1239. const
  1240. win64_saved_std_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..7] of tsuperregister{$endif} = (RS_RBX,RS_RDI,RS_RSI,RS_R12,RS_R13,RS_R14,RS_R15,RS_RBP);
  1241. others_saved_std_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..4] of tsuperregister{$endif} = (RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15);
  1242. begin
  1243. if tcgx86_64(cg).use_ms_abi then
  1244. result:=win64_saved_std_regs
  1245. else
  1246. result:=others_saved_std_regs;
  1247. end;
  1248. function tcpuparamanager.get_saved_registers_mm(calloption: tproccalloption):tcpuregisterarray;
  1249. const
  1250. win64_saved_xmm_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..9] of tsuperregister{$endif} = (RS_XMM6,RS_XMM7,
  1251. RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15);
  1252. begin
  1253. if tcgx86_64(cg).use_ms_abi then
  1254. result:=win64_saved_xmm_regs
  1255. else
  1256. SetLength(result,0);
  1257. end;
  1258. function tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
  1259. const
  1260. intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
  1261. mmretregs: array[0..1] of tregister = (NR_MM_RESULT_REG,NR_MM_RESULT_REG_HIGH);
  1262. mmretregs_vectorcall: array[0..3] of tregister = (NR_XMM0,NR_XMM1,NR_XMM2,NR_XMM3);
  1263. var
  1264. classes: tx64paraclasses;
  1265. i,j,
  1266. numclasses: longint;
  1267. intretregidx,
  1268. mmretregidx: longint;
  1269. retcgsize : tcgsize;
  1270. paraloc : pcgparalocation;
  1271. begin
  1272. if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
  1273. exit;
  1274. { Return in FPU register? -> don't use classify_argument(), because
  1275. currency and comp need special treatment here (they are integer class
  1276. when passing as parameter, but LOC_FPUREGISTER as function result) }
  1277. if result.def.typ=floatdef then
  1278. begin
  1279. paraloc:=result.add_location;
  1280. paraloc^.def:=result.def;
  1281. case tfloatdef(result.def).floattype of
  1282. s32real:
  1283. begin
  1284. paraloc^.loc:=LOC_MMREGISTER;
  1285. paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
  1286. paraloc^.size:=OS_F32;
  1287. end;
  1288. s64real:
  1289. begin
  1290. paraloc^.loc:=LOC_MMREGISTER;
  1291. paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
  1292. paraloc^.size:=OS_F64;
  1293. end;
  1294. { the first two only exist on targets with an x87, on others
  1295. they are replace by int64 }
  1296. s64currency,
  1297. s64comp,
  1298. s80real,
  1299. sc80real:
  1300. begin
  1301. paraloc^.loc:=LOC_FPUREGISTER;
  1302. paraloc^.register:=NR_FPU_RESULT_REG;
  1303. paraloc^.size:=retcgsize;
  1304. end;
  1305. else
  1306. internalerror(200405034);
  1307. end;
  1308. end
  1309. else
  1310. { Return in register }
  1311. begin
  1312. fillchar(classes,sizeof(classes),0);
  1313. numclasses:=classify_argument(p.proccalloption,result.def,nil,vs_value,result.def.size,classes,0,False);
  1314. { this would mean a memory return }
  1315. if (numclasses=0) then
  1316. begin
  1317. { we got an error before, so we just skip all the return type generation }
  1318. if result.def.typ=errordef then
  1319. exit;
  1320. internalerror(2010021502);
  1321. end;
  1322. if (numclasses > MAX_PARA_CLASSES) then
  1323. internalerror(2010021503);
  1324. intretregidx:=0;
  1325. mmretregidx:=0;
  1326. i := 0;
  1327. { We can't use a for-loop here because the treatment of the SSEUP class requires skipping over i's }
  1328. while i < numclasses do
  1329. begin
  1330. paraloc:=result.add_location;
  1331. paraloc^.def:=classes[i].def;
  1332. case classes[i].typ of
  1333. X86_64_INTEGERSI_CLASS,
  1334. X86_64_INTEGER_CLASS:
  1335. begin
  1336. paraloc^.loc:=LOC_REGISTER;
  1337. paraloc^.register:=intretregs[intretregidx];
  1338. if classes[i].typ=X86_64_INTEGER_CLASS then
  1339. begin
  1340. paraloc^.size:=OS_64;
  1341. if paraloc^.def.size<>8 then
  1342. paraloc^.def:=u64inttype;
  1343. end
  1344. else if result.intsize in [1,2,4] then
  1345. begin
  1346. paraloc^.size:=def_cgsize(paraloc^.def);
  1347. end
  1348. else
  1349. begin
  1350. paraloc^.size:=OS_32;
  1351. if paraloc^.def.size<>4 then
  1352. paraloc^.def:=u32inttype;
  1353. end;
  1354. setsubreg(paraloc^.register,cgsize2subreg(R_INTREGISTER,paraloc^.size));
  1355. inc(intretregidx);
  1356. end;
  1357. X86_64_SSE_CLASS,
  1358. X86_64_SSEUP_CLASS,
  1359. X86_64_SSESF_CLASS,
  1360. X86_64_SSEDF_CLASS:
  1361. begin
  1362. paraloc^.loc:=LOC_MMREGISTER;
  1363. if p.proccalloption = pocall_vectorcall then
  1364. paraloc^.register:=mmretregs_vectorcall[mmretregidx]
  1365. else
  1366. paraloc^.register:=mmretregs[mmretregidx];
  1367. case classes[i].typ of
  1368. X86_64_SSESF_CLASS:
  1369. begin
  1370. setsubreg(paraloc^.register,R_SUBMMS);
  1371. paraloc^.size:=OS_F32;
  1372. end;
  1373. X86_64_SSEDF_CLASS:
  1374. begin
  1375. setsubreg(paraloc^.register,R_SUBMMD);
  1376. paraloc^.size:=OS_F64;
  1377. end;
  1378. X86_64_SSE_CLASS:
  1379. begin
  1380. j := 1;
  1381. if not (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
  1382. while i + j <= numclasses do
  1383. begin
  1384. if classes[i+j].typ <> X86_64_SSEUP_CLASS then
  1385. Break;
  1386. Inc(j);
  1387. end;
  1388. { j = MM word count }
  1389. Inc(i, j - 1);
  1390. case j of
  1391. 1:
  1392. begin
  1393. setsubreg(paraloc^.register,R_SUBQ);
  1394. paraloc^.size:=OS_M64;
  1395. end;
  1396. 2:
  1397. begin
  1398. setsubreg(paraloc^.register,R_SUBMMX);
  1399. paraloc^.size:=OS_M128;
  1400. end;
  1401. 4:
  1402. begin
  1403. setsubreg(paraloc^.register,R_SUBMMY);
  1404. paraloc^.size:=OS_M256; { Currently unsupported }
  1405. end;
  1406. 8:
  1407. begin
  1408. setsubreg(paraloc^.register,R_SUBMMZ);
  1409. paraloc^.size:=OS_M512; { Currently unsupported }
  1410. end;
  1411. else
  1412. InternalError(2018012901);
  1413. end;
  1414. paraloc^.def:=carraydef.getreusable_no_free_vector(paraloc^.def,j);
  1415. end;
  1416. else
  1417. if (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
  1418. begin
  1419. setsubreg(paraloc^.register,R_SUBQ);
  1420. paraloc^.size:=OS_M64;
  1421. end
  1422. else
  1423. { Should not get here }
  1424. InternalError(2018012900);
  1425. end;
  1426. inc(mmretregidx);
  1427. end;
  1428. X86_64_X87_CLASS:
  1429. begin
  1430. { must be followed by X86_64_X87UP_CLASS and that must be
  1431. the last class }
  1432. if (i<>(numclasses-2)) or
  1433. (classes[i+1].typ<>X86_64_X87UP_CLASS) then
  1434. internalerror(2014110401);
  1435. paraloc^.loc:=LOC_FPUREGISTER;
  1436. paraloc^.register:=NR_FPU_RESULT_REG;
  1437. paraloc^.size:=OS_F80;
  1438. break;
  1439. end;
  1440. X86_64_NO_CLASS:
  1441. begin
  1442. { empty record/array }
  1443. if (i<>0) or
  1444. (numclasses<>1) then
  1445. internalerror(2010060302);
  1446. paraloc^.loc:=LOC_VOID;
  1447. paraloc^.def:=voidtype;
  1448. end;
  1449. else
  1450. internalerror(2010021504);
  1451. end;
  1452. Inc(i);
  1453. end;
  1454. end;
  1455. end;
  1456. procedure tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  1457. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  1458. var
  1459. hp : tparavarsym;
  1460. fdef,
  1461. paradef : tdef;
  1462. paraloc : pcgparalocation;
  1463. subreg : tsubregister;
  1464. pushaddr : boolean;
  1465. paracgsize : tcgsize;
  1466. { loc[2] onwards are only used for _m256 under vectorcall/SysV, and
  1467. homogeneous vector aggregates and homogeneous float aggreates under
  1468. the vectorcall calling convention. [Kit] }
  1469. loc : tx64paraclasses;
  1470. needintloc,
  1471. needmmloc,
  1472. paralen,
  1473. locidx,
  1474. i,j,
  1475. varalign,
  1476. paraalign : longint;
  1477. use_ms_abi : boolean;
  1478. begin
  1479. paraalign:=get_para_align(p.proccalloption);
  1480. use_ms_abi:=x86_64_use_ms_abi(p.proccalloption);
  1481. { Register parameters are assigned from left to right }
  1482. for i:=0 to paras.count-1 do
  1483. begin
  1484. hp:=tparavarsym(paras[i]);
  1485. paradef:=hp.vardef;
  1486. { on win64, if a record has only one field and that field is a
  1487. single or double, it has to be handled like a single/double }
  1488. if use_ms_abi and
  1489. ((paradef.typ=recorddef) {or
  1490. is_object(paradef)}) and
  1491. tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(fdef) and
  1492. (fdef.typ=floatdef) and
  1493. (tfloatdef(fdef).floattype in [s32real,s64real]) then
  1494. paradef:=fdef;
  1495. pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
  1496. if pushaddr then
  1497. begin
  1498. loc[0].typ:=X86_64_INTEGER_CLASS;
  1499. loc[1].typ:=X86_64_NO_CLASS;
  1500. paracgsize:=OS_ADDR;
  1501. paralen:=sizeof(pint);
  1502. paradef:=cpointerdef.getreusable_no_free(paradef);
  1503. loc[0].def:=paradef;
  1504. loc[1].def:=nil;
  1505. for j:=2 to high(loc) do
  1506. begin
  1507. loc[j].typ:=X86_64_NO_CLASS;
  1508. loc[j].def:=nil;
  1509. end;
  1510. end
  1511. else
  1512. begin
  1513. getvalueparaloc(p.proccalloption,hp.varspez,paradef,loc);
  1514. paralen:=push_size(hp.varspez,paradef,p.proccalloption);
  1515. if p.proccalloption = pocall_vectorcall then
  1516. begin
  1517. { TODO: Can this set of instructions be put into 'defutil' without it relying on the argument classification? [Kit] }
  1518. { The SIMD vector types have to be OS_M128 etc., not OS_128 etc.}
  1519. case is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,paradef,vs_value) of
  1520. 0:
  1521. { Not a vector or valid aggregate }
  1522. paracgsize:=def_cgsize(paradef);
  1523. 4:
  1524. paracgsize:=OS_F32;
  1525. 8:
  1526. paracgsize:=OS_F64;
  1527. 16:
  1528. paracgsize:=OS_M128;
  1529. 32:
  1530. paracgsize:=OS_M256;
  1531. 64:
  1532. paracgsize:=OS_M512;
  1533. else
  1534. InternalError(2018012910);
  1535. end;
  1536. end
  1537. else
  1538. paracgsize:=def_cgsize(paradef);
  1539. end;
  1540. { cheat for now, we should copy the value to an mm reg as well (FK) }
  1541. if varargsparas and
  1542. use_ms_abi and
  1543. (paradef.typ = floatdef) then
  1544. begin
  1545. loc[1].typ:=X86_64_NO_CLASS;
  1546. if paracgsize=OS_F64 then
  1547. begin
  1548. loc[0].typ:=X86_64_INTEGER_CLASS;
  1549. paracgsize:=OS_64;
  1550. paradef:=u64inttype;
  1551. end
  1552. else
  1553. begin
  1554. loc[0].typ:=X86_64_INTEGERSI_CLASS;
  1555. paracgsize:=OS_32;
  1556. paradef:=u32inttype;
  1557. end;
  1558. loc[0].def:=paradef;
  1559. end;
  1560. hp.paraloc[side].reset;
  1561. hp.paraloc[side].size:=paracgsize;
  1562. hp.paraloc[side].intsize:=paralen;
  1563. hp.paraloc[side].Alignment:=paraalign;
  1564. hp.paraloc[side].def:=paradef;
  1565. if paralen>0 then
  1566. begin
  1567. { Enough registers free? }
  1568. needintloc:=0;
  1569. needmmloc:=0;
  1570. for locidx:=low(loc) to high(loc) do
  1571. case loc[locidx].typ of
  1572. X86_64_INTEGER_CLASS,
  1573. X86_64_INTEGERSI_CLASS:
  1574. inc(needintloc);
  1575. { Note, do NOT include X86_64_SSEUP_CLASS because this links with
  1576. X86_64_SSE_CLASS and we only need one register, not two. [Kit] }
  1577. X86_64_SSE_CLASS,
  1578. X86_64_SSESF_CLASS,
  1579. X86_64_SSEDF_CLASS:
  1580. inc(needmmloc);
  1581. else
  1582. ;
  1583. end;
  1584. { the "-1" is because we can also use the current register }
  1585. if (use_ms_abi and
  1586. ((intparareg+needintloc-1 > high(paraintsupregs_winx64)) or
  1587. ((p.proccalloption = pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_vectorcall))) or
  1588. ((p.proccalloption <> pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_winx64))))) or
  1589. (not use_ms_abi and
  1590. ((intparareg+needintloc-1 > high(paraintsupregs)) or
  1591. (mmparareg+needmmloc-1 > high(parammsupregs)))) then
  1592. begin
  1593. { If there are no registers available for any
  1594. eightbyte of an argument, the whole argument is
  1595. passed on the stack. }
  1596. loc[low(loc)].typ:=X86_64_MEMORY_CLASS;
  1597. loc[low(loc)].def:=paradef;
  1598. for locidx:=succ(low(loc)) to high(loc) do
  1599. loc[locidx].typ:=X86_64_NO_CLASS;
  1600. end;
  1601. locidx:=0;
  1602. while (paralen>0) and
  1603. (locidx<=high(loc)) and
  1604. (loc[locidx].typ<>X86_64_NO_CLASS) do
  1605. begin
  1606. { Allocate }
  1607. case loc[locidx].typ of
  1608. X86_64_INTEGER_CLASS,
  1609. X86_64_INTEGERSI_CLASS:
  1610. begin
  1611. paraloc:=hp.paraloc[side].add_location;
  1612. paraloc^.loc:=LOC_REGISTER;
  1613. paraloc^.def:=loc[locidx].def;
  1614. if (paracgsize=OS_NO) or ((locidx<high(loc)) and (loc[locidx+1].typ<>X86_64_NO_CLASS)) then
  1615. begin
  1616. if loc[locidx].typ=X86_64_INTEGER_CLASS then
  1617. begin
  1618. paraloc^.size:=OS_INT;
  1619. paraloc^.def:=u64inttype;
  1620. subreg:=R_SUBWHOLE;
  1621. end
  1622. else
  1623. begin
  1624. paraloc^.size:=OS_32;
  1625. paraloc^.def:=u32inttype;
  1626. subreg:=R_SUBD;
  1627. end;
  1628. end
  1629. else
  1630. begin
  1631. paraloc^.size:=def_cgsize(paraloc^.def);
  1632. { s64comp is pushed in an int register }
  1633. if paraloc^.size=OS_C64 then
  1634. begin
  1635. paraloc^.size:=OS_64;
  1636. paraloc^.def:=u64inttype;
  1637. end;
  1638. subreg:=cgsize2subreg(R_INTREGISTER,paraloc^.size);
  1639. end;
  1640. { winx64 uses different registers }
  1641. if use_ms_abi then
  1642. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs_winx64[intparareg],subreg)
  1643. else
  1644. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs[intparareg],subreg);
  1645. { matching mm register must be skipped }
  1646. if use_ms_abi then
  1647. inc(mmparareg);
  1648. inc(intparareg);
  1649. dec(paralen,tcgsize2size[paraloc^.size]);
  1650. end;
  1651. X86_64_SSE_CLASS,
  1652. X86_64_SSESF_CLASS,
  1653. X86_64_SSEDF_CLASS:
  1654. begin
  1655. paraloc:=hp.paraloc[side].add_location;
  1656. paraloc^.loc:=LOC_MMREGISTER;
  1657. paraloc^.def:=loc[locidx].def;
  1658. case loc[locidx].typ of
  1659. X86_64_SSESF_CLASS:
  1660. begin
  1661. subreg:=R_SUBMMS;
  1662. paraloc^.size:=OS_F32;
  1663. end;
  1664. X86_64_SSEDF_CLASS:
  1665. begin
  1666. subreg:=R_SUBMMD;
  1667. paraloc^.size:=OS_F64;
  1668. end;
  1669. X86_64_SSE_CLASS:
  1670. begin
  1671. subreg:=R_SUBQ;
  1672. paraloc^.size:=OS_M64;
  1673. j := 1;
  1674. if not (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1675. while locidx + j <= high(loc) do
  1676. begin
  1677. if loc[locidx+j].typ <> X86_64_SSEUP_CLASS then
  1678. Break;
  1679. Inc(j);
  1680. end;
  1681. { j = MM word count }
  1682. Inc(locidx, j - 1);
  1683. case j of
  1684. 1:
  1685. begin
  1686. subreg:=R_SUBQ;
  1687. paraloc^.size:=OS_M64;
  1688. end;
  1689. 2:
  1690. begin
  1691. subreg:=R_SUBMMX;
  1692. paraloc^.size:=OS_M128;
  1693. end;
  1694. 4:
  1695. begin
  1696. subreg:=R_SUBMMY;
  1697. paraloc^.size:=OS_M256; { Currently unsupported }
  1698. end;
  1699. 8:
  1700. begin
  1701. subreg:=R_SUBMMZ;
  1702. paraloc^.size:=OS_M512; { Currently unsupported }
  1703. end;
  1704. else
  1705. InternalError(2018012903);
  1706. end;
  1707. paraloc^.def:=carraydef.getreusable_no_free_vector(paraloc^.def,j);
  1708. end;
  1709. else
  1710. if (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1711. begin
  1712. subreg:=R_SUBQ;
  1713. paraloc^.size:=OS_M64;
  1714. end
  1715. else
  1716. { Should not get here }
  1717. InternalError(2018012902);
  1718. end;
  1719. { winx64 uses different registers }
  1720. if use_ms_abi then
  1721. begin
  1722. if p.proccalloption = pocall_vectorcall then
  1723. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_vectorcall[mmparareg],subreg)
  1724. else
  1725. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_winx64[mmparareg],subreg);
  1726. end
  1727. else
  1728. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs[mmparareg],subreg);
  1729. { matching int register must be skipped }
  1730. if use_ms_abi then
  1731. inc(intparareg);
  1732. inc(mmparareg);
  1733. dec(paralen,tcgsize2size[paraloc^.size]);
  1734. end;
  1735. X86_64_MEMORY_CLASS :
  1736. begin
  1737. paraloc:=hp.paraloc[side].add_location;
  1738. paraloc^.loc:=LOC_REFERENCE;
  1739. paraloc^.def:=loc[locidx].def;
  1740. {Hack alert!!! We should modify int_cgsize to handle OS_128,
  1741. however, since int_cgsize is called in many places in the
  1742. compiler where only a few can already handle OS_128, fixing it
  1743. properly is out of the question to release 2.2.0 in time. (DM)}
  1744. if paracgsize=OS_128 then
  1745. if paralen=8 then
  1746. paraloc^.size:=OS_64
  1747. else if paralen=16 then
  1748. paraloc^.size:=OS_128
  1749. else
  1750. internalerror(200707143)
  1751. else if paracgsize in [OS_F32,OS_F64,OS_F80,OS_F128] then
  1752. paraloc^.size:=int_float_cgsize(paralen)
  1753. else
  1754. paraloc^.size:=int_cgsize(paralen);
  1755. if side=callerside then
  1756. paraloc^.reference.index:=NR_STACK_POINTER_REG
  1757. else
  1758. paraloc^.reference.index:=NR_FRAME_POINTER_REG;
  1759. varalign:=used_align(size_2_align(paralen),paraalign,paraalign);
  1760. paraloc^.reference.offset:=parasize;
  1761. parasize:=align(parasize+paralen,varalign);
  1762. paralen:=0;
  1763. end;
  1764. else
  1765. internalerror(2010053113);
  1766. end;
  1767. inc(locidx);
  1768. end;
  1769. end
  1770. else
  1771. begin
  1772. paraloc:=hp.paraloc[side].add_location;
  1773. paraloc^.loc:=LOC_VOID;
  1774. paraloc^.def:=paradef;
  1775. end;
  1776. end;
  1777. { Register parameters are assigned from left-to-right, but the
  1778. offsets on the stack are right-to-left. There is no need
  1779. to reverse the offset, only adapt the calleeside with the
  1780. start offset of the first param on the stack }
  1781. if side=calleeside then
  1782. begin
  1783. for i:=0 to paras.count-1 do
  1784. begin
  1785. hp:=tparavarsym(paras[i]);
  1786. paraloc:=hp.paraloc[side].location;
  1787. while paraloc<>nil do
  1788. begin
  1789. with paraloc^ do
  1790. if (loc=LOC_REFERENCE) then
  1791. inc(reference.offset,target_info.first_parm_offset);
  1792. paraloc:=paraloc^.next;
  1793. end;
  1794. end;
  1795. end;
  1796. end;
  1797. function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
  1798. var
  1799. intparareg,mmparareg,
  1800. parasize : longint;
  1801. begin
  1802. intparareg:=0;
  1803. mmparareg:=0;
  1804. if x86_64_use_ms_abi(p.proccalloption) then
  1805. parasize:=4*8
  1806. else
  1807. parasize:=0;
  1808. { calculate the registers for the normal parameters }
  1809. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1810. { append the varargs }
  1811. if assigned(varargspara) then
  1812. begin
  1813. if side=callerside then
  1814. create_paraloc_info_intern(p,side,varargspara,intparareg,mmparareg,parasize,true)
  1815. else
  1816. internalerror(2019021917);
  1817. { store used no. of SSE registers, that needs to be passed in %AL }
  1818. varargspara.mmregsused:=mmparareg;
  1819. end;
  1820. create_funcretloc_info(p,side);
  1821. result:=parasize;
  1822. end;
  1823. function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
  1824. var
  1825. intparareg,mmparareg,
  1826. parasize : longint;
  1827. begin
  1828. intparareg:=0;
  1829. mmparareg:=0;
  1830. if x86_64_use_ms_abi(p.proccalloption) then
  1831. parasize:=4*8
  1832. else
  1833. parasize:=0;
  1834. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1835. { Create Function result paraloc }
  1836. create_funcretloc_info(p,side);
  1837. { We need to return the size allocated on the stack }
  1838. result:=parasize;
  1839. end;
  1840. begin
  1841. paramanager:=tcpuparamanager.create;
  1842. end.