cpupara.pas 77 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966
  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. Generates the argument location information for x86-64 target
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cpupara;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cpubase,cgbase,cgutils,
  23. symconst,symtype,symsym,symdef,
  24. parabase,paramgr;
  25. type
  26. tcpuparamanager = class(tparamanager)
  27. private
  28. procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  29. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  30. public
  31. function param_use_paraloc(const cgpara:tcgpara):boolean;override;
  32. function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
  33. function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
  34. function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
  35. function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
  36. function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
  37. function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
  38. function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
  39. function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
  40. end;
  41. implementation
  42. uses
  43. cutils,verbose,
  44. systems,
  45. defutil,
  46. symtable,
  47. cpupi,
  48. cgx86;
  49. const
  50. paraintsupregs : array[0..5] of tsuperregister = (RS_RDI,RS_RSI,RS_RDX,RS_RCX,RS_R8,RS_R9);
  51. parammsupregs : array[0..7] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7);
  52. paraintsupregs_winx64 : array[0..3] of tsuperregister = (RS_RCX,RS_RDX,RS_R8,RS_R9);
  53. parammsupregs_winx64 : array[0..3] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3);
  54. parammsupregs_vectorcall : array[0..5] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5);
  55. {
  56. The argument classification code largely comes from libffi:
  57. ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <[email protected]>
  58. Copyright (c) 2008 Red Hat, Inc.
  59. x86-64 Foreign Function Interface
  60. Permission is hereby granted, free of charge, to any person obtaining
  61. a copy of this software and associated documentation files (the
  62. ``Software''), to deal in the Software without restriction, including
  63. without limitation the rights to use, copy, modify, merge, publish,
  64. distribute, sublicense, and/or sell copies of the Software, and to
  65. permit persons to whom the Software is furnished to do so, subject to
  66. the following conditions:
  67. The above copyright notice and this permission notice shall be included
  68. in all copies or substantial portions of the Software.
  69. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
  70. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  71. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  72. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  73. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  74. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  75. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  76. DEALINGS IN THE SOFTWARE.
  77. ----------------------------------------------------------------------- *)
  78. }
  79. const
  80. { This many classes are required in order to support 4 YMMs (_m256) in a
  81. homogeneous vector aggregate under vectorcall. [Kit] }
  82. MAX_PARA_CLASSES = 16;
  83. type
  84. tx64paraclasstype = (
  85. X86_64_NO_CLASS,
  86. X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS,
  87. X86_64_SSE_CLASS,X86_64_SSESF_CLASS,X86_64_SSEDF_CLASS,X86_64_SSEUP_CLASS,
  88. X86_64_X87_CLASS,X86_64_X87UP_CLASS,
  89. X86_64_COMPLEX_X87_CLASS,
  90. X86_64_MEMORY_CLASS
  91. );
  92. tx64paraclass = record
  93. def: tdef;
  94. typ: tx64paraclasstype;
  95. end;
  96. tx64paraclasses = array[0..MAX_PARA_CLASSES-1] of tx64paraclass;
  97. { Win64-specific helper }
  98. function aggregate_in_registers_win64(varspez:tvarspez;size:longint):boolean;
  99. begin
  100. { TODO: Temporary hack: vs_const parameters are always passed by reference for win64}
  101. result:=(varspez=vs_value) and (size in [1,2,4,8])
  102. end;
  103. (* x86-64 register passing implementation. See x86-64 ABI for details. Goal
  104. of this code is to classify each 8bytes of incoming argument by the register
  105. class and assign registers accordingly. *)
  106. function classify_representative_def(def1, def2: tdef): tdef;
  107. var
  108. def1size, def2size: asizeint;
  109. begin
  110. if not assigned(def1) then
  111. result:=def2
  112. else if not assigned(def2) then
  113. result:=def1
  114. else
  115. begin
  116. def1size:=def1.size;
  117. def2size:=def2.size;
  118. if def1size>def2size then
  119. result:=def1
  120. else if def2size>def1size then
  121. result:=def2
  122. else if def1.alignment>def2.alignment then
  123. result:=def1
  124. else
  125. result:=def2;
  126. end;
  127. end;
  128. (* Classify the argument of type TYPE and mode MODE.
  129. CLASSES will be filled by the register class used to pass each word
  130. of the operand. The number of words is returned. In case the parameter
  131. should be passed in memory, 0 is returned. As a special case for zero
  132. sized containers, classes[0] will be NO_CLASS and 1 is returned.
  133. real_size contains either def.size, or a value derived from
  134. def.bitpackedsize and the field offset denoting the number of bytes
  135. spanned by a bitpacked field
  136. See the x86-64 PS ABI for details.
  137. *)
  138. procedure classify_single_integer_class(def: tdef; size,real_size: aint; var cl: tx64paraclass; byte_offset: aint);
  139. begin
  140. if (byte_offset=0) and
  141. (real_size in [1,2,4,8]) and
  142. (not assigned(cl.def) or
  143. (def.alignment>=cl.def.alignment)) then
  144. cl.def:=def;
  145. if size<=4 then
  146. begin
  147. cl.typ:=X86_64_INTEGERSI_CLASS;
  148. { gcc/clang sign/zero-extend all values to 32 bits, except for
  149. _Bool (= Pascal boolean), which is only zero-extended to 8 bits
  150. as per the x86-64 ABI -> do the same }
  151. if not assigned(cl.def) or
  152. not is_pasbool(cl.def) or
  153. (size>1) then
  154. cl.def:=u32inttype;
  155. end
  156. else
  157. begin
  158. cl.typ:=X86_64_INTEGER_CLASS;
  159. if not assigned(cl.def) or
  160. (cl.def.size<size) or
  161. (not(cl.def.typ in [orddef,floatdef,pointerdef,classrefdef]) and
  162. not is_implicit_pointer_object_type(cl.def) and
  163. not is_dynamicstring(cl.def) and
  164. not is_dynamic_array(cl.def)) then
  165. cl.def:=u64inttype;
  166. end;
  167. end;
  168. function classify_as_integer_argument(def: tdef; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
  169. var
  170. size: aint;
  171. begin
  172. size:=byte_offset+real_size;
  173. classify_single_integer_class(def,size,real_size,classes[0],byte_offset);
  174. if size<=8 then
  175. result:=1
  176. else
  177. begin
  178. classify_single_integer_class(def,size-8,real_size,classes[1],byte_offset-8);
  179. if size>16 then
  180. internalerror(2010021401);
  181. result:=2;
  182. end
  183. end;
  184. (* Return the union class of CLASS1 and CLASS2.
  185. See the x86-64 PS ABI for details. *)
  186. function merge_classes(class1, class2: tx64paraclass): tx64paraclass;
  187. begin
  188. (* Rule #1: If both classes are equal, this is the resulting class. *)
  189. if (class1.typ=class2.typ) then
  190. begin
  191. result.typ:=class1.typ;
  192. result.def:=classify_representative_def(class1.def,class2.def);
  193. exit;
  194. end;
  195. (* Rule #2: If one of the classes is NO_CLASS, the resulting class is
  196. the other class. *)
  197. if (class1.typ=X86_64_NO_CLASS) then
  198. exit(class2);
  199. if (class2.typ=X86_64_NO_CLASS) then
  200. exit(class1);
  201. (* Rule #3: If one of the classes is MEMORY, the result is MEMORY. *)
  202. if (class1.typ=X86_64_MEMORY_CLASS) then
  203. exit(class1)
  204. else if (class2.typ=X86_64_MEMORY_CLASS) then
  205. exit(class2);
  206. (* Rule #4: If one of the classes is INTEGER, the result is INTEGER. *)
  207. { 32 bit }
  208. if ((class1.typ=X86_64_INTEGERSI_CLASS) and
  209. (class2.typ=X86_64_SSESF_CLASS)) then
  210. exit(class1)
  211. else if ((class2.typ=X86_64_INTEGERSI_CLASS) and
  212. (class1.typ=X86_64_SSESF_CLASS)) then
  213. exit(class2);
  214. { 64 bit }
  215. if (class1.typ in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  216. begin
  217. result:=class1;
  218. if result.def.size<8 then
  219. begin
  220. result.typ:=X86_64_INTEGER_CLASS;
  221. result.def:=s64inttype;
  222. end;
  223. exit
  224. end
  225. else if (class2.typ in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  226. begin
  227. result:=class2;
  228. if result.def.size<8 then
  229. begin
  230. result.typ:=X86_64_INTEGER_CLASS;
  231. result.def:=s64inttype;
  232. end;
  233. exit
  234. end;
  235. (* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
  236. MEMORY is used. *)
  237. if (class1.typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  238. begin
  239. result:=class1;
  240. result.typ:=X86_64_MEMORY_CLASS;
  241. exit;
  242. end
  243. else if (class2.typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  244. begin
  245. result:=class2;
  246. result.typ:=X86_64_MEMORY_CLASS;
  247. exit;
  248. end;
  249. (* Rule #6: Otherwise class SSE is used. *)
  250. if class1.def.size>class2.def.size then
  251. result:=class1
  252. else
  253. result:=class2;
  254. result.typ:=X86_64_SSE_CLASS;
  255. result.def:=s64floattype;
  256. end;
  257. function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint; forward;
  258. function init_aggregate_classification(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; byte_offset: aint; out words: longint; out classes: tx64paraclasses): longint;
  259. var
  260. i: longint;
  261. begin
  262. words:=0;
  263. { we'll be merging the classes elements with the subclasses
  264. elements, so initialise them first }
  265. for i:=low(classes) to high(classes) do
  266. begin
  267. classes[i].typ:=X86_64_NO_CLASS;
  268. classes[i].def:=nil;
  269. end;
  270. { win64 follows a different convention here }
  271. if x86_64_use_ms_abi(calloption) then
  272. begin
  273. if aggregate_in_registers_win64(varspez,def.size) then
  274. begin
  275. classes[0].typ:=X86_64_INTEGER_CLASS;
  276. classes[0].def:=def;
  277. result:=1;
  278. end
  279. else if (calloption = pocall_vectorcall) then
  280. begin
  281. words := (def.size+byte_offset mod 8+7) div 8;
  282. case words of
  283. 0:
  284. Exit(0);
  285. 1..4:
  286. { Aligned vector or array elements }
  287. Result := words;
  288. else
  289. if ((def.aggregatealignment mod (words shl 3)) = 0) or
  290. Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0)
  291. then
  292. begin
  293. { Field of aligned vector type }
  294. if words = 0 then
  295. begin
  296. classes[0].typ:=X86_64_NO_CLASS;
  297. classes[0].def:=def;
  298. Result := 1;
  299. end
  300. else
  301. Result := words;
  302. end
  303. else
  304. Result := 0;
  305. end;
  306. end
  307. else
  308. Result := 0;
  309. Exit;
  310. end;
  311. (* If the struct is larger than 32 bytes, pass it on the stack. *)
  312. if def.size > 32 then
  313. exit(0);
  314. { if a struct starts an offset not divisible by 8, it can span extra
  315. words }
  316. words:=(def.size+byte_offset mod 8+7) div 8;
  317. (* Zero sized arrays or structures are NO_CLASS. We return 0 to
  318. signal memory class, so handle it as special case. *)
  319. if (words=0) then
  320. begin
  321. classes[0].typ:=X86_64_NO_CLASS;
  322. classes[0].def:=def;
  323. exit(1);
  324. end;
  325. result:=words;
  326. end;
  327. function classify_aggregate_element(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; new_byte_offset: aint): longint;
  328. var
  329. subclasses: tx64paraclasses;
  330. i,
  331. pos: longint;
  332. begin
  333. fillchar(subclasses,sizeof(subclasses),0);
  334. result:=classify_argument(calloption,def,parentdef,varspez,real_size,subclasses,new_byte_offset, True);
  335. if (result=0) then
  336. exit;
  337. pos:=new_byte_offset div 8;
  338. if result-1+pos>high(classes) then
  339. internalerror(2010053108);
  340. for i:=0 to result-1 do
  341. begin
  342. classes[i+pos] :=
  343. merge_classes(subclasses[i],classes[i+pos]);
  344. end;
  345. inc(result,pos);
  346. end;
  347. function finalize_aggregate_classification(calloption: tproccalloption; def: tdef; words: longint; var classes: tx64paraclasses): longint;
  348. var
  349. i, j, vecsize, maxvecsize: longint;
  350. begin
  351. { Workaround: It's not immediately possible to determine if a Double is
  352. by itself or is part of an aligned vector. If the latter, correct the
  353. class definitions here. [Kit] }
  354. if (classes[0].typ = X86_64_SSEDF_CLASS) and (classes[1].typ = X86_64_SSEUP_CLASS) then
  355. classes[0].typ := X86_64_SSE_CLASS;
  356. if (words>2) then
  357. begin
  358. { When size > 16 bytes, if the first one isn't
  359. X86_64_SSE_CLASS or any other ones aren't
  360. X86_64_SSEUP_CLASS, everything should be passed in
  361. memory... }
  362. if (classes[0].typ<>X86_64_SSE_CLASS) then
  363. begin
  364. { ... except if the calling convention is 'vectorcall', then
  365. check to see if we don't have an HFA of 3 or 4 Doubles }
  366. if (calloption <> pocall_vectorcall) or (words > 4) then
  367. Exit(0);
  368. for i := 0 to words - 1 do
  369. if classes[i].typ <> X86_64_SSEDF_CLASS then
  370. Exit(0);
  371. Exit(words);
  372. end;
  373. if ((words shl 3) > def.aggregatealignment) then
  374. { The alignment is wrong for this vector size, hence it is unaligned }
  375. Exit(0);
  376. vecsize := 1;
  377. maxvecsize := words;
  378. for i:=1 to words-1 do
  379. if (classes[i].typ=X86_64_SSEUP_CLASS) then
  380. Inc(vecsize)
  381. else
  382. begin
  383. { Exceptional case. Check that we're not dealing an array of
  384. aligned vectors that is itself aligned to a stricter
  385. boundary (e.g. 4 XMM registers that can be merged into a
  386. single ZMM register). }
  387. if
  388. (classes[i].typ <> X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
  389. ((vecsize and (vecsize - 1)) <> 0) or { If vecsize is not a power of two, then it is definitely not a valid vector }
  390. (vecsize > maxvecsize) or ((maxvecsize < words) and (vecsize <> maxvecsize)) { Mixture of XMMs and YMMs, for example, is not valid }
  391. then
  392. Exit(0);
  393. classes[i].typ := X86_64_SSEUP_CLASS;
  394. maxvecsize := vecsize;
  395. vecsize := 1;
  396. end;
  397. if vecsize <> maxvecsize then
  398. { Last vector is of a different size }
  399. Exit(0);
  400. if vecsize > 2 then
  401. begin
  402. { Cannot use 256-bit and 512-bit vectors if we're not using AVX }
  403. if not UseAVX then
  404. Exit(0);
  405. { WARNING: There is currently no support for 256-bit and 512-bit
  406. aligned vectors, so if an aggregate contains more than two
  407. eightbyte words, it must be passed in memory. When 256-bit and
  408. 512-bit vectors are fully supported, remove the following
  409. line. [Kit] }
  410. Exit(0);
  411. end;
  412. end;
  413. (* Final merger cleanup. *)
  414. (* The first one must never be X86_64_SSEUP_CLASS or
  415. X86_64_X87UP_CLASS. *)
  416. if (classes[0].typ=X86_64_SSEUP_CLASS) or
  417. (classes[0].typ=X86_64_X87UP_CLASS) then
  418. internalerror(2010021402);
  419. for i:=0 to words-1 do
  420. begin
  421. (* If one class is MEMORY, everything should be passed in
  422. memory. *)
  423. if (classes[i].typ=X86_64_MEMORY_CLASS) then
  424. exit(0);
  425. (* The X86_64_SSEUP_CLASS should be always preceded by
  426. X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. *)
  427. if (classes[i].typ=X86_64_SSEUP_CLASS) and
  428. (classes[i-1].typ<>X86_64_SSE_CLASS) and
  429. (classes[i-1].typ<>X86_64_SSEUP_CLASS) then
  430. begin
  431. classes[i].typ:=X86_64_SSE_CLASS;
  432. classes[i].def:=carraydef.getreusable_no_free(s32floattype,2);
  433. end;
  434. (* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
  435. everything should be passed in memory. *)
  436. if (classes[i].typ=X86_64_X87UP_CLASS) and
  437. (classes[i-1].typ<>X86_64_X87_CLASS) then
  438. exit(0);
  439. (* FPC addition: because we store an extended in 10 bytes, the
  440. X86_64_X87UP_CLASS can be replaced with e.g. INTEGER if an
  441. extended is followed by e.g. an array [0..5] of byte -> we also
  442. have to check whether each X86_64_X87_CLASS is followed by
  443. X86_64_X87UP_CLASS -- if not, pass in memory
  444. This cannot happen in the original ABI, because there
  445. sizeof(extended) = 16 and hence nothing can be merged with
  446. X86_64_X87UP_CLASS and change it into something else *)
  447. if (classes[i].typ=X86_64_X87_CLASS) and
  448. ((i=(words-1)) or
  449. (classes[i+1].typ<>X86_64_X87UP_CLASS)) then
  450. exit(0);
  451. end;
  452. {$ifndef llvm}
  453. { FIXME: in case a record contains empty padding space, e.g. a
  454. "single" field followed by a "double", then we have a problem
  455. because the cgpara helpers cannot figure out that they should
  456. skip 4 bytes after storing the single (LOC_MMREGISTER with size
  457. OS_F32) to memory before storing the double -> for now scale
  458. such locations always up to 64 bits, although this loads/stores
  459. some superfluous data }
  460. { 1) the first part is 32 bit while there is still a second part }
  461. if (classes[1].typ<>X86_64_NO_CLASS) then
  462. case classes[0].typ of
  463. X86_64_INTEGERSI_CLASS:
  464. begin
  465. classes[0].typ:=X86_64_INTEGER_CLASS;
  466. classes[0].def:=s64inttype;
  467. end;
  468. X86_64_SSESF_CLASS:
  469. begin
  470. classes[0].typ:=X86_64_SSE_CLASS;
  471. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  472. end;
  473. end;
  474. { 2) the second part is 32 bit, but the total size is > 12 bytes }
  475. if (def.size>12) then
  476. case classes[1].typ of
  477. X86_64_INTEGERSI_CLASS:
  478. begin
  479. classes[1].typ:=X86_64_INTEGER_CLASS;
  480. classes[1].def:=s64inttype;
  481. end;
  482. X86_64_SSESF_CLASS:
  483. begin
  484. classes[1].typ:=X86_64_SSE_CLASS;
  485. classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
  486. end;
  487. end;
  488. {$endif not llvm}
  489. result:=words;
  490. end;
  491. function try_build_homogeneous_aggregate(def: tdef; words: longint; var classes: tx64paraclasses): longint;
  492. var
  493. i, vecsize, maxvecsize, veccount, num: longint;
  494. size, byte_offset: aint;
  495. vs: TFieldVarSym;
  496. checkalignment: Boolean;
  497. begin
  498. if (words = 0) then
  499. { Should be at least 1 word at this point }
  500. InternalError(2018013100);
  501. case classes[0].typ of
  502. X86_64_SSESF_CLASS:
  503. begin
  504. { Should be an HFA of only a Single }
  505. for i := 1 to High(classes) do
  506. if classes[i].typ <> X86_64_NO_CLASS then
  507. Exit(0);
  508. result := 1;
  509. end;
  510. X86_64_SSEDF_CLASS:
  511. begin
  512. { Possibly an HFA of Doubles }
  513. if TAbstractRecordDef(def).symtable.symlist.count = 0 then
  514. Exit(0);
  515. { Get the information and position on the last entry }
  516. vs:=TFieldVarSym(TAbstractRecordDef(def).symtable.symlist[TAbstractRecordDef(def).symtable.symlist.count - 1]);
  517. size:=vs.vardef.size;
  518. checkalignment:=true;
  519. if not TAbstractRecordSymtable(TAbstractRecordDef(def).symtable).is_packed then
  520. begin
  521. byte_offset:=vs.fieldoffset;
  522. size:=vs.vardef.size;
  523. end
  524. else
  525. begin
  526. byte_offset:=vs.fieldoffset div 8;
  527. if (vs.vardef.typ in [orddef,enumdef]) then
  528. begin
  529. { calculate the number of bytes spanned by
  530. this bitpacked field }
  531. size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8);
  532. { our bitpacked fields are interpreted as always being
  533. aligned, because unlike in C we don't have char:1, int:1
  534. etc (so everything is basically a char:x) }
  535. checkalignment:=false;
  536. end
  537. else
  538. size:=vs.vardef.size;
  539. end;
  540. { If [..] an object [..] contains unaligned fields, it has class
  541. MEMORY }
  542. if checkalignment and
  543. (align(byte_offset,vs.vardef.structalignment)<>byte_offset) then
  544. begin
  545. result:=0;
  546. exit;
  547. end;
  548. if words > 4 then
  549. { HFA too large }
  550. Exit(0);
  551. for i := 1 to words - 1 do
  552. if classes[i].typ <> X86_64_SSEDF_CLASS then
  553. Exit(0);
  554. result := words;
  555. end;
  556. X86_64_SSE_CLASS:
  557. begin
  558. { Determine the nature of the classes.
  559. - If the SSE is by itself, then it is an HFA consisting of 2 Singles.
  560. - If the SSE is followed by an SSESF, then it is an HFA consisting of 3 Singles.
  561. - If the SSE is followed by an SSE and nothing else, then it is an HFA consisting of 4 Singles.
  562. - If the SSE is followed by an SSE, but another class follows, then it is an HFA that is too large.
  563. - If the SSE is followed by an SSEUP, then it is an HVA of some kind.
  564. }
  565. case classes[1].typ of
  566. X86_64_NO_CLASS:
  567. begin
  568. for i := 2 to words - 1 do
  569. if classes[i].typ <> X86_64_NO_CLASS then
  570. { Compound type }
  571. Exit(0);
  572. { Split into 2 Singles again so they correctly fall into separate XMM registers }
  573. classes[0].typ := X86_64_SSESF_CLASS;
  574. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the array }
  575. classes[1].typ := X86_64_SSESF_CLASS;
  576. classes[1].def := classes[0].def;
  577. result := 2;
  578. end;
  579. X86_64_SSESF_CLASS:
  580. begin
  581. for i := 2 to words - 1 do
  582. if classes[i].typ <> X86_64_NO_CLASS then
  583. { Compound type }
  584. Exit(0);
  585. classes[2].typ := X86_64_SSESF_CLASS;
  586. classes[2].def := classes[1].def; { Transfer class 1 to class 2 }
  587. classes[0].typ := X86_64_SSESF_CLASS;
  588. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the array }
  589. classes[1].typ := X86_64_SSESF_CLASS;
  590. classes[1].def := classes[0].def;
  591. result := 3;
  592. end;
  593. X86_64_SSE_CLASS:
  594. begin
  595. for i := 2 to words - 1 do
  596. if classes[i].typ <> X86_64_NO_CLASS then
  597. { HFA too large (or not a true HFA) }
  598. Exit(0);
  599. classes[0].def := tdef(tarraydef(classes[0].def).elementdef); { Break up the arrays }
  600. classes[2].def := tdef(tarraydef(classes[1].def).elementdef);
  601. classes[1].def := classes[0].def;
  602. classes[3].def := classes[2].def;
  603. classes[0].typ := X86_64_SSESF_CLASS;
  604. classes[1].typ := X86_64_SSESF_CLASS;
  605. classes[2].typ := X86_64_SSESF_CLASS;
  606. classes[3].typ := X86_64_SSESF_CLASS;
  607. result := 4;
  608. end;
  609. X86_64_SSEUP_CLASS:
  610. begin
  611. { Determine vector size }
  612. veccount := 1;
  613. vecsize := 2;
  614. maxvecsize := words;
  615. for i := 2 to words - 1 do
  616. if (classes[i].typ=X86_64_SSEUP_CLASS) then
  617. Inc(vecsize)
  618. else
  619. begin
  620. if
  621. (classes[i].typ <> X86_64_SSE_CLASS) or { Easy case first - is it actually another SSE vector? }
  622. ((vecsize and (vecsize - 1)) <> 0) or { If vecsize is not a power of two, then it is definitely not a valid aggregate }
  623. (vecsize > maxvecsize) or ((maxvecsize < words) and (vecsize <> maxvecsize)) { Mixture of XMMs and YMMs, for example, is not valid }
  624. then
  625. Exit(0);
  626. Inc(veccount);
  627. maxvecsize := vecsize;
  628. vecsize := 1;
  629. end;
  630. if vecsize <> maxvecsize then
  631. { Last vector is of a different size }
  632. Exit(0);
  633. if veccount > 4 then
  634. { HVA too large }
  635. Exit(0);
  636. Result := words;
  637. end;
  638. else
  639. Exit(0);
  640. end;
  641. end;
  642. else
  643. Exit(0);
  644. end;
  645. end;
  646. function classify_record(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  647. var
  648. vs: tfieldvarsym;
  649. size,
  650. new_byte_offset: aint;
  651. i,
  652. words,
  653. num: longint;
  654. checkalignment: boolean;
  655. begin
  656. result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,words,classes);
  657. if (words=0) then
  658. exit;
  659. (* Merge the fields of the structure. *)
  660. for i:=0 to tabstractrecorddef(def).symtable.symlist.count-1 do
  661. begin
  662. if tsym(tabstractrecorddef(def).symtable.symlist[i]).typ<>fieldvarsym then
  663. continue;
  664. vs:=tfieldvarsym(tabstractrecorddef(def).symtable.symlist[i]);
  665. checkalignment:=true;
  666. if not tabstractrecordsymtable(tabstractrecorddef(def).symtable).is_packed then
  667. begin
  668. new_byte_offset:=byte_offset+vs.fieldoffset;
  669. size:=vs.vardef.size;
  670. end
  671. else
  672. begin
  673. new_byte_offset:=byte_offset+vs.fieldoffset div 8;
  674. if (vs.vardef.typ in [orddef,enumdef]) then
  675. begin
  676. { calculate the number of bytes spanned by
  677. this bitpacked field }
  678. size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8);
  679. { our bitpacked fields are interpreted as always being
  680. aligned, because unlike in C we don't have char:1, int:1
  681. etc (so everything is basically a char:x) }
  682. checkalignment:=false;
  683. end
  684. else
  685. size:=vs.vardef.size;
  686. end;
  687. { If [..] an object [..] contains unaligned fields, it has class
  688. MEMORY }
  689. if checkalignment and
  690. (align(new_byte_offset,vs.vardef.structalignment)<>new_byte_offset) then
  691. begin
  692. result:=0;
  693. exit;
  694. end;
  695. num:=classify_aggregate_element(calloption,vs.vardef,def,varspez,size,classes,new_byte_offset);
  696. if (num=0) then
  697. exit(0);
  698. end;
  699. result:=finalize_aggregate_classification(calloption,def,words,classes);
  700. { There is still one case where it might not have to be passed on the
  701. stack, and that's a homogeneous vector aggregate (HVA) or a
  702. homogeneous float aggregate (HFA) under vectorcall. }
  703. if (calloption = pocall_vectorcall) then
  704. begin
  705. if (result = 0) then
  706. result := try_build_homogeneous_aggregate(def,words,classes)
  707. else
  708. { If we're dealing with an HFA that has 3 or 4 Singles, pairs of
  709. Singles may be merged into a single SSE_CLASS, which must be
  710. split into separate SSESF_CLASS references for vectorcall; this
  711. is only performed in "try_build_homogeneous_aggregate" and not
  712. elsewhere, so accommodate for this exceptional case. [Kit] }
  713. if (result = 2) then
  714. begin
  715. num := try_build_homogeneous_aggregate(def,words,classes);
  716. if num <> 0 then
  717. { If it's equal to zero, just pass 2 and handle the record
  718. type normally }
  719. result := num;
  720. end;
  721. end;
  722. end;
  723. function classify_normal_array(calloption: tproccalloption; def: tarraydef; parentdef: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  724. var
  725. i, elecount: aword;
  726. size,
  727. elesize,
  728. new_byte_offset,
  729. bitoffset: aint;
  730. words,
  731. num: longint;
  732. isbitpacked: boolean;
  733. begin
  734. size:=0;
  735. bitoffset:=0;
  736. result:=init_aggregate_classification(calloption,def,parentdef,varspez,byte_offset,words,classes);
  737. if (words=0) then
  738. exit;
  739. isbitpacked:=is_packed_array(def);
  740. if not isbitpacked then
  741. begin
  742. elesize:=def.elesize;
  743. size:=elesize;
  744. end
  745. else
  746. begin
  747. elesize:=def.elepackedbitsize;
  748. bitoffset:=0;
  749. end;
  750. (* Merge the elements of the array. *)
  751. i:=0;
  752. elecount:=def.elecount;
  753. repeat
  754. if not isbitpacked then
  755. begin
  756. { size does not change }
  757. new_byte_offset:=byte_offset+i*elesize;
  758. { If [..] an object [..] contains unaligned fields, it has class
  759. MEMORY }
  760. if align(new_byte_offset,def.alignment)<>new_byte_offset then
  761. begin
  762. result:=0;
  763. exit;
  764. end;
  765. end
  766. else
  767. begin
  768. { calculate the number of bytes spanned by this bitpacked
  769. element }
  770. size:=((bitoffset+elesize+7) div 8)-(bitoffset div 8);
  771. new_byte_offset:=byte_offset+(elesize*i) div 8;
  772. { bit offset of next element }
  773. inc(bitoffset,elesize);
  774. end;
  775. num:=classify_aggregate_element(calloption,def.elementdef,def,varspez,size,classes,new_byte_offset);
  776. if (num=0) then
  777. exit(0);
  778. inc(i);
  779. until (i=elecount);
  780. result:=finalize_aggregate_classification(calloption,def,words,classes);
  781. end;
  782. function classify_argument(calloption: tproccalloption; def: tdef; parentdef: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint; round_to_8: Boolean): longint;
  783. var
  784. rounded_offset: aint;
  785. begin
  786. if round_to_8 then
  787. rounded_offset := byte_offset mod 8
  788. else
  789. rounded_offset := byte_offset;
  790. case def.typ of
  791. orddef,
  792. enumdef,
  793. pointerdef,
  794. classrefdef:
  795. result:=classify_as_integer_argument(def,real_size,classes,rounded_offset);
  796. formaldef:
  797. result:=classify_as_integer_argument(voidpointertype,voidpointertype.size,classes,rounded_offset);
  798. floatdef:
  799. begin
  800. classes[0].def:=def;
  801. case tfloatdef(def).floattype of
  802. s32real:
  803. begin
  804. if (byte_offset mod 8) = 0 then { Check regardless of the round_to_8 flag }
  805. begin
  806. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
  807. { Third element of an aligned vector }
  808. classes[0].typ:=X86_64_SSEUP_CLASS
  809. else
  810. classes[0].typ:=X86_64_SSESF_CLASS
  811. end
  812. else
  813. begin
  814. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) then
  815. { Fourth element of an aligned vector }
  816. classes[0].typ:=X86_64_SSEUP_CLASS
  817. else
  818. { if we have e.g. a record with two successive "single"
  819. fields, we need a 64 bit rather than a 32 bit load }
  820. classes[0].typ:=X86_64_SSE_CLASS;
  821. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  822. end;
  823. result:=1;
  824. end;
  825. s64real:
  826. begin
  827. if Assigned(parentdef) and ((parentdef.aggregatealignment mod 16) = 0) and ((byte_offset mod parentdef.aggregatealignment) <> 0) then
  828. { Aligned vector of type double }
  829. classes[0].typ:=X86_64_SSEUP_CLASS
  830. else
  831. classes[0].typ:=X86_64_SSEDF_CLASS;
  832. result:=1;
  833. end;
  834. s80real,
  835. sc80real:
  836. begin
  837. classes[0].typ:=X86_64_X87_CLASS;
  838. classes[1].typ:=X86_64_X87UP_CLASS;
  839. classes[1].def:=def;
  840. result:=2;
  841. end;
  842. s64comp,
  843. s64currency:
  844. begin
  845. classes[0].typ:=X86_64_INTEGER_CLASS;
  846. result:=1;
  847. end;
  848. s128real:
  849. begin
  850. classes[0].typ:=X86_64_SSE_CLASS;
  851. classes[0].def:=carraydef.getreusable_no_free(s32floattype,2);
  852. classes[1].typ:=X86_64_SSEUP_CLASS;
  853. classes[1].def:=carraydef.getreusable_no_free(s32floattype,2);
  854. result:=2;
  855. end;
  856. else
  857. internalerror(2010060301);
  858. end;
  859. end;
  860. recorddef:
  861. result:=classify_record(calloption,def,parentdef,varspez,classes,rounded_offset);
  862. objectdef:
  863. begin
  864. if is_object(def) then
  865. { pass by reference, like ppc and i386 }
  866. result:=0
  867. else
  868. { all kinds of pointer types: class, objcclass, interface, ... }
  869. result:=classify_as_integer_argument(def,voidpointertype.size,classes,rounded_offset);
  870. end;
  871. setdef:
  872. begin
  873. if is_smallset(def) then
  874. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset)
  875. else
  876. result:=0;
  877. end;
  878. stringdef:
  879. begin
  880. if (tstringdef(def).stringtype in [st_shortstring,st_longstring]) then
  881. result:=0
  882. else
  883. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset);
  884. end;
  885. arraydef:
  886. begin
  887. { a dynamic array is treated like a pointer }
  888. if is_dynamic_array(def) then
  889. result:=classify_as_integer_argument(def,voidpointertype.size,classes,rounded_offset)
  890. { other special arrays are passed on the stack }
  891. else if is_open_array(def) or
  892. is_array_of_const(def) then
  893. result:=0
  894. else
  895. { normal array }
  896. result:=classify_normal_array(calloption,tarraydef(def),parentdef,varspez,classes,rounded_offset);
  897. end;
  898. { the file record is definitely too big }
  899. filedef:
  900. result:=0;
  901. procvardef:
  902. begin
  903. if (po_methodpointer in tprocvardef(def).procoptions) then
  904. begin
  905. { treat as TMethod record }
  906. def:=search_system_type('TMETHOD').typedef;
  907. result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,rounded_offset, False);
  908. end
  909. else
  910. { pointer }
  911. result:=classify_as_integer_argument(def,def.size,classes,rounded_offset);
  912. end;
  913. variantdef:
  914. begin
  915. { same as tvardata record }
  916. def:=search_system_type('TVARDATA').typedef;
  917. result:=classify_argument(calloption,def,parentdef,varspez,def.size,classes,rounded_offset, False);
  918. end;
  919. undefineddef:
  920. { show shall we know?
  921. since classify_argument is called during parsing, see tw27685.pp,
  922. we handle undefineddef here }
  923. result:=0;
  924. errordef:
  925. { error message should have been thrown already before, so avoid only
  926. an internal error }
  927. result:=0;
  928. else
  929. internalerror(2010021405);
  930. end;
  931. end;
  932. { Returns the size of a single element in the aggregate, or the entire vector, if it is one of these types, 0 otherwise }
  933. function is_simd_vector_type_or_homogeneous_aggregate(calloption: tproccalloption; def: tdef; varspez: tvarspez): aint;
  934. var
  935. numclasses,i,vecsize,veccount,maxvecsize,elementsize,tempsize:longint;
  936. classes: tx64paraclasses;
  937. firstclass: tx64paraclasstype;
  938. begin
  939. for i := Low(classes) to High(classes) do
  940. begin
  941. classes[i].typ := X86_64_NO_CLASS;
  942. classes[i].def := nil;
  943. end;
  944. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  945. if numclasses = 0 then
  946. Exit(0);
  947. firstclass := classes[0].typ;
  948. case firstclass of
  949. X86_64_SSESF_CLASS: { Only valid if the aggregate contains a lone Single }
  950. begin
  951. if (numclasses = 1) and (calloption = pocall_vectorcall) then
  952. Result := 4
  953. else
  954. Result := 0;
  955. Exit;
  956. end;
  957. X86_64_SSEDF_CLASS:
  958. begin
  959. if (numclasses > 1) and (calloption <> pocall_vectorcall) then
  960. Result := 0
  961. else
  962. begin
  963. for i := 1 to numclasses - 1 do
  964. if classes[i].typ <> X86_64_SSEDF_CLASS then
  965. begin
  966. Result := 0;
  967. Exit;
  968. end;
  969. if (def.size div 8) <> numclasses then
  970. { Wrong alignment or compound size }
  971. Result := 0
  972. else
  973. Result := 8;
  974. end;
  975. end;
  976. X86_64_SSE_CLASS:
  977. begin
  978. maxvecsize := numclasses * 2;
  979. if numclasses = 1 then
  980. begin
  981. { 2 Singles }
  982. if calloption = pocall_vectorcall then
  983. Result := 4
  984. else
  985. Result := 0;
  986. Exit;
  987. end;
  988. if classes[1].typ = X86_64_SSESF_CLASS then
  989. begin
  990. { 3 Singles }
  991. if numclasses <> 2 then
  992. Result := 0
  993. else
  994. Result := 4;
  995. Exit;
  996. end;
  997. vecsize := 2;
  998. veccount := 1;
  999. for i := 1 to numclasses - 1 do
  1000. case classes[i].typ of
  1001. X86_64_SSEUP_CLASS:
  1002. Inc(vecsize, 2);
  1003. X86_64_SSE_CLASS:
  1004. begin
  1005. if (maxvecsize < numclasses * 2) and (vecsize <> maxvecsize) then
  1006. { Different vector sizes }
  1007. Exit(0);
  1008. maxvecsize := vecsize;
  1009. vecsize := 2;
  1010. Inc(veccount);
  1011. end;
  1012. else
  1013. Exit(0);
  1014. end;
  1015. if vecsize <> maxvecsize then
  1016. { Last vector has to be the same size }
  1017. Exit(0);
  1018. { Either an HFA with 4 Singles, or an HVA with up to 4 vectors
  1019. (or a lone SIMD vector if veccount = 1) }
  1020. if (veccount < 4) then
  1021. begin
  1022. if (veccount > 1) and (calloption <> pocall_vectorcall) then
  1023. Result := 0
  1024. else
  1025. if vecsize = 2 then
  1026. { Packed, unaligned array of Singles }
  1027. Result := 4
  1028. else
  1029. Result := vecsize * 8
  1030. end
  1031. else
  1032. Result := 0;
  1033. end;
  1034. else
  1035. Exit(0);
  1036. end;
  1037. end;
  1038. procedure getvalueparaloc(calloption: tproccalloption;varspez:tvarspez;def:tdef;var classes: tx64paraclasses);
  1039. var
  1040. size: aint;
  1041. i: longint;
  1042. numclasses: longint;
  1043. begin
  1044. { init the classes array, because even if classify_argument inits only
  1045. one element we copy both to loc1/loc2 in case "1" is returned }
  1046. for i:=low(classes) to high(classes) do
  1047. begin
  1048. classes[i].typ:=X86_64_NO_CLASS;
  1049. classes[i].def:=nil;
  1050. end;
  1051. { def.size internalerrors for open arrays and dynamic arrays, since
  1052. their size cannot be determined at compile-time.
  1053. classify_argument does not look at the realsize argument for arrays
  1054. cases, but we obviously do have to pass something... }
  1055. if is_special_array(def) then
  1056. size:=-1
  1057. else
  1058. size:=def.size;
  1059. numclasses:=classify_argument(calloption,def,nil,varspez,size,classes,0,False);
  1060. case numclasses of
  1061. 0:
  1062. begin
  1063. classes[0].typ:=X86_64_MEMORY_CLASS;
  1064. classes[0].def:=def;
  1065. end;
  1066. 1..4:
  1067. begin
  1068. { If the class is X87, X87UP or COMPLEX_X87, it is passed in memory }
  1069. for i := 0 to numclasses - 1 do
  1070. begin
  1071. if classes[i].typ in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS] then
  1072. classes[i].typ:=X86_64_MEMORY_CLASS;
  1073. end;
  1074. end;
  1075. else
  1076. { 8 can happen for _m512 vectors, but are not yet supported }
  1077. internalerror(2010021501);
  1078. end;
  1079. end;
  1080. function tcpuparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
  1081. var
  1082. classes: tx64paraclasses;
  1083. numclasses: longint;
  1084. begin
  1085. if handle_common_ret_in_param(def,pd,result) then
  1086. exit;
  1087. fillchar(classes,sizeof(classes),0);
  1088. case def.typ of
  1089. { for records it depends on their contents and size }
  1090. recorddef,
  1091. { make sure we handle 'procedure of object' correctly }
  1092. procvardef:
  1093. begin
  1094. numclasses:=classify_argument(pd.proccalloption,def,nil,vs_value,def.size,classes,0,False);
  1095. result:=(numclasses=0);
  1096. end;
  1097. else
  1098. result:=inherited ret_in_param(def,pd);
  1099. end;
  1100. end;
  1101. function tcpuparamanager.param_use_paraloc(const cgpara:tcgpara):boolean;
  1102. var
  1103. paraloc : pcgparalocation;
  1104. begin
  1105. if not assigned(cgpara.location) then
  1106. internalerror(200410102);
  1107. result:=true;
  1108. { All locations are LOC_REFERENCE }
  1109. paraloc:=cgpara.location;
  1110. while assigned(paraloc) do
  1111. begin
  1112. if (paraloc^.loc<>LOC_REFERENCE) then
  1113. begin
  1114. result:=false;
  1115. exit;
  1116. end;
  1117. paraloc:=paraloc^.next;
  1118. end;
  1119. end;
  1120. { true if a parameter is too large to copy and only the address is pushed }
  1121. function tcpuparamanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
  1122. var
  1123. classes: tx64paraclasses;
  1124. numclasses: longint;
  1125. begin
  1126. fillchar(classes,sizeof(classes),0);
  1127. result:=false;
  1128. { var,out,constref always require address }
  1129. if varspez in [vs_var,vs_out,vs_constref] then
  1130. begin
  1131. result:=true;
  1132. exit;
  1133. end;
  1134. { Only vs_const, vs_value here }
  1135. case def.typ of
  1136. formaldef :
  1137. result:=true;
  1138. recorddef :
  1139. begin
  1140. { MetroWerks Pascal: const records always passed by reference
  1141. (for Mac OS X interfaces) }
  1142. if (calloption=pocall_mwpascal) and
  1143. (varspez=vs_const) then
  1144. result:=true
  1145. { Win ABI depends on size to pass it in a register or not }
  1146. else if x86_64_use_ms_abi(calloption) then
  1147. begin
  1148. if calloption = pocall_vectorcall then
  1149. begin
  1150. { "vectorcall" has the addition that it allows for aligned SSE types }
  1151. result :=
  1152. not aggregate_in_registers_win64(varspez,def.size) and
  1153. (is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
  1154. end
  1155. else
  1156. result:=not aggregate_in_registers_win64(varspez,def.size)
  1157. end
  1158. { pass constant parameters that would be passed via memory by
  1159. reference for non-cdecl/cppdecl, and make sure that the tmethod
  1160. record (size=16) is passed the same way as a complex procvar }
  1161. else if ((varspez=vs_const) and
  1162. not(calloption in cdecl_pocalls)) or
  1163. (def.size=16) then
  1164. begin
  1165. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  1166. result:=numclasses=0;
  1167. end
  1168. else
  1169. { SysV ABI always passes it as value parameter }
  1170. result:=false;
  1171. end;
  1172. arraydef :
  1173. begin
  1174. { cdecl array of const need to be ignored and therefor be puhsed
  1175. as value parameter with length 0 }
  1176. if ((calloption in cdecl_pocalls) and
  1177. is_array_of_const(def)) or
  1178. is_dynamic_array(def) then
  1179. result:=false
  1180. else if (calloption = pocall_vectorcall) then
  1181. begin
  1182. { Pass all arrays by reference unless they are a valid, aligned SIMD type (arrays can't be homogeneous aggregates) }
  1183. result := (is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,def,vs_value) = 0);
  1184. end
  1185. else
  1186. { pass all arrays by reference to be compatible with C (passing
  1187. an array by value (= copying it on the stack) does not exist,
  1188. because an array is the same as a pointer there }
  1189. result:=true
  1190. end;
  1191. objectdef :
  1192. begin
  1193. { don't treat objects like records, because we only know wheter
  1194. or not they'll have a VMT after the entire object is parsed
  1195. -> if they are used as function result from one of their own
  1196. methods, their size can still change after we've determined
  1197. whether this function result should be returned by reference or
  1198. by value }
  1199. if is_object(def) then
  1200. result:=true;
  1201. end;
  1202. variantdef,
  1203. stringdef,
  1204. procvardef,
  1205. setdef :
  1206. begin
  1207. numclasses:=classify_argument(calloption,def,nil,vs_value,def.size,classes,0,False);
  1208. result:=numclasses=0;
  1209. end;
  1210. end;
  1211. end;
  1212. function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
  1213. begin
  1214. if x86_64_use_ms_abi(calloption) then
  1215. result:=[RS_RAX,RS_RCX,RS_RDX,RS_R8,RS_R9,RS_R10,RS_R11]
  1216. else
  1217. result:=[RS_RAX,RS_RCX,RS_RDX,RS_RSI,RS_RDI,RS_R8,RS_R9,RS_R10,RS_R11];
  1218. end;
  1219. function tcpuparamanager.get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;
  1220. begin
  1221. if x86_64_use_ms_abi(calloption) then
  1222. result:=[RS_XMM0..RS_XMM5]
  1223. else
  1224. result:=[RS_XMM0..RS_XMM15];
  1225. end;
  1226. function tcpuparamanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
  1227. begin
  1228. result:=[RS_ST0..RS_ST7];
  1229. end;
  1230. function tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
  1231. const
  1232. intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
  1233. mmretregs: array[0..1] of tregister = (NR_MM_RESULT_REG,NR_MM_RESULT_REG_HIGH);
  1234. mmretregs_vectorcall: array[0..3] of tregister = (NR_XMM0,NR_XMM1,NR_XMM2,NR_XMM3);
  1235. var
  1236. classes: tx64paraclasses;
  1237. i,j,
  1238. numclasses: longint;
  1239. intretregidx,
  1240. mmretregidx: longint;
  1241. retcgsize : tcgsize;
  1242. paraloc : pcgparalocation;
  1243. begin
  1244. if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
  1245. exit;
  1246. { Return in FPU register? -> don't use classify_argument(), because
  1247. currency and comp need special treatment here (they are integer class
  1248. when passing as parameter, but LOC_FPUREGISTER as function result) }
  1249. if result.def.typ=floatdef then
  1250. begin
  1251. paraloc:=result.add_location;
  1252. paraloc^.def:=result.def;
  1253. case tfloatdef(result.def).floattype of
  1254. s32real:
  1255. begin
  1256. paraloc^.loc:=LOC_MMREGISTER;
  1257. paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
  1258. paraloc^.size:=OS_F32;
  1259. end;
  1260. s64real:
  1261. begin
  1262. paraloc^.loc:=LOC_MMREGISTER;
  1263. paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
  1264. paraloc^.size:=OS_F64;
  1265. end;
  1266. { the first two only exist on targets with an x87, on others
  1267. they are replace by int64 }
  1268. s64currency,
  1269. s64comp,
  1270. s80real,
  1271. sc80real:
  1272. begin
  1273. paraloc^.loc:=LOC_FPUREGISTER;
  1274. paraloc^.register:=NR_FPU_RESULT_REG;
  1275. paraloc^.size:=retcgsize;
  1276. end;
  1277. else
  1278. internalerror(200405034);
  1279. end;
  1280. end
  1281. else
  1282. { Return in register }
  1283. begin
  1284. fillchar(classes,sizeof(classes),0);
  1285. numclasses:=classify_argument(p.proccalloption,result.def,nil,vs_value,result.def.size,classes,0,False);
  1286. { this would mean a memory return }
  1287. if (numclasses=0) then
  1288. internalerror(2010021502);
  1289. if (numclasses > MAX_PARA_CLASSES) then
  1290. internalerror(2010021503);
  1291. intretregidx:=0;
  1292. mmretregidx:=0;
  1293. i := 0;
  1294. { We can't use a for-loop here because the treatment of the SSEUP class requires skipping over i's }
  1295. while i < numclasses do
  1296. begin
  1297. paraloc:=result.add_location;
  1298. paraloc^.def:=classes[i].def;
  1299. case classes[i].typ of
  1300. X86_64_INTEGERSI_CLASS,
  1301. X86_64_INTEGER_CLASS:
  1302. begin
  1303. paraloc^.loc:=LOC_REGISTER;
  1304. paraloc^.register:=intretregs[intretregidx];
  1305. if classes[i].typ=X86_64_INTEGER_CLASS then
  1306. begin
  1307. paraloc^.size:=OS_64;
  1308. if paraloc^.def.size<>8 then
  1309. paraloc^.def:=u64inttype;
  1310. end
  1311. else if result.intsize in [1,2,4] then
  1312. begin
  1313. paraloc^.size:=def_cgsize(paraloc^.def);
  1314. end
  1315. else
  1316. begin
  1317. paraloc^.size:=OS_32;
  1318. if paraloc^.def.size<>4 then
  1319. paraloc^.def:=u32inttype;
  1320. end;
  1321. setsubreg(paraloc^.register,cgsize2subreg(R_INTREGISTER,paraloc^.size));
  1322. inc(intretregidx);
  1323. end;
  1324. X86_64_SSE_CLASS,
  1325. X86_64_SSEUP_CLASS,
  1326. X86_64_SSESF_CLASS,
  1327. X86_64_SSEDF_CLASS:
  1328. begin
  1329. paraloc^.loc:=LOC_MMREGISTER;
  1330. if p.proccalloption = pocall_vectorcall then
  1331. paraloc^.register:=mmretregs_vectorcall[mmretregidx]
  1332. else
  1333. paraloc^.register:=mmretregs[mmretregidx];
  1334. case classes[i].typ of
  1335. X86_64_SSESF_CLASS:
  1336. begin
  1337. setsubreg(paraloc^.register,R_SUBMMS);
  1338. paraloc^.size:=OS_F32;
  1339. end;
  1340. X86_64_SSEDF_CLASS:
  1341. begin
  1342. setsubreg(paraloc^.register,R_SUBMMD);
  1343. paraloc^.size:=OS_F64;
  1344. end;
  1345. X86_64_SSE_CLASS:
  1346. begin
  1347. j := 1;
  1348. if not (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
  1349. while i + j <= numclasses do
  1350. begin
  1351. if classes[i+j].typ <> X86_64_SSEUP_CLASS then
  1352. Break;
  1353. Inc(j);
  1354. end;
  1355. { j = MM word count }
  1356. Inc(i, j - 1);
  1357. case j of
  1358. 1:
  1359. begin
  1360. setsubreg(paraloc^.register,R_SUBQ);
  1361. paraloc^.size:=OS_M64;
  1362. end;
  1363. 2:
  1364. begin
  1365. setsubreg(paraloc^.register,R_SUBMMX);
  1366. paraloc^.size:=OS_M128;
  1367. end;
  1368. 4:
  1369. begin
  1370. setsubreg(paraloc^.register,R_SUBMMY);
  1371. paraloc^.size:=OS_M256; { Currently unsupported }
  1372. end;
  1373. 8:
  1374. begin
  1375. setsubreg(paraloc^.register,R_SUBMMZ);
  1376. paraloc^.size:=OS_M512; { Currently unsupported }
  1377. end;
  1378. else
  1379. InternalError(2018012901);
  1380. end;
  1381. end;
  1382. else
  1383. if (x86_64_use_ms_abi(p.proccalloption) and (p.proccalloption <> pocall_vectorcall)) then
  1384. begin
  1385. setsubreg(paraloc^.register,R_SUBQ);
  1386. paraloc^.size:=OS_M64;
  1387. end
  1388. else
  1389. { Should not get here }
  1390. InternalError(2018012900);
  1391. end;
  1392. inc(mmretregidx);
  1393. end;
  1394. X86_64_X87_CLASS:
  1395. begin
  1396. { must be followed by X86_64_X87UP_CLASS and that must be
  1397. the last class }
  1398. if (i<>(numclasses-2)) or
  1399. (classes[i+1].typ<>X86_64_X87UP_CLASS) then
  1400. internalerror(2014110401);
  1401. paraloc^.loc:=LOC_FPUREGISTER;
  1402. paraloc^.register:=NR_FPU_RESULT_REG;
  1403. paraloc^.size:=OS_F80;
  1404. break;
  1405. end;
  1406. X86_64_NO_CLASS:
  1407. begin
  1408. { empty record/array }
  1409. if (i<>0) or
  1410. (numclasses<>1) then
  1411. internalerror(2010060302);
  1412. paraloc^.loc:=LOC_VOID;
  1413. paraloc^.def:=voidtype;
  1414. end;
  1415. else
  1416. internalerror(2010021504);
  1417. end;
  1418. Inc(i);
  1419. end;
  1420. end;
  1421. end;
  1422. procedure tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  1423. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  1424. var
  1425. hp : tparavarsym;
  1426. fdef,
  1427. paradef : tdef;
  1428. paraloc : pcgparalocation;
  1429. subreg : tsubregister;
  1430. pushaddr : boolean;
  1431. paracgsize : tcgsize;
  1432. { loc[2] onwards are only used for _m256 under vectorcall/SysV, and
  1433. homogeneous vector aggregates and homogeneous float aggreates under
  1434. the vectorcall calling convention. [Kit] }
  1435. loc : tx64paraclasses;
  1436. needintloc,
  1437. needmmloc,
  1438. paralen,
  1439. locidx,
  1440. i,j,
  1441. varalign,
  1442. paraalign : longint;
  1443. use_ms_abi : boolean;
  1444. elementsize: asizeint; { for HVAs and HFAs under vectorcall }
  1445. begin
  1446. paraalign:=get_para_align(p.proccalloption);
  1447. use_ms_abi:=x86_64_use_ms_abi(p.proccalloption);
  1448. { Register parameters are assigned from left to right }
  1449. for i:=0 to paras.count-1 do
  1450. begin
  1451. hp:=tparavarsym(paras[i]);
  1452. paradef:=hp.vardef;
  1453. { on win64, if a record has only one field and that field is a
  1454. single or double, it has to be handled like a single/double }
  1455. if use_ms_abi and
  1456. ((paradef.typ=recorddef) {or
  1457. is_object(paradef)}) and
  1458. tabstractrecordsymtable(tabstractrecorddef(paradef).symtable).has_single_field(fdef) and
  1459. (fdef.typ=floatdef) and
  1460. (tfloatdef(fdef).floattype in [s32real,s64real]) then
  1461. paradef:=fdef;
  1462. pushaddr:=push_addr_param(hp.varspez,paradef,p.proccalloption);
  1463. if pushaddr then
  1464. begin
  1465. loc[0].typ:=X86_64_INTEGER_CLASS;
  1466. loc[1].typ:=X86_64_NO_CLASS;
  1467. paracgsize:=OS_ADDR;
  1468. paralen:=sizeof(pint);
  1469. paradef:=cpointerdef.getreusable_no_free(paradef);
  1470. loc[0].def:=paradef;
  1471. loc[1].def:=nil;
  1472. for j:=2 to high(loc) do
  1473. begin
  1474. loc[j].typ:=X86_64_NO_CLASS;
  1475. loc[j].def:=nil;
  1476. end;
  1477. end
  1478. else
  1479. begin
  1480. getvalueparaloc(p.proccalloption,hp.varspez,paradef,loc);
  1481. paralen:=push_size(hp.varspez,paradef,p.proccalloption);
  1482. if p.proccalloption = pocall_vectorcall then
  1483. begin
  1484. { TODO: Can this set of instructions be put into 'defutil' without it relying on the argument classification? [Kit] }
  1485. { The SIMD vector types have to be OS_M128 etc., not OS_128 etc.}
  1486. case is_simd_vector_type_or_homogeneous_aggregate(pocall_vectorcall,paradef,vs_value) of
  1487. 0:
  1488. { Not a vector or valid aggregate }
  1489. paracgsize:=def_cgsize(paradef);
  1490. 4:
  1491. paracgsize:=OS_F32;
  1492. 8:
  1493. paracgsize:=OS_F64;
  1494. 16:
  1495. paracgsize:=OS_M128;
  1496. 32:
  1497. paracgsize:=OS_M256;
  1498. 64:
  1499. paracgsize:=OS_M512;
  1500. else
  1501. InternalError(2018012910);
  1502. end;
  1503. end
  1504. else
  1505. paracgsize:=def_cgsize(paradef);
  1506. end;
  1507. { cheat for now, we should copy the value to an mm reg as well (FK) }
  1508. if varargsparas and
  1509. use_ms_abi and
  1510. (paradef.typ = floatdef) then
  1511. begin
  1512. loc[1].typ:=X86_64_NO_CLASS;
  1513. if paracgsize=OS_F64 then
  1514. begin
  1515. loc[0].typ:=X86_64_INTEGER_CLASS;
  1516. paracgsize:=OS_64;
  1517. paradef:=u64inttype;
  1518. end
  1519. else
  1520. begin
  1521. loc[0].typ:=X86_64_INTEGERSI_CLASS;
  1522. paracgsize:=OS_32;
  1523. paradef:=u32inttype;
  1524. end;
  1525. loc[0].def:=paradef;
  1526. end;
  1527. hp.paraloc[side].reset;
  1528. hp.paraloc[side].size:=paracgsize;
  1529. hp.paraloc[side].intsize:=paralen;
  1530. hp.paraloc[side].Alignment:=paraalign;
  1531. hp.paraloc[side].def:=paradef;
  1532. if paralen>0 then
  1533. begin
  1534. { Enough registers free? }
  1535. needintloc:=0;
  1536. needmmloc:=0;
  1537. for locidx:=low(loc) to high(loc) do
  1538. case loc[locidx].typ of
  1539. X86_64_INTEGER_CLASS,
  1540. X86_64_INTEGERSI_CLASS:
  1541. inc(needintloc);
  1542. { Note, do NOT include X86_64_SSEUP_CLASS because this links with
  1543. X86_64_SSE_CLASS and we only need one register, not two. [Kit] }
  1544. X86_64_SSE_CLASS,
  1545. X86_64_SSESF_CLASS,
  1546. X86_64_SSEDF_CLASS:
  1547. inc(needmmloc);
  1548. end;
  1549. { the "-1" is because we can also use the current register }
  1550. if (use_ms_abi and
  1551. ((intparareg+needintloc-1 > high(paraintsupregs_winx64)) or
  1552. ((p.proccalloption = pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_vectorcall))) or
  1553. ((p.proccalloption <> pocall_vectorcall) and (mmparareg+needmmloc-1 > high(parammsupregs_winx64))))) or
  1554. (not use_ms_abi and
  1555. ((intparareg+needintloc-1 > high(paraintsupregs)) or
  1556. (mmparareg+needmmloc-1 > high(parammsupregs)))) then
  1557. begin
  1558. { If there are no registers available for any
  1559. eightbyte of an argument, the whole argument is
  1560. passed on the stack. }
  1561. loc[low(loc)].typ:=X86_64_MEMORY_CLASS;
  1562. loc[low(loc)].def:=paradef;
  1563. for locidx:=succ(low(loc)) to high(loc) do
  1564. loc[locidx].typ:=X86_64_NO_CLASS;
  1565. end;
  1566. locidx:=0;
  1567. while (paralen>0) and
  1568. (locidx<=high(loc)) and
  1569. (loc[locidx].typ<>X86_64_NO_CLASS) do
  1570. begin
  1571. { Allocate }
  1572. case loc[locidx].typ of
  1573. X86_64_INTEGER_CLASS,
  1574. X86_64_INTEGERSI_CLASS:
  1575. begin
  1576. paraloc:=hp.paraloc[side].add_location;
  1577. paraloc^.loc:=LOC_REGISTER;
  1578. paraloc^.def:=loc[locidx].def;
  1579. if (paracgsize=OS_NO) or ((locidx<high(loc)) and (loc[locidx+1].typ<>X86_64_NO_CLASS)) then
  1580. begin
  1581. if loc[locidx].typ=X86_64_INTEGER_CLASS then
  1582. begin
  1583. paraloc^.size:=OS_INT;
  1584. paraloc^.def:=u64inttype;
  1585. subreg:=R_SUBWHOLE;
  1586. end
  1587. else
  1588. begin
  1589. paraloc^.size:=OS_32;
  1590. paraloc^.def:=u32inttype;
  1591. subreg:=R_SUBD;
  1592. end;
  1593. end
  1594. else
  1595. begin
  1596. paraloc^.size:=def_cgsize(paraloc^.def);
  1597. { s64comp is pushed in an int register }
  1598. if paraloc^.size=OS_C64 then
  1599. begin
  1600. paraloc^.size:=OS_64;
  1601. paraloc^.def:=u64inttype;
  1602. end;
  1603. subreg:=cgsize2subreg(R_INTREGISTER,paraloc^.size);
  1604. end;
  1605. { winx64 uses different registers }
  1606. if use_ms_abi then
  1607. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs_winx64[intparareg],subreg)
  1608. else
  1609. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs[intparareg],subreg);
  1610. { matching mm register must be skipped }
  1611. if use_ms_abi then
  1612. inc(mmparareg);
  1613. inc(intparareg);
  1614. dec(paralen,tcgsize2size[paraloc^.size]);
  1615. end;
  1616. X86_64_SSE_CLASS,
  1617. X86_64_SSESF_CLASS,
  1618. X86_64_SSEDF_CLASS:
  1619. begin
  1620. paraloc:=hp.paraloc[side].add_location;
  1621. paraloc^.loc:=LOC_MMREGISTER;
  1622. paraloc^.def:=loc[locidx].def;
  1623. case loc[locidx].typ of
  1624. X86_64_SSESF_CLASS:
  1625. begin
  1626. subreg:=R_SUBMMS;
  1627. paraloc^.size:=OS_F32;
  1628. end;
  1629. X86_64_SSEDF_CLASS:
  1630. begin
  1631. subreg:=R_SUBMMD;
  1632. paraloc^.size:=OS_F64;
  1633. end;
  1634. X86_64_SSE_CLASS:
  1635. begin
  1636. subreg:=R_SUBQ;
  1637. paraloc^.size:=OS_M64;
  1638. j := 1;
  1639. if not (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1640. while locidx + j <= high(loc) do
  1641. begin
  1642. if loc[locidx+j].typ <> X86_64_SSEUP_CLASS then
  1643. Break;
  1644. Inc(j);
  1645. end;
  1646. { j = MM word count }
  1647. Inc(locidx, j - 1);
  1648. case j of
  1649. 1:
  1650. begin
  1651. subreg:=R_SUBQ;
  1652. paraloc^.size:=OS_M64;
  1653. end;
  1654. 2:
  1655. begin
  1656. subreg:=R_SUBMMX;
  1657. paraloc^.size:=OS_M128;
  1658. end;
  1659. 4:
  1660. begin
  1661. subreg:=R_SUBMMY;
  1662. paraloc^.size:=OS_M256; { Currently unsupported }
  1663. end;
  1664. 8:
  1665. begin
  1666. subreg:=R_SUBMMZ;
  1667. paraloc^.size:=OS_M512; { Currently unsupported }
  1668. end;
  1669. else
  1670. InternalError(2018012903);
  1671. end;
  1672. end;
  1673. else
  1674. if (use_ms_abi and (p.proccalloption <> pocall_vectorcall)) then
  1675. begin
  1676. subreg:=R_SUBQ;
  1677. paraloc^.size:=OS_M64;
  1678. end
  1679. else
  1680. { Should not get here }
  1681. InternalError(2018012902);
  1682. end;
  1683. { winx64 uses different registers }
  1684. if use_ms_abi then
  1685. begin
  1686. if p.proccalloption = pocall_vectorcall then
  1687. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_vectorcall[mmparareg],subreg)
  1688. else
  1689. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_winx64[mmparareg],subreg);
  1690. end
  1691. else
  1692. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs[mmparareg],subreg);
  1693. { matching int register must be skipped }
  1694. if use_ms_abi then
  1695. inc(intparareg);
  1696. inc(mmparareg);
  1697. dec(paralen,tcgsize2size[paraloc^.size]);
  1698. end;
  1699. X86_64_MEMORY_CLASS :
  1700. begin
  1701. paraloc:=hp.paraloc[side].add_location;
  1702. paraloc^.loc:=LOC_REFERENCE;
  1703. paraloc^.def:=loc[locidx].def;
  1704. {Hack alert!!! We should modify int_cgsize to handle OS_128,
  1705. however, since int_cgsize is called in many places in the
  1706. compiler where only a few can already handle OS_128, fixing it
  1707. properly is out of the question to release 2.2.0 in time. (DM)}
  1708. if paracgsize=OS_128 then
  1709. if paralen=8 then
  1710. paraloc^.size:=OS_64
  1711. else if paralen=16 then
  1712. paraloc^.size:=OS_128
  1713. else
  1714. internalerror(200707143)
  1715. else if paracgsize in [OS_F32,OS_F64,OS_F80,OS_F128] then
  1716. paraloc^.size:=int_float_cgsize(paralen)
  1717. else
  1718. paraloc^.size:=int_cgsize(paralen);
  1719. if side=callerside then
  1720. paraloc^.reference.index:=NR_STACK_POINTER_REG
  1721. else
  1722. paraloc^.reference.index:=NR_FRAME_POINTER_REG;
  1723. varalign:=used_align(size_2_align(paralen),paraalign,paraalign);
  1724. paraloc^.reference.offset:=parasize;
  1725. parasize:=align(parasize+paralen,varalign);
  1726. paralen:=0;
  1727. end;
  1728. else
  1729. internalerror(2010053113);
  1730. end;
  1731. inc(locidx);
  1732. end;
  1733. end
  1734. else
  1735. begin
  1736. paraloc:=hp.paraloc[side].add_location;
  1737. paraloc^.loc:=LOC_VOID;
  1738. paraloc^.def:=paradef;
  1739. end;
  1740. end;
  1741. { Register parameters are assigned from left-to-right, but the
  1742. offsets on the stack are right-to-left. There is no need
  1743. to reverse the offset, only adapt the calleeside with the
  1744. start offset of the first param on the stack }
  1745. if side=calleeside then
  1746. begin
  1747. for i:=0 to paras.count-1 do
  1748. begin
  1749. hp:=tparavarsym(paras[i]);
  1750. paraloc:=hp.paraloc[side].location;
  1751. while paraloc<>nil do
  1752. begin
  1753. with paraloc^ do
  1754. if (loc=LOC_REFERENCE) then
  1755. inc(reference.offset,target_info.first_parm_offset);
  1756. paraloc:=paraloc^.next;
  1757. end;
  1758. end;
  1759. end;
  1760. end;
  1761. function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
  1762. var
  1763. intparareg,mmparareg,
  1764. parasize : longint;
  1765. begin
  1766. intparareg:=0;
  1767. mmparareg:=0;
  1768. if x86_64_use_ms_abi(p.proccalloption) then
  1769. parasize:=4*8
  1770. else
  1771. parasize:=0;
  1772. { calculate the registers for the normal parameters }
  1773. create_paraloc_info_intern(p,callerside,p.paras,intparareg,mmparareg,parasize,false);
  1774. { append the varargs }
  1775. create_paraloc_info_intern(p,callerside,varargspara,intparareg,mmparareg,parasize,true);
  1776. { store used no. of SSE registers, that needs to be passed in %AL }
  1777. varargspara.mmregsused:=mmparareg;
  1778. result:=parasize;
  1779. end;
  1780. function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
  1781. var
  1782. intparareg,mmparareg,
  1783. parasize : longint;
  1784. begin
  1785. intparareg:=0;
  1786. mmparareg:=0;
  1787. if x86_64_use_ms_abi(p.proccalloption) then
  1788. parasize:=4*8
  1789. else
  1790. parasize:=0;
  1791. create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
  1792. { Create Function result paraloc }
  1793. create_funcretloc_info(p,side);
  1794. { We need to return the size allocated on the stack }
  1795. result:=parasize;
  1796. end;
  1797. begin
  1798. paramanager:=tcpuparamanager.create;
  1799. end.