cpupara.pas 46 KB


  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. Generates the argument location information for x86-64 target
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cpupara;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cpubase,cgbase,cgutils,
  23. symconst,symtype,symsym,symdef,
  24. aasmtai,aasmdata,
  25. parabase,paramgr;
  26. type
  { Parameter manager for the x86-64 targets. It overrides the generic
    tparamanager hooks with the classification and register assignment
    implemented in this unit; the win64 target is handled by separate
    branches inside the individual methods. }
  tx86_64paramanager = class(tparamanager)
  private
    { stores get_funcretloc(p,side,p.returndef) in p.funcretloc[side] }
    procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
    { assigns locations to the parameters in "paras"; intparareg, mmparareg
      and parasize are running counters owned by the caller }
    procedure create_paraloc_info_intern(p : tabstractprocdef;
                                         side: tcallercallee;
                                         paras:tparalist;
                                         var intparareg,mmparareg,parasize:longint;
                                         varargsparas: boolean);
  public
    { true if every location of cgpara is a LOC_REFERENCE }
    function param_use_paraloc(const cgpara:tcgpara):boolean;override;
    { true if only the address of the parameter is passed }
    function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
    { true if a function result of type def is returned via a hidden parameter }
    function ret_in_param(def : tdef;calloption : tproccalloption) : boolean;override;
    { location of the nr-th (1-based) integer/pointer sized parameter }
    procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
    { register sets reported as volatile for the current target }
    function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
    function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
    function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
    { parameter location setup for regular and varargs parameters }
    function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
    function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
    { location of a function result of type def }
    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
  end;
  44. implementation
  45. uses
  46. cutils,verbose,
  47. systems,
  48. defutil,
  49. symtable;
  const
    { integer parameter registers used on the non-win64 targets; indexed
      with the zero-based parameter register number }
    paraintsupregs : array[0..5] of tsuperregister = (
      RS_RDI,RS_RSI,RS_RDX,RS_RCX,RS_R8,RS_R9
    );
    { SSE parameter registers used on the non-win64 targets }
    parammsupregs : array[0..7] of tsuperregister = (
      RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7
    );

    { integer parameter registers used on win64 }
    paraintsupregs_winx64 : array[0..3] of tsuperregister = (
      RS_RCX,RS_RDX,RS_R8,RS_R9
    );
    { SSE parameter registers used on win64 }
    parammsupregs_winx64 : array[0..3] of tsuperregister = (
      RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3
    );
  55. {
  56. The argument classification code largely comes from libffi:
  57. ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <[email protected]>
  58. Copyright (c) 2008 Red Hat, Inc.
  59. x86-64 Foreign Function Interface
  60. Permission is hereby granted, free of charge, to any person obtaining
  61. a copy of this software and associated documentation files (the
  62. ``Software''), to deal in the Software without restriction, including
  63. without limitation the rights to use, copy, modify, merge, publish,
  64. distribute, sublicense, and/or sell copies of the Software, and to
  65. permit persons to whom the Software is furnished to do so, subject to
  66. the following conditions:
  67. The above copyright notice and this permission notice shall be included
  68. in all copies or substantial portions of the Software.
  69. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
  70. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  71. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  72. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  73. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  74. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  75. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  76. DEALINGS IN THE SOFTWARE.
  77. ----------------------------------------------------------------------- *)
  78. }
  const
    { number of entries in a tx64paraclasses array; the aggregate helpers
      use one entry per 8 bytes and reject aggregates larger than 32 bytes }
    MAX_PARA_CLASSES = 4;

  type
    { Register class of one eightbyte of an argument. The *SI/*SF/*DF
      members are the 32 bit integer, single and double refinements used
      by the classification routines of this unit. }
    tx64paraclass = (
      X86_64_NO_CLASS,
      X86_64_INTEGER_CLASS,
      X86_64_INTEGERSI_CLASS,
      X86_64_SSE_CLASS,
      X86_64_SSESF_CLASS,
      X86_64_SSEDF_CLASS,
      X86_64_SSEUP_CLASS,
      X86_64_X87_CLASS,
      X86_64_X87UP_CLASS,
      X86_64_COMPLEX_X87_CLASS,
      X86_64_MEMORY_CLASS
    );

    { classification of a complete argument, one class per eightbyte }
    tx64paraclasses = array[0..MAX_PARA_CLASSES-1] of tx64paraclass;
  91. { Win64-specific helper }
  { Returns true if, on win64, an aggregate of "size" bytes passed with
    parameter kind "varspez" is passed in a register: only value
    parameters whose size is exactly 1, 2, 4 or 8 bytes qualify. }
  function aggregate_in_registers_win64(varspez:tvarspez;size:longint):boolean;
    begin
      result:=false;
      { TODO: Temporary hack: vs_const parameters are always passed by
        reference for win64 }
      if varspez<>vs_value then
        exit;
      result:=size in [1,2,4,8];
    end;
  97. (* x86-64 register passing implementation. See x86-64 ABI for details. Goal
  98. of this code is to classify each 8bytes of incoming argument by the register
  99. class and assign registers accordingly. *)
  (* Return the union class of CLASS1 and CLASS2, i.e. the class of an
     eightbyte that is shared by two items of those classes.
     See the x86-64 PS ABI for details; the checks below are evaluated
     in order and the first matching rule decides. *)
  function merge_classes(class1, class2: tx64paraclass): tx64paraclass;
    const
      any_integer_class = [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS];
      any_x87_class = [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS];
    begin
      if class1=class2 then
        { Rule #1: equal classes stay as they are }
        result:=class1
      else if class1=X86_64_NO_CLASS then
        { Rule #2: NO_CLASS yields to the other class }
        result:=class2
      else if class2=X86_64_NO_CLASS then
        result:=class1
      else if (class1=X86_64_MEMORY_CLASS) or
              (class2=X86_64_MEMORY_CLASS) then
        { Rule #3: MEMORY wins over everything else }
        result:=X86_64_MEMORY_CLASS
      else if ((class1=X86_64_INTEGERSI_CLASS) and (class2=X86_64_SSESF_CLASS)) or
              ((class1=X86_64_SSESF_CLASS) and (class2=X86_64_INTEGERSI_CLASS)) then
        { Rule #4, 32 bit flavour: a 32 bit integer merged with a single
          remains a 32 bit integer }
        result:=X86_64_INTEGERSI_CLASS
      else if (class1 in any_integer_class) or
              (class2 in any_integer_class) then
        { Rule #4, 64 bit flavour: any other combination involving an
          integer class becomes a full INTEGER }
        result:=X86_64_INTEGER_CLASS
      else if (class1 in any_x87_class) or
              (class2 in any_x87_class) then
        { Rule #5: X87, X87UP and COMPLEX_X87 force MEMORY }
        result:=X86_64_MEMORY_CLASS
      else
        { Rule #6: everything that is left is SSE }
        result:=X86_64_SSE_CLASS;
    end;
  136. (* Classify the argument of type TYPE and mode MODE.
  137. CLASSES will be filled by the register class used to pass each word
  138. of the operand. The number of words is returned. In case the parameter
  139. should be passed in memory, 0 is returned. As a special case for zero
  140. sized containers, classes[0] will be NO_CLASS and 1 is returned.
  141. real_size contains either def.size, or a value derived from
  142. def.bitpackedsize and the field offset denoting the number of bytes
  143. spanned by a bitpacked field
  144. See the x86-64 PS ABI for details.
  145. *)
  { Classifies an integer-like item of real_size bytes that starts
    byte_offset bytes into its eightbyte. classes[0] (and classes[1] if the
    item ends beyond byte 8) is set to the 32 bit class when the relevant
    end offset fits in the lower four bytes of that eightbyte, and to the
    64 bit class otherwise. Returns the number of classes filled in (1 or 2);
    an item ending beyond byte 16 triggers an internal error. }
  function classify_as_integer_argument(real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      end_offset: aint;
    begin
      end_offset:=byte_offset+real_size;

      { first eightbyte }
      if end_offset>4 then
        classes[0]:=X86_64_INTEGER_CLASS
      else
        classes[0]:=X86_64_INTEGERSI_CLASS;
      result:=1;
      if end_offset<=8 then
        exit;

      { second eightbyte }
      result:=2;
      if end_offset<=12 then
        classes[1]:=X86_64_INTEGERSI_CLASS
      else if end_offset<=16 then
        classes[1]:=X86_64_INTEGER_CLASS
      else
        internalerror(2010021401);
    end;
  168. function classify_argument(def: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint; forward;
  { Common first step of classifying a record or array.

    words   : set to the number of eightbytes the element-wise
              classification still has to process; 0 means the result of
              this function is already final.
    classes : receives the initial classification.
    result  : 0 for "pass in memory", otherwise the number of valid
              entries in classes.

    win64 is decided completely here (one INTEGER class or memory). On the
    other targets, aggregates larger than 32 bytes go to memory, empty
    aggregates yield a single NO_CLASS entry, and everything else gets
    all classes reset to NO_CLASS so that the per-element classes can be
    merged in afterwards. }
  function init_aggregate_classification(def: tdef; varspez: tvarspez; out words: longint; out classes: tx64paraclasses): longint;
    var
      idx: longint;
    begin
      words:=0;

      { win64 follows a different convention here }
      if target_info.system=system_x86_64_win64 then
        begin
          result:=0;
          if aggregate_in_registers_win64(varspez,def.size) then
            begin
              classes[0]:=X86_64_INTEGER_CLASS;
              result:=1;
            end;
          exit;
        end;

      { anything larger than 32 bytes is passed on the stack }
      if def.size>32 then
        exit(0);

      words:=(def.size+7) div 8;

      { zero sized arrays or structures are NO_CLASS; since 0 is reserved
        for the memory class this is reported as one NO_CLASS entry }
      if words=0 then
        begin
          classes[0]:=X86_64_NO_CLASS;
          exit(1);
        end;

      { the element classes are merged into this array later on, so start
        from a clean slate }
      for idx:=low(classes) to high(classes) do
        classes[idx]:=X86_64_NO_CLASS;
      result:=words;
    end;
  { Classifies one field/element (type def, real_size bytes) located
    new_byte_offset bytes from the start of the enclosing aggregate and
    merges its classes into the aggregate's "classes" array.
    Returns the number of classes of the element, or 0 if the element
    (and therefore the whole aggregate) has to be passed in memory. }
  function classify_aggregate_element(def: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; new_byte_offset: aint): longint;
    var
      element_classes: tx64paraclasses;
      first_word,
      word_idx: longint;
    begin
      { the element is classified relative to the start of its own eightbyte }
      result:=classify_argument(def,varspez,real_size,element_classes,new_byte_offset mod 8);
      if result<>0 then
        begin
          { index of the eightbyte in which the element starts }
          first_word:=new_byte_offset div 8;
          if first_word+result-1>high(classes) then
            internalerror(2010053108);
          for word_idx:=first_word to first_word+result-1 do
            classes[word_idx]:=merge_classes(element_classes[word_idx-first_word],classes[word_idx]);
        end;
    end;
  { Post-merger cleanup of the classes of an aggregate of "words"
    eightbytes. Returns 0 if the aggregate has to be passed in memory and
    "words" otherwise; classes may be adjusted in place. }
  function finalize_aggregate_classification(def: tdef; words: longint; var classes: tx64paraclasses): longint;
    var
      idx: longint;
    begin
      { larger than 16 bytes: only an SSE eightbyte followed exclusively by
        SSEUP eightbytes stays out of memory }
      if words>2 then
        begin
          if classes[0]<>X86_64_SSE_CLASS then
            exit(0);
          for idx:=1 to words-1 do
            if classes[idx]<>X86_64_SSEUP_CLASS then
              exit(0);
        end;

      { the first eightbyte must never be one of the "upper half" classes;
        this also guarantees that the [idx-1] accesses below are never
        reached with idx=0 }
      if classes[0] in [X86_64_SSEUP_CLASS,X86_64_X87UP_CLASS] then
        internalerror(2010021402);

      for idx:=0 to words-1 do
        case classes[idx] of
          X86_64_MEMORY_CLASS:
            { one MEMORY eightbyte forces the whole aggregate to memory }
            exit(0);
          X86_64_SSEUP_CLASS:
            { SSEUP must be preceded by SSE or SSEUP, otherwise it is
              turned into a plain SSE }
            if not(classes[idx-1] in [X86_64_SSE_CLASS,X86_64_SSEUP_CLASS]) then
              classes[idx]:=X86_64_SSE_CLASS;
          X86_64_X87UP_CLASS:
            { X87UP without a preceding X87 -> memory }
            if classes[idx-1]<>X86_64_X87_CLASS then
              exit(0);
        end;

      { FIXME: if a record contains padding, e.g. a "single" field followed
        by a "double", the cgpara helpers cannot figure out that they have
        to skip 4 bytes after storing the single (LOC_MMREGISTER with size
        OS_F32) before storing the double. For now such locations are
        always scaled up to 64 bits, although this loads/stores some
        superfluous data. }

      { 1) the first part is 32 bit while there is still a second part }
      if classes[1]<>X86_64_NO_CLASS then
        begin
          if classes[0]=X86_64_INTEGERSI_CLASS then
            classes[0]:=X86_64_INTEGER_CLASS
          else if classes[0]=X86_64_SSESF_CLASS then
            classes[0]:=X86_64_SSE_CLASS;
        end;

      { 2) the second part is 32 bit, but the total size is > 12 bytes }
      if def.size>12 then
        begin
          if classes[1]=X86_64_INTEGERSI_CLASS then
            classes[1]:=X86_64_INTEGER_CLASS
          else if classes[1]=X86_64_SSESF_CLASS then
            classes[1]:=X86_64_SSE_CLASS;
        end;

      result:=words;
    end;
  { Classifies a record (def must be a tabstractrecorddef) by classifying
    each of its fields and merging the results. byte_offset is added to
    every field offset. Returns the number of classes, or 0 if the record
    has to be passed in memory. }
  function classify_record(def: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      recst: tabstractrecordsymtable;
      field: tfieldvarsym;
      field_size,
      field_offset: aint;
      idx,
      words: longint;
    begin
      result:=init_aggregate_classification(def,varspez,words,classes);
      { nothing left to do: win64, memory or empty record }
      if words=0 then
        exit;

      recst:=tabstractrecordsymtable(tabstractrecorddef(def).symtable);

      { merge the fields of the structure; other symbols are skipped }
      for idx:=0 to recst.symlist.count-1 do
        if tsym(recst.symlist[idx]).typ=fieldvarsym then
          begin
            field:=tfieldvarsym(recst.symlist[idx]);
            if recst.is_packed then
              begin
                { fieldoffset is divided by 8 here, i.e. treated as a
                  bit offset }
                field_offset:=byte_offset+field.fieldoffset div 8;
                if field.vardef.typ in [orddef,enumdef] then
                  { number of bytes spanned by this bitpacked field }
                  field_size:=((field.fieldoffset+field.vardef.packedbitsize+7) div 8)-(field.fieldoffset div 8)
                else
                  field_size:=field.vardef.size;
              end
            else
              begin
                field_offset:=byte_offset+field.fieldoffset;
                field_size:=field.vardef.size;
              end;
            { a single field in memory sends the whole record to memory }
            if classify_aggregate_element(field.vardef,varspez,field_size,classes,field_offset)=0 then
              exit(0);
          end;

      result:=finalize_aggregate_classification(def,words,classes);
    end;
  { Classifies a normal (not open, dynamic or array-of-const) array by
    classifying every element and merging the results. byte_offset is
    added to every element offset. Returns the number of classes, or 0 if
    the array has to be passed in memory. }
  function classify_normal_array(def: tarraydef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      idx, count: aword;
      span,          { bytes occupied/spanned by the current element }
      step,          { element size: bytes, or bits for bitpacked arrays }
      elem_offset,
      bitpos: aint;
      words: longint;
      packed_elems: boolean;
    begin
      result:=init_aggregate_classification(def,varspez,words,classes);
      { nothing left to do: win64, memory or empty array }
      if words=0 then
        exit;

      packed_elems:=is_packed_array(def);
      if packed_elems then
        begin
          step:=def.elepackedbitsize;
          bitpos:=0;
        end
      else
        begin
          step:=def.elesize;
          { constant for all elements }
          span:=step;
        end;

      { merge the elements of the array; words<>0 at this point, so the
        loop body is expected to run at least once }
      count:=def.elecount;
      idx:=0;
      repeat
        if packed_elems then
          begin
            { number of bytes spanned by this bitpacked element }
            span:=((bitpos+step+7) div 8)-(bitpos div 8);
            elem_offset:=byte_offset+(step*idx) div 8;
            { bit offset of the next element }
            inc(bitpos,step);
          end
        else
          elem_offset:=byte_offset+idx*step;

        { a single element in memory sends the whole array to memory }
        if classify_aggregate_element(def.elementdef,varspez,span,classes,elem_offset)=0 then
          exit(0);
        inc(idx);
      until idx=count;

      result:=finalize_aggregate_classification(def,words,classes);
    end;
  { Classifies an item of type def.

    real_size   : size in bytes to use for integer-like types (def.size,
                  or the number of bytes spanned by a bitpacked field)
    classes     : receives one class per eightbyte of the item
    byte_offset : offset of the item within its eightbyte
    result      : number of classes filled in, or 0 if the item has to be
                  passed in memory

    Unknown def types trigger an internal error. }
  function classify_argument(def: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
    var
      sysrec: tdef;
    begin
      case def.typ of
        orddef,
        enumdef,
        pointerdef,
        classrefdef:
          result:=classify_as_integer_argument(real_size,classes,byte_offset);

        formaldef:
          { classified like a pointer }
          result:=classify_as_integer_argument(voidpointertype.size,classes,byte_offset);

        floatdef:
          case tfloatdef(def).floattype of
            s32real:
              begin
                result:=1;
                { if we have e.g. a record with two successive "single"
                  fields, we need a 64 bit rather than a 32 bit load }
                if byte_offset<>0 then
                  classes[0]:=X86_64_SSE_CLASS
                else
                  classes[0]:=X86_64_SSESF_CLASS;
              end;
            s64real:
              begin
                result:=1;
                classes[0]:=X86_64_SSEDF_CLASS;
              end;
            s80real,
            sc80real:
              begin
                result:=2;
                classes[0]:=X86_64_X87_CLASS;
                classes[1]:=X86_64_X87UP_CLASS;
              end;
            s64comp,
            s64currency:
              begin
                result:=1;
                classes[0]:=X86_64_INTEGER_CLASS;
              end;
            s128real:
              begin
                result:=2;
                classes[0]:=X86_64_SSE_CLASS;
                classes[1]:=X86_64_SSEUP_CLASS;
              end;
            else
              internalerror(2010060301);
          end;

        recorddef:
          result:=classify_record(def,varspez,classes,byte_offset);

        objectdef:
          if not is_object(def) then
            { all kinds of pointer types: class, objcclass, interface, ... }
            result:=classify_as_integer_argument(voidpointertype.size,classes,byte_offset)
          else
            { pass by reference, like ppc and i386 }
            result:=0;

        setdef:
          begin
            { only small sets are classified as integers }
            result:=0;
            if is_smallset(def) then
              result:=classify_as_integer_argument(def.size,classes,byte_offset);
          end;

        stringdef:
          begin
            { short- and longstrings go to memory, the other string types
              are classified as integers of def.size bytes }
            result:=0;
            if not(tstringdef(def).stringtype in [st_shortstring,st_longstring]) then
              result:=classify_as_integer_argument(def.size,classes,byte_offset);
          end;

        arraydef:
          if is_dynamic_array(def) then
            { a dynamic array is treated like a pointer }
            result:=classify_as_integer_argument(voidpointertype.size,classes,byte_offset)
          else if is_open_array(def) or
                  is_array_of_const(def) then
            { other special arrays are passed on the stack }
            result:=0
          else
            { normal array }
            result:=classify_normal_array(tarraydef(def),varspez,classes,byte_offset);

        { the file record is definitely too big }
        filedef:
          result:=0;

        procvardef:
          if po_methodpointer in tprocvardef(def).procoptions then
            begin
              { treat as TMethod record }
              sysrec:=search_system_type('TMETHOD').typedef;
              result:=classify_argument(sysrec,varspez,sysrec.size,classes,byte_offset);
            end
          else
            { pointer }
            result:=classify_as_integer_argument(def.size,classes,byte_offset);

        variantdef:
          begin
            { same as tvardata record }
            sysrec:=search_system_type('TVARDATA').typedef;
            result:=classify_argument(sysrec,varspez,sysrec.size,classes,byte_offset);
          end;

        else
          internalerror(2010021405);
      end;
    end;
  { Determines the classes of the (at most two) eightbytes of a parameter
    of type def that is passed by value. loc1 receives the class of the
    first eightbyte and loc2 that of the second one (NO_CLASS if there is
    none). A parameter that has to be passed in memory yields
    loc1=MEMORY, loc2=NO_CLASS. More than two classes trigger an internal
    error. }
  procedure getvalueparaloc(varspez:tvarspez;def:tdef;var loc1,loc2:tx64paraclass);
    const
      x87_classes = [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS];
    var
      argsize: aint;
      idx,
      count: longint;
      classes: tx64paraclasses;
    begin
      { init the classes array, because even if classify_argument inits
        only one element, both are copied to loc1/loc2 below }
      for idx:=low(classes) to high(classes) do
        classes[idx]:=X86_64_NO_CLASS;

      { def.size internalerrors for open arrays and dynamic arrays, since
        their size cannot be determined at compile-time. classify_argument
        does not look at the size argument for those, but something has to
        be passed }
      argsize:=-1;
      if not is_special_array(def) then
        argsize:=def.size;

      count:=classify_argument(def,varspez,argsize,classes,0);
      if count=0 then
        begin
          loc1:=X86_64_MEMORY_CLASS;
          loc2:=X86_64_NO_CLASS;
        end
      else if (count=1) or (count=2) then
        begin
          { X87, X87UP and COMPLEX_X87 parameters are passed in memory }
          for idx:=0 to 1 do
            if classes[idx] in x87_classes then
              classes[idx]:=X86_64_MEMORY_CLASS;
          loc1:=classes[0];
          loc2:=classes[1];
        end
      else
        { 4 can only happen for _m256 vectors, not yet supported }
        internalerror(2010021501);
    end;
  { Returns true if a function result of type def is returned through a
    hidden parameter instead of in registers. win64 safecall routines
    always do so; records and procvars do so if classify_argument puts
    them in memory; all other types are delegated to the inherited
    implementation. }
  function tx86_64paramanager.ret_in_param(def : tdef;calloption : tproccalloption) : boolean;
    var
      classes: tx64paraclasses;
    begin
      if (target_info.system=system_x86_64_win64) and
         (calloption=pocall_safecall) then
        result:=true
      else if def.typ in [recorddef,procvardef] then
        { for records it depends on their contents and size; procvardef is
          included to handle 'procedure of object' correctly }
        result:=classify_argument(def,vs_value,def.size,classes,0)=0
      else
        result:=inherited ret_in_param(def,calloption);
    end;
  { Returns true if all locations of cgpara are LOC_REFERENCE. A cgpara
    without any location triggers an internal error. }
  function tx86_64paramanager.param_use_paraloc(const cgpara:tcgpara):boolean;
    var
      paraloc : pcgparalocation;
    begin
      paraloc:=cgpara.location;
      if not assigned(paraloc) then
        internalerror(200410102);
      result:=true;
      { walk the location list and stop at the first non-reference }
      while result and assigned(paraloc) do
        begin
          result:=(paraloc^.loc=LOC_REFERENCE);
          paraloc:=paraloc^.next;
        end;
    end;
  { true if a parameter is too large to copy and only the address is pushed }
  function tx86_64paramanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
    var
      classes: tx64paraclasses;
    begin
      { var, out and constref always require the address }
      if varspez in [vs_var,vs_out,vs_constref] then
        exit(true);

      { only vs_const and vs_value are left; types not listed below are
        passed by value }
      result:=false;
      case def.typ of
        formaldef :
          result:=true;

        recorddef :
          if (calloption=pocall_mwpascal) and
             (varspez=vs_const) then
            { MetroWerks Pascal: const records are always passed by
              reference (for Mac OS X interfaces) }
            result:=true
          else if target_info.system=system_x86_64_win64 then
            { the Win ABI depends on the size to pass it in a register or not }
            result:=not aggregate_in_registers_win64(varspez,def.size)
          else if ((varspez=vs_const) and
                   not(calloption in [pocall_cdecl,pocall_cppdecl])) or
                  (def.size=16) then
            { pass constant parameters that would be passed via memory by
              reference for non-cdecl/cppdecl, and make sure that the
              tmethod record (size=16) is passed the same way as a complex
              procvar }
            result:=classify_argument(def,vs_value,def.size,classes,0)=0
          else
            { the SysV ABI always passes it as value parameter }
            result:=false;

        arraydef :
          { A cdecl array of const needs to be ignored and is therefore
            pushed as value parameter with length 0; dynamic arrays are
            passed by value as well. All other arrays are passed by
            reference to be compatible with C (passing an array by value,
            i.e. copying it on the stack, does not exist there because an
            array is the same as a pointer). }
          result:=not(((calloption in [pocall_cdecl,pocall_cppdecl]) and
                       is_array_of_const(def)) or
                      is_dynamic_array(def));

        objectdef :
          { Don't treat objects like records, because we only know whether
            or not they'll have a VMT after the entire object is parsed
            -> if they are used as function result from one of their own
            methods, their size can still change after we've determined
            whether this function result should be returned by reference
            or by value. }
          result:=is_object(def);

        variantdef,
        stringdef,
        procvardef,
        setdef :
          { by reference exactly if the value would end up in memory }
          result:=classify_argument(def,vs_value,def.size,classes,0)=0;
      end;
    end;
  { Returns the set of integer registers reported as volatile for the
    current target. RSI and RDI are only included on non-win64 targets;
    calloption is not inspected. }
  function tx86_64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
    begin
      { registers common to both conventions }
      result:=[RS_RAX,RS_RCX,RS_RDX,RS_R8,RS_R9,RS_R10,RS_R11];
      if target_info.system<>system_x86_64_win64 then
        result:=result+[RS_RSI,RS_RDI];
    end;
  { Returns the set of SSE registers reported as volatile for the current
    target: XMM0..XMM5 on win64, XMM0..XMM15 elsewhere. calloption is not
    inspected. }
  function tx86_64paramanager.get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;
    begin
      if target_info.system<>system_x86_64_win64 then
        result:=[RS_XMM0..RS_XMM15]
      else
        result:=[RS_XMM0..RS_XMM5];
    end;
  { Returns the set of x87 registers reported as volatile: all of
    ST0..ST7, independent of target and calloption. }
  function tx86_64paramanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
    begin
      result:=[RS_ST0..RS_ST7];
    end;
  { Fills cgpara with the location of the nr-th (1-based) integer/pointer
    sized parameter: one of the integer parameter registers of the current
    target as long as nr does not exceed their number, and a stack pointer
    relative reference otherwise. nr<1 triggers an internal error. }
  procedure tx86_64paramanager.getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);
    var
      paraloc : pcgparalocation;
      is_win64 : boolean;
      regcount : longint;
      supreg : tsuperregister;
    begin
      cgpara.reset;
      cgpara.size:=OS_ADDR;
      cgpara.intsize:=sizeof(pint);
      cgpara.alignment:=get_para_align(calloption);
      paraloc:=cgpara.add_location;
      paraloc^.size:=OS_INT;

      { number of integer parameter registers of the current target }
      is_win64:=(target_info.system=system_x86_64_win64);
      if is_win64 then
        regcount:=high(paraintsupregs_winx64)+1
      else
        regcount:=high(paraintsupregs)+1;

      if nr<1 then
        internalerror(200304303)
      else if nr<=regcount then
        begin
          if is_win64 then
            supreg:=paraintsupregs_winx64[nr-1]
          else
            supreg:=paraintsupregs[nr-1];
          paraloc^.loc:=LOC_REGISTER;
          paraloc^.register:=newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        end
      else
        begin
          paraloc^.loc:=LOC_REFERENCE;
          paraloc^.reference.index:=NR_STACK_POINTER_REG;
          { NOTE(review): the same "nr-6" bias is used for both targets
            although win64 only has four integer parameter registers, so
            nr=5 yields a negative offset there -- confirm whether this is
            intended }
          paraloc^.reference.offset:=(nr-6)*sizeof(aint);
        end;
    end;
  { Computes the function result location of p for the given side from
    p.returndef and stores it in p.funcretloc[side]. }
  procedure tx86_64paramanager.create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
    begin
      p.funcretloc[side]:=get_funcretloc(p,side,p.returndef);
    end;
  { Builds the location of a function result of type def for routine p.

    In order of precedence:
      - void               -> a single LOC_VOID location
      - ret_in_param       -> a single LOC_REFERENCE location
      - floating point     -> an SSE register (single/double) or the x87
                              result register (currency/comp/extended)
      - everything else    -> one location per class returned by
                              classify_argument, using up to two integer
                              and two SSE result registers

    Constructors always use an address sized result. "side" is not
    inspected by this routine. }
  function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
    const
      intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
      mmretregs: array[0..1] of tregister = (NR_MM_RESULT_REG,NR_MM_RESULT_REG_HIGH);
    var
      classes: tx64paraclasses;
      idx,
      numclasses: longint;
      next_int,
      next_mm: longint;
      retcgsize : tcgsize;
      paraloc : pcgparalocation;
    begin
      result.init;
      result.alignment:=get_para_align(p.proccalloption);

      { void has no location }
      if is_void(def) then
        begin
          paraloc:=result.add_location;
          result.size:=OS_NO;
          result.intsize:=0;
          paraloc^.size:=OS_NO;
          paraloc^.loc:=LOC_VOID;
          exit;
        end;

      { constructors return self instead of a boolean }
      if p.proctypeoption<>potype_constructor then
        begin
          retcgsize:=def_cgsize(def);
          result.intsize:=def.size;
        end
      else
        begin
          retcgsize:=OS_ADDR;
          result.intsize:=sizeof(pint);
        end;
      result.size:=retcgsize;

      { the result is passed as var parameter }
      if ret_in_param(def,p.proccalloption) then
        begin
          paraloc:=result.add_location;
          paraloc^.loc:=LOC_REFERENCE;
          paraloc^.size:=retcgsize;
          exit;
        end;

      { Floating point results don't go through classify_argument(),
        because currency and comp need special treatment here (they are
        integer class when passed as parameter, but LOC_FPUREGISTER as
        function result) }
      if def.typ=floatdef then
        begin
          paraloc:=result.add_location;
          case tfloatdef(def).floattype of
            s32real,
            s64real:
              begin
                paraloc^.loc:=LOC_MMREGISTER;
                if tfloatdef(def).floattype=s32real then
                  begin
                    paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
                    paraloc^.size:=OS_F32;
                  end
                else
                  begin
                    paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
                    paraloc^.size:=OS_F64;
                  end;
              end;
            { the first two only exist on targets with an x87, on others
              they are replaced by int64 }
            s64currency,
            s64comp,
            s80real,
            sc80real:
              begin
                paraloc^.loc:=LOC_FPUREGISTER;
                paraloc^.register:=NR_FPU_RESULT_REG;
                paraloc^.size:=retcgsize;
              end;
            else
              internalerror(200405034);
          end;
          exit;
        end;

      { result in (general purpose and/or SSE) registers }
      numclasses:=classify_argument(def,vs_value,def.size,classes,0);
      { this would mean a memory return }
      if numclasses=0 then
        internalerror(2010021502);
      { this would mean an _m256 vector (valid, but not yet supported) }
      if numclasses>2 then
        internalerror(2010021503);

      next_int:=0;
      next_mm:=0;
      for idx:=0 to numclasses-1 do
        begin
          paraloc:=result.add_location;
          case classes[idx] of
            X86_64_INTEGERSI_CLASS,
            X86_64_INTEGER_CLASS:
              begin
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.register:=intretregs[next_int];
                inc(next_int);
                if classes[idx]=X86_64_INTEGERSI_CLASS then
                  begin
                    { keep the exact size for 1, 2 and 4 byte results,
                      use 32 bit for everything else in this class }
                    if result.intsize in [1,2,4] then
                      paraloc^.size:=retcgsize
                    else
                      paraloc^.size:=OS_32;
                  end
                else
                  paraloc^.size:=OS_64;
                setsubreg(paraloc^.register,cgsize2subreg(R_INTREGISTER,paraloc^.size));
              end;

            X86_64_SSE_CLASS,
            X86_64_SSEUP_CLASS,
            X86_64_SSESF_CLASS,
            X86_64_SSEDF_CLASS:
              begin
                paraloc^.loc:=LOC_MMREGISTER;
                paraloc^.register:=mmretregs[next_mm];
                inc(next_mm);
                if classes[idx]=X86_64_SSESF_CLASS then
                  begin
                    setsubreg(paraloc^.register,R_SUBMMS);
                    paraloc^.size:=OS_F32;
                  end
                else if classes[idx]=X86_64_SSEDF_CLASS then
                  begin
                    setsubreg(paraloc^.register,R_SUBMMD);
                    paraloc^.size:=OS_F64;
                  end
                else
                  begin
                    setsubreg(paraloc^.register,R_SUBMMWHOLE);
                    paraloc^.size:=OS_M64;
                  end;
              end;

            X86_64_NO_CLASS:
              begin
                { empty record/array: must be the one and only class }
                if (idx<>0) or
                   (numclasses<>1) then
                  internalerror(2010060302);
                paraloc^.loc:=LOC_VOID;
              end;

            else
              internalerror(2010021504);
          end;
        end;
    end;
  { Fills in paraloc[side] for every parameter symbol in "paras".

    p            : procedure definition; only its proccalloption is consulted here
    side         : selects which paraloc entry of each parameter is written and
                   whether stack references use the stack or the frame pointer
    paras        : list whose items are treated as tparavarsym
    intparareg,
    mmparareg    : in/out, index of the next free entry in the integer resp.
                   mm parameter register tables
    parasize     : in/out, running stack offset; advanced for each parameter
                   that ends up in memory
    varargsparas : when true on win64, floating point parameters are redirected
                   to integer registers (see the "cheat" note below) }
  procedure tx86_64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
      var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
    var
      para : tparavarsym;
      cur : pcgparalocation;
      subr : tsubregister;
      byref,
      iswin64,
      outofregs : boolean;
      cgsize : tcgsize;
      cls : array[1..2] of tx64paraclass;
      intneeded,
      mmneeded,
      bytesleft,
      clsidx,
      paraidx,
      stackalign,
      defalign : longint;
    begin
      defalign:=get_para_align(p.proccalloption);
      { the target does not change while we run, so test it only once }
      iswin64:=target_info.system=system_x86_64_win64;
      { registers are handed out walking the parameter list front to back }
      for paraidx:=0 to paras.count-1 do
        begin
          para:=tparavarsym(paras[paraidx]);
          byref:=push_addr_param(para.varspez,para.vardef,p.proccalloption);
          if not byref then
            begin
              getvalueparaloc(para.varspez,para.vardef,cls[1],cls[2]);
              bytesleft:=push_size(para.varspez,para.vardef,p.proccalloption);
              cgsize:=def_cgsize(para.vardef);
            end
          else
            begin
              { only the address travels: one integer location }
              cls[1]:=X86_64_INTEGER_CLASS;
              cls[2]:=X86_64_NO_CLASS;
              cgsize:=OS_ADDR;
              bytesleft:=sizeof(aint);
            end;
          { cheat for now, we should copy the value to an mm reg as well (FK) }
          if varargsparas and iswin64 and (para.vardef.typ=floatdef) then
            begin
              cls[2]:=X86_64_NO_CLASS;
              if cgsize<>OS_F64 then
                begin
                  cls[1]:=X86_64_INTEGERSI_CLASS;
                  cgsize:=OS_32;
                end
              else
                begin
                  cls[1]:=X86_64_INTEGER_CLASS;
                  cgsize:=OS_64;
                end;
            end;
          para.paraloc[side].reset;
          para.paraloc[side].size:=cgsize;
          para.paraloc[side].intsize:=bytesleft;
          para.paraloc[side].Alignment:=defalign;
          if bytesleft<=0 then
            begin
              { nothing to pass: record a single void location }
              cur:=para.paraloc[side].add_location;
              cur^.loc:=LOC_VOID;
            end
          else
            begin
              { count how many registers of each kind the classes ask for }
              intneeded:=0;
              mmneeded:=0;
              for clsidx:=low(cls) to high(cls) do
                if cls[clsidx] in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS] then
                  inc(intneeded)
                else if cls[clsidx] in [X86_64_SSE_CLASS,X86_64_SSESF_CLASS,
                                        X86_64_SSEDF_CLASS,X86_64_SSEUP_CLASS] then
                  inc(mmneeded);
              { the "-1" is because the current register can be used as well }
              if iswin64 then
                outofregs:=(intparareg+intneeded-1 > high(paraintsupregs_winx64)) or
                           (mmparareg+mmneeded-1 > high(parammsupregs_winx64))
              else
                outofregs:=(intparareg+intneeded-1 > high(paraintsupregs)) or
                           (mmparareg+mmneeded-1 > high(parammsupregs));
              if outofregs then
                begin
                  { If there are no registers available for any
                    eightbyte of an argument, the whole argument is
                    passed on the stack. }
                  cls[low(cls)]:=X86_64_MEMORY_CLASS;
                  for clsidx:=succ(low(cls)) to high(cls) do
                    cls[clsidx]:=X86_64_NO_CLASS;
                end;
              clsidx:=1;
              while bytesleft>0 do
                begin
                  if clsidx>2 then
                    internalerror(200501283);
                  { create one location for the current class }
                  case cls[clsidx] of
                    X86_64_INTEGER_CLASS,
                    X86_64_INTEGERSI_CLASS:
                      begin
                        cur:=para.paraloc[side].add_location;
                        cur^.loc:=LOC_REGISTER;
                        if (cgsize<>OS_NO) and (cls[2]=X86_64_NO_CLASS) then
                          begin
                            { single location with a known size;
                              s64comp is pushed in an int register }
                            if cgsize=OS_C64 then
                              cur^.size:=OS_64
                            else
                              cur^.size:=cgsize;
                            subr:=cgsize2subreg(R_INTREGISTER,cur^.size);
                          end
                        else if cls[clsidx]=X86_64_INTEGER_CLASS then
                          begin
                            cur^.size:=OS_INT;
                            subr:=R_SUBWHOLE;
                          end
                        else
                          begin
                            cur^.size:=OS_32;
                            subr:=R_SUBD;
                          end;
                        { winx64 uses different registers }
                        if iswin64 then
                          begin
                            cur^.register:=newreg(R_INTREGISTER,paraintsupregs_winx64[intparareg],subr);
                            { matching mm register must be skipped }
                            inc(mmparareg);
                          end
                        else
                          cur^.register:=newreg(R_INTREGISTER,paraintsupregs[intparareg],subr);
                        inc(intparareg);
                        dec(bytesleft,tcgsize2size[cur^.size]);
                      end;
                    X86_64_SSE_CLASS,
                    X86_64_SSESF_CLASS,
                    X86_64_SSEDF_CLASS,
                    X86_64_SSEUP_CLASS:
                      begin
                        cur:=para.paraloc[side].add_location;
                        cur^.loc:=LOC_MMREGISTER;
                        if cls[clsidx]=X86_64_SSESF_CLASS then
                          begin
                            subr:=R_SUBMMS;
                            cur^.size:=OS_F32;
                          end
                        else if cls[clsidx]=X86_64_SSEDF_CLASS then
                          begin
                            subr:=R_SUBMMD;
                            cur^.size:=OS_F64;
                          end
                        else
                          begin
                            subr:=R_SUBMMWHOLE;
                            cur^.size:=OS_M64;
                          end;
                        { winx64 uses different registers }
                        if iswin64 then
                          begin
                            cur^.register:=newreg(R_MMREGISTER,parammsupregs_winx64[mmparareg],subr);
                            { matching int register must be skipped }
                            inc(intparareg);
                          end
                        else
                          cur^.register:=newreg(R_MMREGISTER,parammsupregs[mmparareg],subr);
                        inc(mmparareg);
                        dec(bytesleft,tcgsize2size[cur^.size]);
                      end;
                    X86_64_MEMORY_CLASS:
                      begin
                        cur:=para.paraloc[side].add_location;
                        cur^.loc:=LOC_REFERENCE;
                        { Hack alert!!! int_cgsize should be taught about OS_128,
                          but it is called in many places in the compiler of which
                          only a few can already handle OS_128, so fixing it
                          properly was out of the question to release 2.2.0 in
                          time. (DM) }
                        if cgsize=OS_128 then
                          begin
                            case bytesleft of
                              8:
                                cur^.size:=OS_64;
                              16:
                                cur^.size:=OS_128;
                              else
                                internalerror(200707143);
                            end;
                          end
                        else if cgsize in [OS_F32,OS_F64,OS_F80,OS_F128] then
                          cur^.size:=int_float_cgsize(bytesleft)
                        else
                          cur^.size:=int_cgsize(bytesleft);
                        { the caller addresses the slot via the stack pointer,
                          every other side via the frame pointer }
                        if side<>callerside then
                          cur^.reference.index:=NR_FRAME_POINTER_REG
                        else
                          cur^.reference.index:=NR_STACK_POINTER_REG;
                        stackalign:=used_align(size_2_align(bytesleft),defalign,defalign);
                        cur^.reference.offset:=parasize;
                        parasize:=align(parasize+bytesleft,stackalign);
                        { a memory location always takes the whole remainder }
                        bytesleft:=0;
                      end;
                    else
                      internalerror(2010053113);
                  end;
                  { move on to the second class only if there is one }
                  if (clsidx<2) and
                     (cls[clsidx+1]<>X86_64_NO_CLASS) then
                    inc(clsidx);
                end;
            end;
        end;
      { Register parameters are assigned from left-to-right, but the
        offsets on the stack are right-to-left. There is no need
        to reverse the offset, only adapt the calleeside with the
        start offset of the first param on the stack }
      if side=calleeside then
        for paraidx:=0 to paras.count-1 do
          begin
            cur:=tparavarsym(paras[paraidx]).paraloc[side].location;
            while assigned(cur) do
              begin
                if cur^.loc=LOC_REFERENCE then
                  inc(cur^.reference.offset,target_info.first_parm_offset);
                cur:=cur^.next;
              end;
          end;
    end;
  { Computes the caller-side locations for the declared parameters of p
    followed by the variable arguments in varargspara, stores the final
    mm register counter in varargspara.mmregsused and returns the
    accumulated stack size. }
  function tx86_64paramanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
    var
      nextintreg,
      nextmmreg,
      stackbytes : longint;
    begin
      nextintreg:=0;
      nextmmreg:=0;
      { win64 starts with 4*8 bytes already accounted for (presumably the
        register home area of that calling convention - TODO confirm) }
      stackbytes:=0;
      if target_info.system=system_x86_64_win64 then
        stackbytes:=4*8;
      { first the normal parameters ... }
      create_paraloc_info_intern(p,callerside,p.paras,nextintreg,nextmmreg,stackbytes,false);
      { ... then the varargs, continuing with the same counters }
      create_paraloc_info_intern(p,callerside,varargspara,nextintreg,nextmmreg,stackbytes,true);
      { store used no. of SSE registers, that needs to be passed in %AL }
      varargspara.mmregsused:=nextmmreg;
      result:=stackbytes;
    end;
  { Computes the parameter locations of p for the given side, then the
    function result location, and returns the stack size that the
    parameters occupy. }
  function tx86_64paramanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
    var
      nextintreg,
      nextmmreg,
      stackbytes : longint;
    begin
      { win64 starts with 4*8 bytes already accounted for (presumably the
        register home area of that calling convention - TODO confirm) }
      if target_info.system<>system_x86_64_win64 then
        stackbytes:=0
      else
        stackbytes:=4*8;
      nextintreg:=0;
      nextmmreg:=0;
      create_paraloc_info_intern(p,side,p.paras,nextintreg,nextmmreg,stackbytes,false);
      { the function result gets its location as well }
      create_funcretloc_info(p,side);
      { the caller of this method needs the size allocated on the stack }
      result:=stackbytes;
    end;
  { Unit initialization: create the x86-64 parameter manager and store it in
    "paramanager" (declared outside this unit's visible code, presumably in
    paramgr - confirm). }
  1129. begin
  1130. paramanager:=tx86_64paramanager.create;
  1131. end.