cpupara.pas 46 KB


  1. {
  2. Copyright (c) 2002 by Florian Klaempfl
  3. Generates the argument location information for x86-64 target
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit cpupara;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. globtype,
  22. cpubase,cgbase,cgutils,
  23. symconst,symtype,symsym,symdef,
  24. aasmtai,aasmdata,
  25. parabase,paramgr;
type
  { Parameter manager for the x86-64 target. It overrides the tparamanager
    hooks listed below; the SysV and Win64 conventions are told apart inside
    the implementations by testing target_info.system. }
  tx86_64paramanager = class(tparamanager)
  private
    { stores get_funcretloc(p,side,p.returndef) in p.funcretloc[side] }
    procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
    { assigns a location to every parameter in paras; intparareg, mmparareg
      and parasize are passed by reference and carry the running state }
    procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
      var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  public
    { true when every location of cgpara is LOC_REFERENCE }
    function param_use_paraloc(const cgpara:tcgpara):boolean;override;
    { true when only the address of the parameter is passed }
    function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
    { true when a function result of type def is returned via a hidden
      reference instead of registers }
    function ret_in_param(def : tdef;calloption : tproccalloption) : boolean;override;
    { fills cgpara with the location of the nr-th (1-based) address-sized
      integer parameter }
    procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
    { register sets reported as volatile for the current target }
    function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
    function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
    function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
    { NOTE(review): the implementations of the next two methods are not
      visible in this excerpt; they presumably drive
      create_paraloc_info_intern - confirm }
    function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
    function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
    { builds the location description for a function result of type def }
    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
  end;
  44. implementation
  45. uses
  46. cutils,verbose,
  47. systems,
  48. defutil,
  49. symtable;
const
  { Super registers used for passing parameters, in assignment order.
    The plain tables are used when target_info.system is not
    system_x86_64_win64, the *_winx64 tables when it is. }
  { integer parameter registers, non-win64 targets }
  paraintsupregs : array[0..5] of tsuperregister = (RS_RDI,RS_RSI,RS_RDX,RS_RCX,RS_R8,RS_R9);
  { mm (SSE) parameter registers, non-win64 targets }
  parammsupregs : array[0..7] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7);
  { integer parameter registers, win64 }
  paraintsupregs_winx64 : array[0..3] of tsuperregister = (RS_RCX,RS_RDX,RS_R8,RS_R9);
  { mm (SSE) parameter registers, win64 }
  parammsupregs_winx64 : array[0..3] of tsuperregister = (RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3);
  55. {
  56. The argument classification code largely comes from libffi:
  57. ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <[email protected]>
  58. Copyright (c) 2008 Red Hat, Inc.
  59. x86-64 Foreign Function Interface
  60. Permission is hereby granted, free of charge, to any person obtaining
  61. a copy of this software and associated documentation files (the
  62. ``Software''), to deal in the Software without restriction, including
  63. without limitation the rights to use, copy, modify, merge, publish,
  64. distribute, sublicense, and/or sell copies of the Software, and to
  65. permit persons to whom the Software is furnished to do so, subject to
  66. the following conditions:
  67. The above copyright notice and this permission notice shall be included
  68. in all copies or substantial portions of the Software.
  69. THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
  70. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  71. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  72. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  73. HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  74. WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  75. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  76. DEALINGS IN THE SOFTWARE.
  77. ----------------------------------------------------------------------- *)
  78. }
const
  { number of eightbyte slots tracked per argument (4 slots = 32 bytes,
    matching the "def.size > 32" memory cut-off used by
    init_aggregate_classification) }
  MAX_PARA_CLASSES = 4;

type
  { Register class of one eightbyte of an argument. The *SI / *SF / *DF
    variants are used by the classification routines for 32 bit integer,
    single and double payloads of the corresponding INTEGER / SSE class. }
  tx64paraclass = (
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,X86_64_SSESF_CLASS,X86_64_SSEDF_CLASS,X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  );
  { one class per eightbyte of the argument }
  tx64paraclasses = array[0..MAX_PARA_CLASSES-1] of tx64paraclass;
  91. { Win64-specific helper }
  92. function aggregate_in_registers_win64(varspez:tvarspez;size:longint):boolean;
  93. begin
  94. { TODO: Temporary hack: vs_const parameters are always passed by reference for win64}
  95. result:=(varspez=vs_value) and (size in [1,2,4,8])
  96. end;
  97. (* x86-64 register passing implementation. See x86-64 ABI for details. Goal
  98. of this code is to classify each 8bytes of incoming argument by the register
  99. class and assign registers accordingly. *)
  100. (* Return the union class of CLASS1 and CLASS2.
  101. See the x86-64 PS ABI for details. *)
  102. function merge_classes(class1, class2: tx64paraclass): tx64paraclass;
  103. begin
  104. (* Rule #1: If both classes are equal, this is the resulting class. *)
  105. if (class1=class2) then
  106. exit(class1);
  107. (* Rule #2: If one of the classes is NO_CLASS, the resulting class is
  108. the other class. *)
  109. if (class1=X86_64_NO_CLASS) then
  110. exit(class2);
  111. if (class2=X86_64_NO_CLASS) then
  112. exit(class1);
  113. (* Rule #3: If one of the classes is MEMORY, the result is MEMORY. *)
  114. if (class1=X86_64_MEMORY_CLASS) or
  115. (class2=X86_64_MEMORY_CLASS) then
  116. exit(X86_64_MEMORY_CLASS);
  117. (* Rule #4: If one of the classes is INTEGER, the result is INTEGER. *)
  118. { 32 bit }
  119. if ((class1=X86_64_INTEGERSI_CLASS) and
  120. (class2=X86_64_SSESF_CLASS)) or
  121. ((class2=X86_64_INTEGERSI_CLASS) and
  122. (class1=X86_64_SSESF_CLASS)) then
  123. exit(X86_64_INTEGERSI_CLASS);
  124. { 64 bit }
  125. if (class1 in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) or
  126. (class2 in [X86_64_INTEGER_CLASS,X86_64_INTEGERSI_CLASS]) then
  127. exit(X86_64_INTEGER_CLASS);
  128. (* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
  129. MEMORY is used. *)
  130. if (class1 in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) or
  131. (class2 in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS]) then
  132. exit(X86_64_MEMORY_CLASS);
  133. (* Rule #6: Otherwise class SSE is used. *)
  134. result:=X86_64_SSE_CLASS;
  135. end;
  136. (* Classify the argument of type TYPE and mode MODE.
  137. CLASSES will be filled by the register class used to pass each word
  138. of the operand. The number of words is returned. In case the parameter
  139. should be passed in memory, 0 is returned. As a special case for zero
  140. sized containers, classes[0] will be NO_CLASS and 1 is returned.
  141. real_size contains either def.size, or a value derived from
  142. def.bitpackedsize and the field offset denoting the number of bytes
  143. spanned by a bitpacked field
  144. See the x86-64 PS ABI for details.
  145. *)
  146. function classify_as_integer_argument(real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
  147. var
  148. size: aint;
  149. begin
  150. size:=byte_offset+real_size;
  151. if size<=4 then
  152. classes[0]:=X86_64_INTEGERSI_CLASS
  153. else
  154. classes[0]:=X86_64_INTEGER_CLASS;
  155. if size<=8 then
  156. result:=1
  157. else
  158. begin
  159. if size<=12 then
  160. classes[1]:=X86_64_INTEGERSI_CLASS
  161. else if (size<=16) then
  162. classes[1]:=X86_64_INTEGER_CLASS
  163. else
  164. internalerror(2010021401);
  165. result:=2;
  166. end
  167. end;
{ forward declaration: classify_aggregate_element (used by the record and
  array classifiers below) calls classify_argument before its body appears }
function classify_argument(def: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint; forward;
  169. function init_aggregate_classification(def: tdef; varspez: tvarspez; out words: longint; out classes: tx64paraclasses): longint;
  170. var
  171. i: longint;
  172. begin
  173. words:=0;
  174. { win64 follows a different convention here }
  175. if (target_info.system=system_x86_64_win64) then
  176. begin
  177. if aggregate_in_registers_win64(varspez,def.size) then
  178. begin
  179. classes[0]:=X86_64_INTEGER_CLASS;
  180. result:=1;
  181. end
  182. else
  183. result:=0;
  184. exit;
  185. end;
  186. (* If the struct is larger than 32 bytes, pass it on the stack. *)
  187. if def.size > 32 then
  188. exit(0);
  189. words:=(def.size+7) div 8;
  190. (* Zero sized arrays or structures are NO_CLASS. We return 0 to
  191. signal memory class, so handle it as special case. *)
  192. if (words=0) then
  193. begin
  194. classes[0]:=X86_64_NO_CLASS;
  195. exit(1);
  196. end;
  197. { we'll be merging the classes elements with the subclasses
  198. elements, so initialise them first }
  199. for i:=low(classes) to high(classes) do
  200. classes[i]:=X86_64_NO_CLASS;
  201. result:=words;
  202. end;
  203. function classify_aggregate_element(def: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; new_byte_offset: aint): longint;
  204. var
  205. subclasses: tx64paraclasses;
  206. i,
  207. pos: longint;
  208. begin
  209. result:=classify_argument(def,varspez,real_size,subclasses,new_byte_offset mod 8);
  210. if (result=0) then
  211. exit;
  212. pos:=new_byte_offset div 8;
  213. if result-1+pos>high(classes) then
  214. internalerror(2010053108);
  215. for i:=0 to result-1 do
  216. begin
  217. classes[i+pos] :=
  218. merge_classes(subclasses[i],classes[i+pos]);
  219. end;
  220. end;
  221. function finalize_aggregate_classification(def: tdef; words: longint; var classes: tx64paraclasses): longint;
  222. var
  223. i: longint;
  224. begin
  225. if (words>2) then
  226. begin
  227. (* When size > 16 bytes, if the first one isn't
  228. X86_64_SSE_CLASS or any other ones aren't
  229. X86_64_SSEUP_CLASS, everything should be passed in
  230. memory. *)
  231. if (classes[0]<>X86_64_SSE_CLASS) then
  232. exit(0);
  233. for i:=1 to words-1 do
  234. if (classes[i]<>X86_64_SSEUP_CLASS) then
  235. exit(0);
  236. end;
  237. (* Final merger cleanup. *)
  238. (* The first one must never be X86_64_SSEUP_CLASS or
  239. X86_64_X87UP_CLASS. *)
  240. if (classes[0]=X86_64_SSEUP_CLASS) or
  241. (classes[0]=X86_64_X87UP_CLASS) then
  242. internalerror(2010021402);
  243. for i:=0 to words-1 do
  244. begin
  245. (* If one class is MEMORY, everything should be passed in
  246. memory. *)
  247. if (classes[i]=X86_64_MEMORY_CLASS) then
  248. exit(0);
  249. (* The X86_64_SSEUP_CLASS should be always preceded by
  250. X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. *)
  251. if (classes[i]=X86_64_SSEUP_CLASS) and
  252. (classes[i-1]<>X86_64_SSE_CLASS) and
  253. (classes[i-1]<>X86_64_SSEUP_CLASS) then
  254. classes[i]:=X86_64_SSE_CLASS;
  255. (* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
  256. everything should be passed in memory. *)
  257. if (classes[i]=X86_64_X87UP_CLASS) and
  258. (classes[i-1]<>X86_64_X87_CLASS) then
  259. exit(0);
  260. end;
  261. { FIXME: in case a record contains empty padding space, e.g. a
  262. "single" field followed by a "double", then we have a problem
  263. because the cgpara helpers cannot figure out that they should
  264. skip 4 bytes after storing the single (LOC_MMREGISTER with size
  265. OS_F32) to memory before storing the double -> for now scale
  266. such locations always up to 64 bits, although this loads/stores
  267. some superfluous data }
  268. { 1) the first part is 32 bit while there is still a second part }
  269. if (classes[1]<>X86_64_NO_CLASS) then
  270. case classes[0] of
  271. X86_64_INTEGERSI_CLASS:
  272. classes[0]:=X86_64_INTEGER_CLASS;
  273. X86_64_SSESF_CLASS:
  274. classes[0]:=X86_64_SSE_CLASS;
  275. end;
  276. { 2) the second part is 32 bit, but the total size is > 12 bytes }
  277. if (def.size>12) then
  278. case classes[1] of
  279. X86_64_INTEGERSI_CLASS:
  280. classes[1]:=X86_64_INTEGER_CLASS;
  281. X86_64_SSESF_CLASS:
  282. classes[1]:=X86_64_SSE_CLASS;
  283. end;
  284. result:=words;
  285. end;
  286. function classify_record(def: tdef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  287. var
  288. vs: tfieldvarsym;
  289. size,
  290. new_byte_offset: aint;
  291. i,
  292. words,
  293. num: longint;
  294. begin
  295. result:=init_aggregate_classification(def,varspez,words,classes);
  296. if (words=0) then
  297. exit;
  298. (* Merge the fields of the structure. *)
  299. for i:=0 to tabstractrecorddef(def).symtable.symlist.count-1 do
  300. begin
  301. if tsym(tabstractrecorddef(def).symtable.symlist[i]).typ<>fieldvarsym then
  302. continue;
  303. vs:=tfieldvarsym(tabstractrecorddef(def).symtable.symlist[i]);
  304. num:=-1;
  305. if not tabstractrecordsymtable(tabstractrecorddef(def).symtable).is_packed then
  306. begin
  307. new_byte_offset:=byte_offset+vs.fieldoffset;
  308. size:=vs.vardef.size;
  309. end
  310. else
  311. begin
  312. new_byte_offset:=byte_offset+vs.fieldoffset div 8;
  313. if (vs.vardef.typ in [orddef,enumdef]) then
  314. { calculate the number of bytes spanned by
  315. this bitpacked field }
  316. size:=((vs.fieldoffset+vs.vardef.packedbitsize+7) div 8)-(vs.fieldoffset div 8)
  317. else
  318. size:=vs.vardef.size
  319. end;
  320. num:=classify_aggregate_element(vs.vardef,varspez,size,classes,new_byte_offset);
  321. if (num=0) then
  322. exit(0);
  323. end;
  324. result:=finalize_aggregate_classification(def,words,classes);
  325. end;
  326. function classify_normal_array(def: tarraydef; varspez: tvarspez; var classes: tx64paraclasses; byte_offset: aint): longint;
  327. var
  328. i, elecount: aword;
  329. size,
  330. elesize,
  331. new_byte_offset,
  332. bitoffset: aint;
  333. words,
  334. num: longint;
  335. isbitpacked: boolean;
  336. begin
  337. result:=init_aggregate_classification(def,varspez,words,classes);
  338. if (words=0) then
  339. exit;
  340. isbitpacked:=is_packed_array(def);
  341. if not isbitpacked then
  342. begin
  343. elesize:=def.elesize;
  344. size:=elesize;
  345. end
  346. else
  347. begin
  348. elesize:=def.elepackedbitsize;
  349. bitoffset:=0;
  350. end;
  351. (* Merge the elements of the array. *)
  352. i:=0;
  353. elecount:=def.elecount;
  354. repeat
  355. if not isbitpacked then
  356. begin
  357. { size does not change }
  358. new_byte_offset:=byte_offset+i*elesize;
  359. end
  360. else
  361. begin
  362. { calculate the number of bytes spanned by this bitpacked
  363. element }
  364. size:=((bitoffset+elesize+7) div 8)-(bitoffset div 8);
  365. new_byte_offset:=byte_offset+(elesize*i) div 8;
  366. { bit offset of next element }
  367. inc(bitoffset,elesize);
  368. end;
  369. num:=classify_aggregate_element(def.elementdef,varspez,size,classes,new_byte_offset);
  370. if (num=0) then
  371. exit(0);
  372. inc(i);
  373. until (i=elecount);
  374. result:=finalize_aggregate_classification(def,words,classes);
  375. end;
  376. function classify_argument(def: tdef; varspez: tvarspez; real_size: aint; var classes: tx64paraclasses; byte_offset: aint): longint;
  377. begin
  378. case def.typ of
  379. orddef,
  380. enumdef,
  381. pointerdef,
  382. classrefdef:
  383. result:=classify_as_integer_argument(real_size,classes,byte_offset);
  384. formaldef:
  385. result:=classify_as_integer_argument(voidpointertype.size,classes,byte_offset);
  386. floatdef:
  387. begin
  388. case tfloatdef(def).floattype of
  389. s32real:
  390. begin
  391. if byte_offset=0 then
  392. classes[0]:=X86_64_SSESF_CLASS
  393. else
  394. { if we have e.g. a record with two successive "single"
  395. fields, we need a 64 bit rather than a 32 bit load }
  396. classes[0]:=X86_64_SSE_CLASS;
  397. result:=1;
  398. end;
  399. s64real:
  400. begin
  401. classes[0]:=X86_64_SSEDF_CLASS;
  402. result:=1;
  403. end;
  404. s80real,
  405. sc80real:
  406. begin
  407. classes[0]:=X86_64_X87_CLASS;
  408. classes[1]:=X86_64_X87UP_CLASS;
  409. result:=2;
  410. end;
  411. s64comp,
  412. s64currency:
  413. begin
  414. classes[0]:=X86_64_INTEGER_CLASS;
  415. result:=1;
  416. end;
  417. s128real:
  418. begin
  419. classes[0]:=X86_64_SSE_CLASS;
  420. classes[1]:=X86_64_SSEUP_CLASS;
  421. result:=2;
  422. end;
  423. else
  424. internalerror(2010060301);
  425. end;
  426. end;
  427. recorddef:
  428. result:=classify_record(def,varspez,classes,byte_offset);
  429. objectdef:
  430. begin
  431. if is_object(def) then
  432. { pass by reference, like ppc and i386 }
  433. result:=0
  434. else
  435. { all kinds of pointer types: class, objcclass, interface, ... }
  436. result:=classify_as_integer_argument(voidpointertype.size,classes,byte_offset);
  437. end;
  438. setdef:
  439. begin
  440. if is_smallset(def) then
  441. result:=classify_as_integer_argument(def.size,classes,byte_offset)
  442. else
  443. result:=0;
  444. end;
  445. stringdef:
  446. begin
  447. if (tstringdef(def).stringtype in [st_shortstring,st_longstring]) then
  448. result:=0
  449. else
  450. result:=classify_as_integer_argument(def.size,classes,byte_offset);
  451. end;
  452. arraydef:
  453. begin
  454. { a dynamic array is treated like a pointer }
  455. if is_dynamic_array(def) then
  456. result:=classify_as_integer_argument(voidpointertype.size,classes,byte_offset)
  457. { other special arrays are passed on the stack }
  458. else if is_open_array(def) or
  459. is_array_of_const(def) then
  460. result:=0
  461. else
  462. { normal array }
  463. result:=classify_normal_array(tarraydef(def),varspez,classes,byte_offset);
  464. end;
  465. { the file record is definitely too big }
  466. filedef:
  467. result:=0;
  468. procvardef:
  469. begin
  470. if (po_methodpointer in tprocvardef(def).procoptions) then
  471. begin
  472. { treat as TMethod record }
  473. def:=search_system_type('TMETHOD').typedef;
  474. result:=classify_argument(def,varspez,def.size,classes,byte_offset);
  475. end
  476. else
  477. { pointer }
  478. result:=classify_as_integer_argument(def.size,classes,byte_offset);
  479. end;
  480. variantdef:
  481. begin
  482. { same as tvardata record }
  483. def:=search_system_type('TVARDATA').typedef;
  484. result:=classify_argument(def,varspez,def.size,classes,byte_offset);
  485. end;
  486. else
  487. internalerror(2010021405);
  488. end;
  489. end;
  490. procedure getvalueparaloc(varspez:tvarspez;def:tdef;var loc1,loc2:tx64paraclass);
  491. var
  492. size: aint;
  493. i: longint;
  494. classes: tx64paraclasses;
  495. numclasses: longint;
  496. begin
  497. { init the classes array, because even if classify_argument inits only
  498. one element we copy both to loc1/loc2 in case "1" is returned }
  499. for i:=low(classes) to high(classes) do
  500. classes[i]:=X86_64_NO_CLASS;
  501. { def.size internalerrors for open arrays and dynamic arrays, since
  502. their size cannot be determined at compile-time.
  503. classify_argument does not look at the realsize argument for arrays
  504. cases, but we obviously do have to pass something... }
  505. if is_special_array(def) then
  506. size:=-1
  507. else
  508. size:=def.size;
  509. numclasses:=classify_argument(def,varspez,size,classes,0);
  510. case numclasses of
  511. 0:
  512. begin
  513. loc1:=X86_64_MEMORY_CLASS;
  514. loc2:=X86_64_NO_CLASS;
  515. end;
  516. 1,2:
  517. begin
  518. { If the class is X87, X87UP or COMPLEX_X87, it is passed in memory }
  519. if classes[0] in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS] then
  520. classes[0]:=X86_64_MEMORY_CLASS;
  521. if classes[1] in [X86_64_X87_CLASS,X86_64_X87UP_CLASS,X86_64_COMPLEX_X87_CLASS] then
  522. classes[1]:=X86_64_MEMORY_CLASS;
  523. loc1:=classes[0];
  524. loc2:=classes[1];
  525. end
  526. else
  527. { 4 can only happen for _m256 vectors, not yet supported }
  528. internalerror(2010021501);
  529. end;
  530. end;
  531. function tx86_64paramanager.ret_in_param(def : tdef;calloption : tproccalloption) : boolean;
  532. var
  533. classes: tx64paraclasses;
  534. numclasses: longint;
  535. begin
  536. if ((target_info.system=system_x86_64_win64) and
  537. (calloption=pocall_safecall)) then
  538. exit(true);
  539. case def.typ of
  540. { for records it depends on their contents and size }
  541. recorddef,
  542. { make sure we handle 'procedure of object' correctly }
  543. procvardef:
  544. begin
  545. numclasses:=classify_argument(def,vs_value,def.size,classes,0);
  546. result:=(numclasses=0);
  547. end;
  548. else
  549. result:=inherited ret_in_param(def,calloption);
  550. end;
  551. end;
  552. function tx86_64paramanager.param_use_paraloc(const cgpara:tcgpara):boolean;
  553. var
  554. paraloc : pcgparalocation;
  555. begin
  556. if not assigned(cgpara.location) then
  557. internalerror(200410102);
  558. result:=true;
  559. { All locations are LOC_REFERENCE }
  560. paraloc:=cgpara.location;
  561. while assigned(paraloc) do
  562. begin
  563. if (paraloc^.loc<>LOC_REFERENCE) then
  564. begin
  565. result:=false;
  566. exit;
  567. end;
  568. paraloc:=paraloc^.next;
  569. end;
  570. end;
  571. { true if a parameter is too large to copy and only the address is pushed }
  572. function tx86_64paramanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
  573. var
  574. classes: tx64paraclasses;
  575. numclasses: longint;
  576. begin
  577. result:=false;
  578. { var,out,constref always require address }
  579. if varspez in [vs_var,vs_out,vs_constref] then
  580. begin
  581. result:=true;
  582. exit;
  583. end;
  584. { Only vs_const, vs_value here }
  585. case def.typ of
  586. formaldef :
  587. result:=true;
  588. recorddef :
  589. begin
  590. { MetroWerks Pascal: const records always passed by reference
  591. (for Mac OS X interfaces) }
  592. if (calloption=pocall_mwpascal) and
  593. (varspez=vs_const) then
  594. result:=true
  595. { Win ABI depends on size to pass it in a register or not }
  596. else if (target_info.system=system_x86_64_win64) then
  597. result:=not aggregate_in_registers_win64(varspez,def.size)
  598. else
  599. { SysV ABI always passes it as value parameter }
  600. result:=false;
  601. end;
  602. arraydef :
  603. begin
  604. { cdecl array of const need to be ignored and therefor be puhsed
  605. as value parameter with length 0 }
  606. if ((calloption in [pocall_cdecl,pocall_cppdecl]) and
  607. is_array_of_const(def)) or
  608. is_dynamic_array(def) then
  609. result:=false
  610. else
  611. { pass all arrays by reference to be compatible with C (passing
  612. an array by value (= copying it on the stack) does not exist,
  613. because an array is the same as a pointer there }
  614. result:=true
  615. end;
  616. objectdef :
  617. begin
  618. { don't treat objects like records, because we only know wheter
  619. or not they'll have a VMT after the entire object is parsed
  620. -> if they are used as function result from one of their own
  621. methods, their size can still change after we've determined
  622. whether this function result should be returned by reference or
  623. by value }
  624. if is_object(def) then
  625. result:=true;
  626. end;
  627. variantdef,
  628. stringdef,
  629. procvardef,
  630. setdef :
  631. begin
  632. numclasses:=classify_argument(def,vs_value,def.size,classes,0);
  633. result:=numclasses=0;
  634. end;
  635. end;
  636. end;
  637. function tx86_64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
  638. begin
  639. if target_info.system=system_x86_64_win64 then
  640. result:=[RS_RAX,RS_RCX,RS_RDX,RS_R8,RS_R9,RS_R10,RS_R11]
  641. else
  642. result:=[RS_RAX,RS_RCX,RS_RDX,RS_RSI,RS_RDI,RS_R8,RS_R9,RS_R10,RS_R11];
  643. end;
  644. function tx86_64paramanager.get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;
  645. begin
  646. if target_info.system=system_x86_64_win64 then
  647. result:=[RS_XMM0..RS_XMM5]
  648. else
  649. result:=[RS_XMM0..RS_XMM15];
  650. end;
  651. function tx86_64paramanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
  652. begin
  653. result:=[RS_ST0..RS_ST7];
  654. end;
  655. procedure tx86_64paramanager.getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);
  656. var
  657. paraloc : pcgparalocation;
  658. begin
  659. cgpara.reset;
  660. cgpara.size:=OS_ADDR;
  661. cgpara.intsize:=sizeof(pint);
  662. cgpara.alignment:=get_para_align(calloption);
  663. paraloc:=cgpara.add_location;
  664. with paraloc^ do
  665. begin
  666. size:=OS_INT;
  667. if target_info.system=system_x86_64_win64 then
  668. begin
  669. if nr<1 then
  670. internalerror(200304303)
  671. else if nr<=high(paraintsupregs_winx64)+1 then
  672. begin
  673. loc:=LOC_REGISTER;
  674. register:=newreg(R_INTREGISTER,paraintsupregs_winx64[nr-1],R_SUBWHOLE);
  675. end
  676. else
  677. begin
  678. loc:=LOC_REFERENCE;
  679. reference.index:=NR_STACK_POINTER_REG;
  680. reference.offset:=(nr-6)*sizeof(aint);
  681. end;
  682. end
  683. else
  684. begin
  685. if nr<1 then
  686. internalerror(200304303)
  687. else if nr<=high(paraintsupregs)+1 then
  688. begin
  689. loc:=LOC_REGISTER;
  690. register:=newreg(R_INTREGISTER,paraintsupregs[nr-1],R_SUBWHOLE);
  691. end
  692. else
  693. begin
  694. loc:=LOC_REFERENCE;
  695. reference.index:=NR_STACK_POINTER_REG;
  696. reference.offset:=(nr-6)*sizeof(aint);
  697. end;
  698. end;
  699. end;
  700. end;
procedure tx86_64paramanager.create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
  { Computes the location of p's function result (type p.returndef) for
    the given side and stores it in p.funcretloc[side]. }
  begin
    p.funcretloc[side]:=get_funcretloc(p,side,p.returndef);
  end;
function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
  { Builds the location description of a function result of type def for
    procedure p. Handled in this order: void results, results returned via
    a hidden reference (ret_in_param), floating point results, and finally
    everything else, which is classified with classify_argument and gets
    one location per eightbyte. The side parameter is not read in this
    routine. }
  const
    { registers handed out, in order, to the INTEGER resp. SSE eightbytes
      of a result }
    intretregs: array[0..1] of tregister = (NR_FUNCTION_RETURN_REG,NR_FUNCTION_RETURN_REG_HIGH);
    mmretregs: array[0..1] of tregister = (NR_MM_RESULT_REG,NR_MM_RESULT_REG_HIGH);
  var
    classes: tx64paraclasses;
    i,
    numclasses: longint;
    intretregidx,
    mmretregidx: longint;
    retcgsize : tcgsize;
    paraloc : pcgparalocation;
  begin
    result.init;
    result.alignment:=get_para_align(p.proccalloption);
    { void has no location }
    if is_void(def) then
      begin
        paraloc:=result.add_location;
        result.size:=OS_NO;
        result.intsize:=0;
        paraloc^.size:=OS_NO;
        paraloc^.loc:=LOC_VOID;
        exit;
      end;
    { Constructors return self instead of a boolean }
    if (p.proctypeoption=potype_constructor) then
      begin
        retcgsize:=OS_ADDR;
        result.intsize:=sizeof(pint);
      end
    else
      begin
        retcgsize:=def_cgsize(def);
        result.intsize:=def.size;
      end;
    result.size:=retcgsize;
    { Return is passed as var parameter }
    if ret_in_param(def,p.proccalloption) then
      begin
        { only loc and size are filled in for the reference here }
        paraloc:=result.add_location;
        paraloc^.loc:=LOC_REFERENCE;
        paraloc^.size:=retcgsize;
        exit;
      end;
    { Return in FPU register? -> don't use classify_argument(), because
      currency and comp need special treatment here (they are integer class
      when passing as parameter, but LOC_FPUREGISTER as function result) }
    if def.typ=floatdef then
      begin
        paraloc:=result.add_location;
        case tfloatdef(def).floattype of
          s32real:
            begin
              paraloc^.loc:=LOC_MMREGISTER;
              paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
              paraloc^.size:=OS_F32;
            end;
          s64real:
            begin
              paraloc^.loc:=LOC_MMREGISTER;
              paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
              paraloc^.size:=OS_F64;
            end;
          { the first two only exist on targets with an x87, on others
            they are replaced by int64 }
          s64currency,
          s64comp,
          s80real,
          sc80real:
            begin
              paraloc^.loc:=LOC_FPUREGISTER;
              paraloc^.register:=NR_FPU_RESULT_REG;
              paraloc^.size:=retcgsize;
            end;
          else
            { any other float type (e.g. s128real) is not handled here }
            internalerror(200405034);
        end;
      end
    else
      { Return in register }
      begin
        numclasses:=classify_argument(def,vs_value,def.size,classes,0);
        { this would mean a memory return }
        if (numclasses=0) then
          internalerror(2010021502);
        { this would mean an _m256 vector (valid, but not yet supported) }
        if (numclasses>2) then
          internalerror(2010021503);
        { next free entry of intretregs / mmretregs }
        intretregidx:=0;
        mmretregidx:=0;
        { one location per classified eightbyte }
        for i:=0 to numclasses-1 do
          begin
            paraloc:=result.add_location;
            case classes[i] of
              X86_64_INTEGERSI_CLASS,
              X86_64_INTEGER_CLASS:
                begin
                  paraloc^.loc:=LOC_REGISTER;
                  paraloc^.register:=intretregs[intretregidx];
                  { INTEGER eightbytes are always 64 bit; a 32 bit class
                    uses the real result size when the whole result is
                    1, 2 or 4 bytes, and 32 bit otherwise }
                  if classes[i]=X86_64_INTEGER_CLASS then
                    paraloc^.size:=OS_64
                  else if result.intsize in [1,2,4] then
                    paraloc^.size:=retcgsize
                  else
                    paraloc^.size:=OS_32;
                  { narrow the register to the chosen size }
                  setsubreg(paraloc^.register,cgsize2subreg(R_INTREGISTER,paraloc^.size));
                  inc(intretregidx);
                end;
              X86_64_SSE_CLASS,
              X86_64_SSEUP_CLASS,
              X86_64_SSESF_CLASS,
              X86_64_SSEDF_CLASS:
                begin
                  paraloc^.loc:=LOC_MMREGISTER;
                  paraloc^.register:=mmretregs[mmretregidx];
                  case classes[i] of
                    X86_64_SSESF_CLASS:
                      begin
                        setsubreg(paraloc^.register,R_SUBMMS);
                        paraloc^.size:=OS_F32;
                      end;
                    X86_64_SSEDF_CLASS:
                      begin
                        setsubreg(paraloc^.register,R_SUBMMD);
                        paraloc^.size:=OS_F64;
                      end;
                    else
                      { plain SSE / SSEUP: whole register, 64 bit payload }
                      begin
                        setsubreg(paraloc^.register,R_SUBMMWHOLE);
                        paraloc^.size:=OS_M64;
                      end;
                  end;
                  inc(mmretregidx);
                end;
              X86_64_NO_CLASS:
                begin
                  { empty record/array }
                  if (i<>0) or
                     (numclasses<>1) then
                    internalerror(2010060302);
                  paraloc^.loc:=LOC_VOID;
                end;
              else
                { MEMORY and x87 classes are not expected at this point }
                internalerror(2010021504);
            end;
          end;
      end;
  end;
  { Assigns a location (integer register, MM register or stack slot) to every
    parameter in paras, in list order, and stores the result in
    hp.paraloc[side] of each tparavarsym.

    p            - procedure definition; supplies the calling convention
                   passed to get_para_align, push_addr_param and push_size
    side         - callerside or calleeside; selects which paraloc is filled
                   and whether stack slots are addressed through
                   NR_STACK_POINTER_REG or NR_FRAME_POINTER_REG
    paras        - list of tparavarsym entries to process
    intparareg   - in/out: index of the next free integer parameter register
    mmparareg    - in/out: index of the next free MM parameter register
    parasize     - in/out: stack bytes allocated so far; also the offset
                   given to the next stack parameter
    varargsparas - when true on win64, float parameters are reclassified so
                   that they are passed in integer registers

    The three counters are var parameters, so a later call can continue
    where an earlier call stopped.
    Raises internalerror for classes this routine cannot place. }
  854. procedure tx86_64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist;
  855. var intparareg,mmparareg,parasize:longint;varargsparas: boolean);
  856. var
  857. hp : tparavarsym;
  858. paraloc : pcgparalocation;
  859. subreg : tsubregister;
  860. pushaddr : boolean;
  861. paracgsize : tcgsize;
  862. loc : array[1..2] of tx64paraclass;
  863. needintloc,
  864. needmmloc,
  865. paralen,
  866. locidx,
  867. i,
  868. varalign,
  869. paraalign : longint;
  870. begin
  871. paraalign:=get_para_align(p.proccalloption);
  872. { Register parameters are assigned from left to right }
  873. for i:=0 to paras.count-1 do
  874. begin
  875. hp:=tparavarsym(paras[i]);
  876. pushaddr:=push_addr_param(hp.varspez,hp.vardef,p.proccalloption);
       { a parameter passed by address occupies one integer location of
         pointer size; otherwise the value itself is classified into at
         most two classes, loc[1] and loc[2] }
  877. if pushaddr then
  878. begin
  879. loc[1]:=X86_64_INTEGER_CLASS;
  880. loc[2]:=X86_64_NO_CLASS;
  881. paracgsize:=OS_ADDR;
  882. paralen:=sizeof(aint);
  883. end
  884. else
  885. begin
  886. getvalueparaloc(hp.varspez,hp.vardef,loc[1],loc[2]);
  887. paralen:=push_size(hp.varspez,hp.vardef,p.proccalloption);
  888. paracgsize:=def_cgsize(hp.vardef);
  889. end;
  890. { cheat for now, we should copy the value to an mm reg as well (FK) }
       { win64 varargs only: a float argument is reclassified as a single
         integer class, 64 bit wide for OS_F64 and 32 bit wide otherwise }
  891. if varargsparas and
  892. (target_info.system = system_x86_64_win64) and
  893. (hp.vardef.typ = floatdef) then
  894. begin
  895. loc[2]:=X86_64_NO_CLASS;
  896. if paracgsize=OS_F64 then
  897. begin
  898. loc[1]:=X86_64_INTEGER_CLASS;
  899. paracgsize:=OS_64
  900. end
  901. else
  902. begin
  903. loc[1]:=X86_64_INTEGERSI_CLASS;
  904. paracgsize:=OS_32;
  905. end;
  906. end;
       { reset the location record for this side, then fill in its overall
         size, byte length and alignment before locations are added }
  907. hp.paraloc[side].reset;
  908. hp.paraloc[side].size:=paracgsize;
  909. hp.paraloc[side].intsize:=paralen;
  910. hp.paraloc[side].Alignment:=paraalign;
  911. if paralen>0 then
  912. begin
  913. { Enough registers free? }
       { count how many integer and how many MM registers the two classes
         of this parameter would need }
  914. needintloc:=0;
  915. needmmloc:=0;
  916. for locidx:=low(loc) to high(loc) do
  917. case loc[locidx] of
  918. X86_64_INTEGER_CLASS,
  919. X86_64_INTEGERSI_CLASS:
  920. inc(needintloc);
  921. X86_64_SSE_CLASS,
  922. X86_64_SSESF_CLASS,
  923. X86_64_SSEDF_CLASS,
  924. X86_64_SSEUP_CLASS:
  925. inc(needmmloc);
  926. end;
  927. { the "-1" is because we can also use the current register }
       { the limits come from the win64 register tables on win64 and from
         the default register tables on every other target }
  928. if ((target_info.system=system_x86_64_win64) and
  929. ((intparareg+needintloc-1 > high(paraintsupregs_winx64)) or
  930. (mmparareg+needmmloc-1 > high(parammsupregs_winx64)))) or
  931. ((target_info.system<>system_x86_64_win64) and
  932. ((intparareg+needintloc-1 > high(paraintsupregs)) or
  933. (mmparareg+needmmloc-1 > high(parammsupregs)))) then
  934. begin
  935. { If there are no registers available for any
  936. eightbyte of an argument, the whole argument is
  937. passed on the stack. }
  938. loc[low(loc)]:=X86_64_MEMORY_CLASS;
  939. for locidx:=succ(low(loc)) to high(loc) do
  940. loc[locidx]:=X86_64_NO_CLASS;
  941. end;
       { walk the classes in loc[], adding one location per iteration until
         all paralen bytes are covered }
  942. locidx:=1;
  943. while (paralen>0) do
  944. begin
  945. if locidx>2 then
  946. internalerror(200501283);
  947. { Allocate }
  948. case loc[locidx] of
  949. X86_64_INTEGER_CLASS,
  950. X86_64_INTEGERSI_CLASS:
  951. begin
  952. paraloc:=hp.paraloc[side].add_location;
  953. paraloc^.loc:=LOC_REGISTER;
       { when the parameter has no cgsize of its own (OS_NO) or is split
         over two classes, the piece is sized by its class; otherwise the
         register takes the size of the whole parameter }
  954. if (paracgsize=OS_NO) or (loc[2]<>X86_64_NO_CLASS) then
  955. begin
  956. if loc[locidx]=X86_64_INTEGER_CLASS then
  957. begin
  958. paraloc^.size:=OS_INT;
  959. subreg:=R_SUBWHOLE;
  960. end
  961. else
  962. begin
  963. paraloc^.size:=OS_32;
  964. subreg:=R_SUBD;
  965. end;
  966. end
  967. else
  968. begin
  969. paraloc^.size:=paracgsize;
  970. { s64comp is pushed in an int register }
  971. if paraloc^.size=OS_C64 then
  972. paraloc^.size:=OS_64;
  973. subreg:=cgsize2subreg(R_INTREGISTER,paraloc^.size);
  974. end;
  975. { winx64 uses different registers }
  976. if target_info.system=system_x86_64_win64 then
  977. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs_winx64[intparareg],subreg)
  978. else
  979. paraloc^.register:=newreg(R_INTREGISTER,paraintsupregs[intparareg],subreg);
  980. { matching mm register must be skipped }
  981. if target_info.system=system_x86_64_win64 then
  982. inc(mmparareg);
  983. inc(intparareg);
       { paralen may drop below zero here when the register is wider than
         the bytes that were left; the loop condition only tests for >0 }
  984. dec(paralen,tcgsize2size[paraloc^.size]);
  985. end;
  986. X86_64_SSE_CLASS,
  987. X86_64_SSESF_CLASS,
  988. X86_64_SSEDF_CLASS,
  989. X86_64_SSEUP_CLASS:
  990. begin
  991. paraloc:=hp.paraloc[side].add_location;
  992. paraloc^.loc:=LOC_MMREGISTER;
       { single and double classes get their scalar subregister and float
         size; every other SSE class uses the whole register as OS_M64 }
  993. case loc[locidx] of
  994. X86_64_SSESF_CLASS:
  995. begin
  996. subreg:=R_SUBMMS;
  997. paraloc^.size:=OS_F32;
  998. end;
  999. X86_64_SSEDF_CLASS:
  1000. begin
  1001. subreg:=R_SUBMMD;
  1002. paraloc^.size:=OS_F64;
  1003. end;
  1004. else
  1005. begin
  1006. subreg:=R_SUBMMWHOLE;
  1007. paraloc^.size:=OS_M64;
  1008. end;
  1009. end;
  1010. { winx64 uses different registers }
  1011. if target_info.system=system_x86_64_win64 then
  1012. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs_winx64[mmparareg],subreg)
  1013. else
  1014. paraloc^.register:=newreg(R_MMREGISTER,parammsupregs[mmparareg],subreg);
  1015. { matching int register must be skipped }
  1016. if target_info.system=system_x86_64_win64 then
  1017. inc(intparareg);
  1018. inc(mmparareg);
  1019. dec(paralen,tcgsize2size[paraloc^.size]);
  1020. end;
  1021. X86_64_MEMORY_CLASS :
  1022. begin
  1023. paraloc:=hp.paraloc[side].add_location;
  1024. paraloc^.loc:=LOC_REFERENCE;
  1025. {Hack alert!!! We should modify int_cgsize to handle OS_128,
  1026. however, since int_cgsize is called in many places in the
  1027. compiler where only a few can already handle OS_128, fixing it
  1028. properly is out of the question to release 2.2.0 in time. (DM)}
  1029. if paracgsize=OS_128 then
  1030. if paralen=8 then
  1031. paraloc^.size:=OS_64
  1032. else if paralen=16 then
  1033. paraloc^.size:=OS_128
  1034. else
  1035. internalerror(200707143)
  1036. else if paracgsize in [OS_F32,OS_F64,OS_F80,OS_F128] then
  1037. paraloc^.size:=int_float_cgsize(paralen)
  1038. else
  1039. paraloc^.size:=int_cgsize(paralen);
       { the caller addresses the slot through the stack pointer, the
         callee through the frame pointer }
  1040. if side=callerside then
  1041. paraloc^.reference.index:=NR_STACK_POINTER_REG
  1042. else
  1043. paraloc^.reference.index:=NR_FRAME_POINTER_REG;
       { the slot starts at the current parasize; parasize then advances by
         paralen and is aligned to varalign for the next stack parameter }
  1044. varalign:=used_align(size_2_align(paralen),paraalign,paraalign);
  1045. paraloc^.reference.offset:=parasize;
  1046. parasize:=align(parasize+paralen,varalign);
       { a stack slot always covers the whole remainder, which ends the loop }
  1047. paralen:=0;
  1048. end;
  1049. else
  1050. internalerror(2010053113);
  1051. end;
       { move on to the second class only when it is in use; otherwise
         loc[1] is applied again to any bytes that are still left }
  1052. if (locidx<2) and
  1053. (loc[locidx+1]<>X86_64_NO_CLASS) then
  1054. inc(locidx);
  1055. end;
  1056. end
  1057. else
  1058. begin
       { zero-length parameter: record a single LOC_VOID location }
  1059. paraloc:=hp.paraloc[side].add_location;
  1060. paraloc^.loc:=LOC_VOID;
  1061. end;
  1062. end;
  1063. { Register parameters are assigned from left-to-right, but the
  1064. offsets on the stack are right-to-left. There is no need
  1065. to reverse the offset, only adapt the calleeside with the
  1066. start offset of the first param on the stack }
  1067. if side=calleeside then
  1068. begin
  1069. for i:=0 to paras.count-1 do
  1070. begin
  1071. hp:=tparavarsym(paras[i]);
  1072. paraloc:=hp.paraloc[side].location;
       { visit every location of the parameter and shift only the stack
         references by target_info.first_parm_offset }
  1073. while paraloc<>nil do
  1074. begin
  1075. with paraloc^ do
  1076. if (loc=LOC_REFERENCE) then
  1077. inc(reference.offset,target_info.first_parm_offset);
  1078. paraloc:=paraloc^.next;
  1079. end;
  1080. end;
  1081. end;
  1082. end;
  { Computes the caller-side parameter locations for a call that passes
    variable arguments.

    p           - procedure definition whose declared parameters (p.paras)
                  are laid out first
    varargspara - list of the extra variable arguments; they are laid out
                  after the declared parameters and continue with the same
                  register counters and stack size. Its mmregsused field
                  receives the final MM register counter.

    Returns the number of stack bytes allocated for all parameters. }
  function tx86_64paramanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
    var
      nextintreg,
      nextmmreg,
      stackbytes : longint;
    begin
      { both register counters start at the first parameter register }
      nextmmreg:=0;
      nextintreg:=0;
      { stack parameters start at offset 0, except on win64 where the
        first 4*8 bytes are skipped
        NOTE(review): presumably the win64 register home area - confirm }
      stackbytes:=0;
      if target_info.system=system_x86_64_win64 then
        stackbytes:=4*8;
      { first pass: the declared parameters }
      create_paraloc_info_intern(p,callerside,p.paras,nextintreg,nextmmreg,stackbytes,false);
      { second pass: the variable arguments, continuing with the counters
        left behind by the first pass }
      create_paraloc_info_intern(p,callerside,varargspara,nextintreg,nextmmreg,stackbytes,true);
      { the number of SSE registers used has to be passed in %AL, so keep
        it with the varargs list }
      varargspara.mmregsused:=nextmmreg;
      result:=stackbytes;
    end;
  { Computes the locations of all declared parameters of p for the given
    side, then the location of the function result.

    p    - procedure definition to process
    side - callerside or calleeside

    Returns the number of stack bytes allocated for the parameters. }
  function tx86_64paramanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
    var
      stackbytes,
      nextintreg,
      nextmmreg : longint;
    begin
      { register allocation starts at the first register of each kind }
      nextintreg:=0;
      nextmmreg:=0;
      { win64 starts the stack parameters at offset 4*8, every other
        target at offset 0
        NOTE(review): presumably the win64 register home area - confirm }
      case target_info.system of
        system_x86_64_win64:
          stackbytes:=4*8;
        else
          stackbytes:=0;
      end;
      create_paraloc_info_intern(p,side,p.paras,nextintreg,nextmmreg,stackbytes,false);
      { the function result location is set up after the parameters }
      create_funcretloc_info(p,side);
      { report the stack size that was allocated }
      result:=stackbytes;
    end;
  { Unit initialization: create a tx86_64paramanager instance and assign it
    to the paramanager variable. }
  1119. begin
  1120. paramanager:=tx86_64paramanager.create;
  1121. end.