nx86set.pas 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 assembler for in/case nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86set;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nset,pass_1,ncgset;
  22. type
  23. tx86innode = class(tinnode)
  24. procedure pass_generate_code;override;
  25. function pass_1 : tnode;override;
  26. end;
  27. implementation
  28. uses
  29. globtype,systems,
  30. verbose,globals,
  31. symconst,symdef,defutil,
  32. aasmbase,aasmtai,aasmdata,aasmcpu,
  33. cgbase,pass_2,tgobj,
  34. ncon,
  35. cpubase,
  36. cga,cgobj,cgutils,ncgutil,
  37. cgx86;
  38. {*****************************************************************************
  39. TX86INNODE
  40. *****************************************************************************}
  41. function tx86innode.pass_1 : tnode;
  42. begin
  43. result:=nil;
  44. { this is the only difference from the generic version }
  45. expectloc:=LOC_FLAGS;
  46. firstpass(right);
  47. firstpass(left);
  48. if codegenerror then
  49. exit;
  50. left_right_max;
  51. { a smallset needs maybe an misc. register }
  52. if (left.nodetype<>ordconstn) and
  53. not(right.expectloc in [LOC_CREGISTER,LOC_REGISTER]) and
  54. (right.registersint<1) then
  55. inc(registersint);
  56. end;
  57. procedure tx86innode.pass_generate_code;
  58. type
  59. Tsetpart=record
  60. range : boolean; {Part is a range.}
  61. start,stop : byte; {Start/stop when range; Stop=element when an element.}
  62. end;
  63. var
  64. genjumps,
  65. use_small,
  66. ranges : boolean;
  67. hreg,hreg2,
  68. pleftreg : tregister;
  69. opsize : tcgsize;
  70. setparts : array[1..8] of Tsetpart;
  71. i,numparts : byte;
  72. adjustment : longint;
  73. l,l2 : tasmlabel;
  74. {$ifdef CORRECT_SET_IN_FPC}
  75. AM : tasmop;
  76. {$endif CORRECT_SET_IN_FPC}
  77. function analizeset(Aset:pconstset;is_small:boolean):boolean;
  78. var
  79. compares,maxcompares:word;
  80. i:byte;
  81. begin
  82. if tnormalset(Aset^)=[] then
  83. {The expression...
  84. if expr in []
  85. ...is allways false. It should be optimized away in the
  86. resultdef pass, and thus never occur here. Since we
  87. do generate wrong code for it, do internalerror.}
  88. internalerror(2002072301);
  89. analizeset:=false;
  90. ranges:=false;
  91. numparts:=0;
  92. compares:=0;
  93. { Lots of comparisions take a lot of time, so do not allow
  94. too much comparisions. 8 comparisions are, however, still
  95. smalller than emitting the set }
  96. if cs_opt_size in current_settings.optimizerswitches then
  97. maxcompares:=8
  98. else
  99. maxcompares:=5;
  100. { when smallset is possible allow only 3 compares the smallset
  101. code is for littlesize also smaller when more compares are used }
  102. if is_small then
  103. maxcompares:=3;
  104. for i:=0 to 255 do
  105. if i in tnormalset(Aset^) then
  106. begin
  107. if (numparts=0) or (i<>setparts[numparts].stop+1) then
  108. begin
  109. {Set element is a separate element.}
  110. inc(compares);
  111. if compares>maxcompares then
  112. exit;
  113. inc(numparts);
  114. setparts[numparts].range:=false;
  115. setparts[numparts].stop:=i;
  116. end
  117. else
  118. {Set element is part of a range.}
  119. if not setparts[numparts].range then
  120. begin
  121. {Transform an element into a range.}
  122. setparts[numparts].range:=true;
  123. setparts[numparts].start:=setparts[numparts].stop;
  124. setparts[numparts].stop:=i;
  125. ranges := true;
  126. { there's only one compare per range anymore. Only a }
  127. { sub is added, but that's much faster than a }
  128. { cmp/jcc combo so neglect its effect }
  129. { inc(compares);
  130. if compares>maxcompares then
  131. exit; }
  132. end
  133. else
  134. begin
  135. {Extend a range.}
  136. setparts[numparts].stop:=i;
  137. end;
  138. end;
  139. analizeset:=true;
  140. end;
  141. begin
  142. { We check first if we can generate jumps, this can be done
  143. because the resultdef is already set in firstpass }
  144. { check if we can use smallset operation using btl which is limited
  145. to 32 bits, the left side may also not contain higher values !! }
  146. use_small:=(tsetdef(right.resultdef).settype=smallset) and
  147. ((left.resultdef.typ=orddef) and (torddef(left.resultdef).high<=32) or
  148. (left.resultdef.typ=enumdef) and (tenumdef(left.resultdef).max<=32));
  149. { Can we generate jumps? Possible for all types of sets }
  150. genjumps:=(right.nodetype=setconstn) and
  151. analizeset(tsetconstnode(right).value_set,use_small);
  152. { calculate both operators }
  153. { the complex one first }
  154. firstcomplex(self);
  155. secondpass(left);
  156. { Only process the right if we are not generating jumps }
  157. if not genjumps then
  158. begin
  159. secondpass(right);
  160. end;
  161. if codegenerror then
  162. exit;
  163. { ofcourse not commutative }
  164. if nf_swapped in flags then
  165. swapleftright;
  166. if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
  167. location_force_reg(current_asmdata.CurrAsmList,left.location,OS_INT,true);
  168. if genjumps then
  169. begin
  170. { It gives us advantage to check for the set elements
  171. separately instead of using the SET_IN_BYTE procedure.
  172. To do: Build in support for LOC_JUMP }
  173. opsize := def_cgsize(left.resultdef);
  174. { If register is used, use only lower 8 bits }
  175. if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  176. begin
  177. { for ranges we always need a 32bit register, because then we }
  178. { use the register as base in a reference (JM) }
  179. if ranges then
  180. begin
  181. pleftreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,OS_INT);
  182. cg.a_load_reg_reg(current_asmdata.CurrAsmList,left.location.size,OS_INT,left.location.register,pleftreg);
  183. if opsize<>OS_INT then
  184. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,255,pleftreg);
  185. opsize:=OS_INT;
  186. end
  187. else
  188. { otherwise simply use the lower 8 bits (no "and" }
  189. { necessary this way) (JM) }
  190. begin
  191. pleftreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,OS_8);
  192. opsize := OS_8;
  193. end;
  194. end
  195. else
  196. begin
  197. { load the value in a register }
  198. pleftreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  199. opsize:=OS_32;
  200. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_8,OS_32,left.location.reference,pleftreg);
  201. end;
  202. { Get a label to jump to the end }
  203. location_reset(location,LOC_FLAGS,OS_NO);
  204. { It's better to use the zero flag when there are
  205. no ranges }
  206. if ranges then
  207. location.resflags:=F_C
  208. else
  209. location.resflags:=F_E;
  210. current_asmdata.getjumplabel(l);
  211. { how much have we already substracted from the x in the }
  212. { "x in [y..z]" expression }
  213. adjustment := 0;
  214. for i:=1 to numparts do
  215. if setparts[i].range then
  216. { use fact that a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
  217. begin
  218. { is the range different from all legal values? }
  219. if (setparts[i].stop-setparts[i].start <> 255) then
  220. begin
  221. { yes, is the lower bound <> 0? }
  222. if (setparts[i].start <> 0) then
  223. begin
  224. if (left.location.loc = LOC_CREGISTER) then
  225. begin
  226. hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  227. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,OS_INT,pleftreg,hreg);
  228. pleftreg:=hreg;
  229. opsize:=OS_INT;
  230. end;
  231. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,setparts[i].start-adjustment,pleftreg);
  232. end;
  233. { new total value substracted from x: }
  234. { adjustment + (setparts[i].start - adjustment) }
  235. adjustment := setparts[i].start;
  236. { check if result < b-a+1 (not "result <= b-a", since }
  237. { we need a carry in case the element is in the range }
  238. { (this will never overflow since we check at the }
  239. { beginning whether stop-start <> 255) }
  240. cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_B,setparts[i].stop-setparts[i].start+1,pleftreg,l);
  241. end
  242. else
  243. { if setparts[i].start = 0 and setparts[i].stop = 255, }
  244. { it's always true since "in" is only allowed for bytes }
  245. begin
  246. current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STC,S_NO));
  247. cg.a_jmp_always(current_asmdata.CurrAsmList,l);
  248. end;
  249. end
  250. else
  251. begin
  252. { Emit code to check if left is an element }
  253. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,TCGSize2OpSize[opsize],setparts[i].stop-adjustment,
  254. pleftreg));
  255. { Result should be in carry flag when ranges are used }
  256. if ranges then
  257. current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STC,S_NO));
  258. { If found, jump to end }
  259. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l);
  260. end;
  261. if ranges and
  262. { if the last one was a range, the carry flag is already }
  263. { set appropriately }
  264. not(setparts[numparts].range) then
  265. current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO));
  266. { To compensate for not doing a second pass }
  267. right.location.reference.symbol:=nil;
  268. { Now place the end label }
  269. cg.a_label(current_asmdata.CurrAsmList,l);
  270. end
  271. else
  272. begin
  273. location_reset(location,LOC_FLAGS,OS_NO);
  274. { We will now generated code to check the set itself, no jmps,
  275. handle smallsets separate, because it allows faster checks }
  276. if use_small then
  277. begin
  278. if left.nodetype=ordconstn then
  279. begin
  280. location.resflags:=F_NE;
  281. case right.location.loc of
  282. LOC_REGISTER,
  283. LOC_CREGISTER:
  284. begin
  285. emit_const_reg(A_TEST,S_L,
  286. 1 shl (tordconstnode(left).value and 31),right.location.register);
  287. end;
  288. LOC_REFERENCE,
  289. LOC_CREFERENCE :
  290. begin
  291. emit_const_ref(A_TEST,S_L,1 shl (tordconstnode(left).value and 31),
  292. right.location.reference);
  293. end;
  294. else
  295. internalerror(200203312);
  296. end;
  297. end
  298. else
  299. begin
  300. case left.location.loc of
  301. LOC_REGISTER,
  302. LOC_CREGISTER:
  303. begin
  304. hreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,OS_32);
  305. cg.a_load_reg_reg(current_asmdata.CurrAsmList,left.location.size,OS_32,left.location.register,hreg);
  306. end;
  307. else
  308. begin
  309. { the set element isn't never samller than a byte
  310. and because it's a small set we need only 5 bits
  311. but 8 bits are easier to load }
  312. hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  313. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_8,OS_32,left.location.reference,hreg);
  314. end;
  315. end;
  316. case right.location.loc of
  317. LOC_REGISTER,
  318. LOC_CREGISTER :
  319. begin
  320. emit_reg_reg(A_BT,S_L,hreg,right.location.register);
  321. end;
  322. LOC_CONSTANT :
  323. begin
  324. { We have to load the value into a register because
  325. btl does not accept values only refs or regs (PFV) }
  326. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  327. cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,right.location.value,hreg2);
  328. emit_reg_reg(A_BT,S_L,hreg,hreg2);
  329. end;
  330. LOC_CREFERENCE,
  331. LOC_REFERENCE :
  332. begin
  333. emit_reg_ref(A_BT,S_L,hreg,right.location.reference);
  334. end;
  335. else
  336. internalerror(2002032210);
  337. end;
  338. location.resflags:=F_C;
  339. end;
  340. end
  341. else
  342. begin
  343. if right.location.loc=LOC_CONSTANT then
  344. begin
  345. location.resflags:=F_C;
  346. current_asmdata.getjumplabel(l);
  347. current_asmdata.getjumplabel(l2);
  348. { load constants to a register }
  349. if left.nodetype=ordconstn then
  350. location_force_reg(current_asmdata.CurrAsmList,left.location,OS_INT,true);
  351. case left.location.loc of
  352. LOC_REGISTER,
  353. LOC_CREGISTER:
  354. begin
  355. hreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,OS_32);
  356. cg.a_load_reg_reg(current_asmdata.CurrAsmList,left.location.size,OS_32,left.location.register,hreg);
  357. cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,OS_32,OC_BE,31,hreg,l);
  358. { reset carry flag }
  359. current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO));
  360. cg.a_jmp_always(current_asmdata.CurrAsmList,l2);
  361. cg.a_label(current_asmdata.CurrAsmList,l);
  362. { We have to load the value into a register because
  363. btl does not accept values only refs or regs (PFV) }
  364. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  365. cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,right.location.value,hreg2);
  366. emit_reg_reg(A_BT,S_L,hreg,hreg2);
  367. end;
  368. else
  369. begin
  370. {$ifdef CORRECT_SET_IN_FPC}
  371. if m_tp in current_settings.modeswitches then
  372. begin
  373. {***WARNING only correct if
  374. reference is 32 bits (PM) *****}
  375. emit_const_ref(A_CMP,S_L,31,reference_copy(left.location.reference));
  376. end
  377. else
  378. {$endif CORRECT_SET_IN_FPC}
  379. begin
  380. emit_const_ref(A_CMP,S_B,31,left.location.reference);
  381. end;
  382. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_BE,l);
  383. { reset carry flag }
  384. current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO));
  385. cg.a_jmp_always(current_asmdata.CurrAsmList,l2);
  386. cg.a_label(current_asmdata.CurrAsmList,l);
  387. hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  388. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
  389. { We have to load the value into a register because
  390. btl does not accept values only refs or regs (PFV) }
  391. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  392. cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,right.location.value,hreg2);
  393. emit_reg_reg(A_BT,S_L,hreg,hreg2);
  394. end;
  395. end;
  396. cg.a_label(current_asmdata.CurrAsmList,l2);
  397. end { of right.location.loc=LOC_CONSTANT }
  398. { do search in a normal set which could have >32 elementsm
  399. but also used if the left side contains higher values > 32 }
  400. else if left.nodetype=ordconstn then
  401. begin
  402. location.resflags:=F_NE;
  403. inc(right.location.reference.offset,tordconstnode(left).value shr 3);
  404. emit_const_ref(A_TEST,S_B,1 shl (tordconstnode(left).value and 7),right.location.reference);
  405. end
  406. else
  407. begin
  408. if (left.location.loc=LOC_REGISTER) then
  409. pleftreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,OS_32)
  410. else
  411. pleftreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
  412. cg.a_load_loc_reg(current_asmdata.CurrAsmList,OS_32,left.location,pleftreg);
  413. location_freetemp(current_asmdata.CurrAsmList,left.location);
  414. emit_reg_ref(A_BT,S_L,pleftreg,right.location.reference);
  415. { tg.ungetiftemp(current_asmdata.CurrAsmList,right.location.reference) happens below }
  416. location.resflags:=F_C;
  417. end;
  418. end;
  419. end;
  420. if not genjumps then
  421. location_freetemp(current_asmdata.CurrAsmList,right.location);
  422. end;
  423. begin
  424. cinnode:=tx86innode;
  425. end.