lextable.pas 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. {
  2. This module collects the various tables used by the Lex program:
  3. - the symbol table
  4. - the position table
  5. - the DFA states and transition tables
  6. Note: All tables are allocated dynamically (at initialization time)
  7. because of the 64KB static data limit.
  8. Copyright (c) 1990-92 Albert Graef <[email protected]>
  9. Copyright (C) 1996 Berend de Boer <[email protected]>
  10. This program is free software; you can redistribute it and/or modify
  11. it under the terms of the GNU General Public License as published by
  12. the Free Software Foundation; either version 2 of the License, or
  13. (at your option) any later version.
  14. This program is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program; if not, write to the Free Software
  20. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21. $Revision$
  22. $Modtime: 96-08-01 10:23 $
  23. $History: LEXTABLE.PAS $
  24. *
  25. * ***************** Version 2 *****************
  26. * User: Berend Date: 96-10-10 Time: 21:16
  27. * Updated in $/Lex and Yacc/tply
  28. * Updated for protected mode, windows and Delphi 1.X and 2.X.
  29. }
  30. unit LexTable;
  31. interface
  32. uses LexBase;
  33. {$IFNDEF Win32}
  34. var max_bytes : LongInt;
  35. (* available memory *)
  36. function n_bytes : LongInt;
  37. (* memory actually used *)
  38. {$ENDIF}
  39. const
  40. (* Maximum table sizes: *)
  41. max_keys = 997; (* size of hash symbol table (prime number!) *)
  42. {$IFDEF MsDos}
  43. max_pos = 600; (* maximum number of positions *)
  44. max_states = 300; (* number of DFA states *)
  45. max_trans = 600; (* number of transitions *)
  46. max_start_states = 50; (* maximum number of user-defined start states *)
  47. {$ELSE}
  48. max_pos = 1200; (* maximum number of positions *)
  49. max_states = 600; (* number of DFA states *)
  50. max_trans = 1200; (* number of transitions *)
  51. max_start_states = 100; (* maximum number of user-defined start states *)
  52. {$ENDIF}
  53. var
  54. (* Actual table sizes: *)
  55. n_pos : Integer;
  56. n_states : Integer;
  57. n_trans : Integer;
  58. n_start_states : Integer;
  59. type
  60. (* Table data structures: *)
  61. SymTable = array [1..max_keys] of record
  62. pname : StrPtr;
  63. (* print name; empty entries are denoted by pname=nil *)
  64. case sym_type : ( none, macro_sym, start_state_sym ) of
  65. macro_sym : ( subst : StrPtr );
  66. (* macro substitution *)
  67. start_state_sym : ( start_state : Integer );
  68. (* start state *)
  69. end;
  70. PosTableEntry = record
  71. follow_pos : IntSetPtr;
  72. (* set of follow positions *)
  73. case pos_type : ( char_pos, cclass_pos, mark_pos ) of
  74. char_pos : ( c : Char );
  75. (* character position *)
  76. cclass_pos : ( cc : CClassPtr );
  77. (* character class position *)
  78. mark_pos : ( rule, pos : Integer );
  79. (* mark position *)
  80. end;
  81. PosTable = array [1..max_pos] of PosTableEntry;
  82. FirstPosTable = array [0..2*max_start_states+1] of IntSetPtr;
  83. (* first positions for start states (even states
  84. are entered anywhere on the line, odd states only
  85. at the beginning of the line; states 0 and 1 denote
  86. default, states 2..2*n_start_states+1 user-defined
  87. start states) *)
  88. StateTableEntry = record
  89. state_pos : IntSetPtr;
  90. (* positions covered by state *)
  91. final : Boolean;
  92. (* final state? *)
  93. trans_lo,
  94. trans_hi : Integer;
  95. (* transitions *)
  96. end;
  97. StateTable = array [0..max_states-1] of StateTableEntry;
  98. TransTableEntry = record
  99. cc : CClassPtr;
  100. (* characters of transition *)
  101. follow_pos : IntSetPtr;
  102. (* follow positions (positions of next state) *)
  103. next_state : Integer;
  104. (* next state *)
  105. end;
  106. TransTable = array [1..max_trans] of TransTableEntry;
  107. var
  108. verbose : Boolean; (* status of the verbose option *)
  109. optimize : Boolean; (* status of the optimization option *)
  110. sym_table : ^SymTable; (* symbol table *)
  111. pos_table : ^PosTable; (* position table *)
  112. first_pos_table : ^FirstPosTable; (* first positions table *)
  113. state_table : ^StateTable; (* DFA state table *)
  114. trans_table : ^TransTable; (* DFA transition table *)
  115. (* Operations: *)
  116. (* Hash symbol table:
  117. The following routines are supplied to be used with the generic hash table
  118. routines in LexBase. *)
  119. function lookup(k : Integer) : String;
  120. (* print name of symbol no. k *)
  121. procedure entry(k : Integer; symbol : String);
  122. (* enter symbol into table *)
  123. (* Routines to build the position table: *)
  124. procedure addCharPos(c : Char);
  125. procedure addCClassPos(cc : CClassPtr);
  126. procedure addMarkPos(rule, pos : Integer);
  127. (* Positions are allocated in the order of calls to addCharPos, addCClassPos
  128. and addMarkPos, starting at position 1. These routines also initialize
  129. the corresponding follow sets. *)
  130. (* Routines to build the state table: *)
  131. var act_state : Integer; (* state currently considered *)
  132. function newState(POS : IntSetPtr) : Integer;
  133. (* Add a new state with the given position set; initialize the state's
  134. position set to POS (the offsets into the transition table are
  135. initialized when the state becomes active, see startStateTrans, below).
  136. Returns: the new state number *)
  137. function addState(POS : IntSetPtr) : Integer;
  138. (* add a new state, but only if there is not already a state with the
  139. same position set *)
  140. procedure startStateTrans;
  141. procedure endStateTrans;
  142. (* initializes act_state's first and last offsets into the transition
  143. table *)
  144. function n_state_trans(i : Integer) : Integer;
  145. (* return number of transitions in state i *)
  146. procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
  147. (* adds a transition to the table *)
  148. procedure mergeTrans;
  149. (* sorts transitions w.r.t. next states and merges transitions for the
  150. same next state in the active state *)
  151. procedure sortTrans;
  152. (* sort transitions in act_state lexicographically *)
  153. implementation
  154. uses LexMsgs;
  155. {$IFNDEF Win32}
  156. function n_bytes : LongInt;
  157. begin
  158. n_bytes := max_bytes-memAvail
  159. end(*n_bytes*);
  160. {$ENDIF}
  161. (* Hash table routines: *)
  162. function lookup(k : Integer) : String;
  163. begin
  164. with sym_table^[k] do
  165. if pname=nil then
  166. lookup := ''
  167. else
  168. lookup := copy(pname^, 1, length(pname^))
  169. end(*lookup*);
  170. procedure entry(k : Integer; symbol : String);
  171. begin
  172. with sym_table^[k] do
  173. begin
  174. pname := newStr(symbol);
  175. sym_type := none;
  176. end
  177. end(*entry*);
  178. (* Routines to build the position table: *)
  179. procedure addCharPos(c : Char);
  180. begin
  181. inc(n_pos);
  182. if n_pos>max_pos then fatal(pos_table_overflow);
  183. pos_table^[n_pos].follow_pos := newIntSet;
  184. pos_table^[n_pos].pos_type := char_pos;
  185. pos_table^[n_pos].c := c;
  186. end(*addCharPos*);
  187. procedure addCClassPos(cc : CClassPtr);
  188. begin
  189. inc(n_pos);
  190. if n_pos>max_pos then fatal(pos_table_overflow);
  191. pos_table^[n_pos].follow_pos := newIntSet;
  192. pos_table^[n_pos].pos_type := cclass_pos;
  193. pos_table^[n_pos].cc := cc;
  194. end(*addCClassPos*);
  195. procedure addMarkPos(rule, pos : Integer);
  196. begin
  197. inc(n_pos);
  198. if n_pos>max_pos then fatal(pos_table_overflow);
  199. pos_table^[n_pos].follow_pos := newIntSet;
  200. pos_table^[n_pos].pos_type := mark_pos;
  201. pos_table^[n_pos].rule := rule;
  202. pos_table^[n_pos].pos := pos;
  203. end(*addMarkPos*);
  204. (* Routines to build the state table: *)
  205. function newState(POS : IntSetPtr) : Integer;
  206. begin
  207. if n_states>=max_states then fatal(state_table_overflow);
  208. newState := n_states;
  209. with state_table^[n_states] do
  210. begin
  211. state_pos := POS;
  212. final := false;
  213. end;
  214. inc(n_states);
  215. end(*newState*);
  216. function addState(POS : IntSetPtr) : Integer;
  217. var i : Integer;
  218. begin
  219. for i := 0 to pred(n_states) do
  220. if equal(POS^, state_table^[i].state_pos^) then
  221. begin
  222. addState := i;
  223. exit;
  224. end;
  225. addState := newState(POS);
  226. end(*addState*);
  227. procedure startStateTrans;
  228. begin
  229. state_table^[act_state].trans_lo := succ(n_trans);
  230. end(*startStateTrans*);
  231. procedure endStateTrans;
  232. begin
  233. state_table^[act_state].trans_hi := n_trans;
  234. end(*endStateTrans*);
  235. function n_state_trans(i : Integer) : Integer;
  236. begin
  237. with state_table^[i] do
  238. n_state_trans := trans_hi-trans_lo+1
  239. end(*n_state_trans*);
  240. (* Construction of the transition table:
  241. This implementation here uses a simple optimization which tries to avoid
  242. the construction of different transitions for each individual character
  243. in large character classes by MERGING transitions whenever possible. The
  244. transitions, at any time, will be partitioned into transitions on disjoint
  245. character classes. When adding a new transition on character class cc, we
  246. repartition the transitions as follows:
  247. 1. If the current character class cc equals an existing one, we can
  248. simply add the new follow set to the existing one.
  249. 2. Otherwise, for some existing transition on some character class
  250. cc1 with cc*cc1<>[], we replace the existing transition by a new
  251. transition on cc*cc1 with follow set = cc1's follow set + cc's follow
  252. set, and, if necessary (i.e. if cc1-cc is nonempty), a transition on
  253. cc1-cc with follow set = cc1's follow set. We then remove the elements
  254. of cc1 from cc, and proceed again with step 1.
  255. We may stop this process as soon as cc becomes empty (then all characters
  256. in cc have been distributed among the existing partitions). If cc does
  257. NOT become empty, we have to construct a new transition for the remaining
  258. character class (which then will be disjoint from all other character
  259. classes in the transition table). *)
  260. procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
  261. var
  262. i : Integer;
  263. cc0, cc1, cc2 : CClass;
  264. begin
  265. for i := state_table^[act_state].trans_lo to n_trans do
  266. if trans_table^[i].cc^=cc then
  267. begin
  268. setunion(trans_table^[i].follow_pos^, FOLLOW^);
  269. exit
  270. end
  271. else
  272. begin
  273. cc0 := cc*trans_table^[i].cc^;
  274. if cc0<>[] then
  275. begin
  276. cc1 := trans_table^[i].cc^-cc;
  277. cc2 := cc-trans_table^[i].cc^;
  278. if cc1<>[] then
  279. begin
  280. trans_table^[i].cc^ := cc1;
  281. inc(n_trans);
  282. if n_trans>max_trans then fatal(trans_table_overflow);
  283. trans_table^[n_trans].cc := newCClass(cc0);
  284. trans_table^[n_trans].follow_pos := newIntSet;
  285. trans_table^[n_trans].follow_pos^ :=
  286. trans_table^[i].follow_pos^;
  287. setunion(trans_table^[n_trans].follow_pos^, FOLLOW^);
  288. end
  289. else
  290. begin
  291. trans_table^[i].cc^ := cc0;
  292. setunion(trans_table^[i].follow_pos^, FOLLOW^);
  293. end;
  294. cc := cc2;
  295. if cc=[] then exit;
  296. end
  297. end;
  298. inc(n_trans);
  299. if n_trans>max_trans then fatal(trans_table_overflow);
  300. trans_table^[n_trans].cc := newCClass(cc);
  301. trans_table^[n_trans].follow_pos := newIntSet;
  302. trans_table^[n_trans].follow_pos^ := FOLLOW^;
  303. end(*addCharTrans*);
  304. (* comparison and swap procedures for sorting transitions: *)
  305. {$ifndef fpc}{$F+}{$endif}
  306. function transLessNextState(i, j : Integer) : Boolean;
  307. {$ifndef fpc}{$F-}{$endif}
  308. (* compare transitions based on next states (used in mergeCharTrans) *)
  309. begin
  310. transLessNextState := trans_table^[i].next_state<
  311. trans_table^[j].next_state
  312. end(*transLessNextState*);
  313. {$ifndef fpc}{$F+}{$endif}
  314. function transLess(i, j : Integer) : Boolean;
  315. {$ifndef fpc}{$F-}{$endif}
  316. (* lexical order on transitions *)
  317. var c : Char; xi, xj : Boolean;
  318. begin
  319. for c := #0 to #255 do
  320. begin
  321. xi := c in trans_table^[i].cc^;
  322. xj := c in trans_table^[j].cc^;
  323. if xi<>xj then
  324. begin
  325. transLess := ord(xi)>ord(xj);
  326. exit
  327. end;
  328. end;
  329. transLess := false
  330. end(*transLess*);
  331. {$ifndef fpc}{$F+}{$endif}
  332. procedure transSwap(i, j : Integer);
  333. {$ifndef fpc}{$F-}{$endif}
  334. (* swap transitions i and j *)
  335. var x : TransTableEntry;
  336. begin
  337. x := trans_table^[i];
  338. trans_table^[i] := trans_table^[j];
  339. trans_table^[j] := x;
  340. end(*transSwap*);
  341. procedure mergeTrans;
  342. var
  343. i, j, n_deleted : Integer;
  344. begin
  345. (* sort transitions w.r.t. next states: *)
  346. quicksort(state_table^[act_state].trans_lo,
  347. n_trans,
  348. {$ifdef fpc}@{$endif}transLessNextState,
  349. {$ifdef fpc}@{$endif}transSwap);
  350. (* merge transitions for the same next state: *)
  351. n_deleted := 0;
  352. for i := state_table^[act_state].trans_lo to n_trans do
  353. if trans_table^[i].cc<>nil then
  354. begin
  355. j := succ(i);
  356. while (j<=n_trans) and
  357. (trans_table^[i].next_state =
  358. trans_table^[j].next_state) do
  359. begin
  360. (* merge cclasses of transitions i and j, then mark
  361. transition j as deleted *)
  362. trans_table^[i].cc^ := trans_table^[i].cc^+
  363. trans_table^[j].cc^;
  364. trans_table^[j].cc := nil;
  365. inc(n_deleted);
  366. inc(j);
  367. end;
  368. end;
  369. (* remove deleted transitions: *)
  370. j := state_table^[act_state].trans_lo;
  371. for i := state_table^[act_state].trans_lo to n_trans do
  372. if trans_table^[i].cc<>nil then
  373. if i<>j then
  374. begin
  375. trans_table^[j] := trans_table^[i];
  376. inc(j);
  377. end
  378. else
  379. inc(j);
  380. (* update transition count: *)
  381. dec(n_trans, n_deleted);
  382. end(*mergeTrans*);
  383. procedure sortTrans;
  384. begin
  385. quicksort(state_table^[act_state].trans_lo,
  386. n_trans,
  387. {$ifdef fpc}@{$endif}transLess,
  388. {$ifdef fpc}@{$endif}transSwap);
  389. end(*sortTrans*);
  390. var i : Integer;
  391. begin
  392. verbose := false;
  393. optimize := false;
  394. {$IFNDEF Win32}
  395. max_bytes := memAvail;
  396. {$ENDIF}
  397. n_pos := 0;
  398. n_states := 0;
  399. n_trans := 0;
  400. n_start_states := 0;
  401. (* allocate tables: *)
  402. new(sym_table);
  403. new(pos_table);
  404. new(first_pos_table);
  405. new(state_table);
  406. new(trans_table);
  407. (* initialize symbol table: *)
  408. for i := 1 to max_keys do sym_table^[i].pname := nil;
  409. end(*LexTables*).