lextable.pas 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. {
  2. This module collects the various tables used by the Lex program:
  3. - the symbol table
  4. - the position table
  5. - the DFA states and transition tables
  6. Note: All tables are allocated dynamically (at initialization time)
  7. because of the 64KB static data limit.
  8. Copyright (c) 1990-92 Albert Graef <[email protected]>
  9. Copyright (C) 1996 Berend de Boer <[email protected]>
  10. This program is free software; you can redistribute it and/or modify
  11. it under the terms of the GNU General Public License as published by
  12. the Free Software Foundation; either version 2 of the License, or
  13. (at your option) any later version.
  14. This program is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program; if not, write to the Free Software
  20. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21. $Revision: 1.3 $
  22. $Modtime: 96-08-01 10:23 $
  23. $History: LEXTABLE.PAS $
  24. *
  25. * ***************** Version 2 *****************
  26. * User: Berend Date: 96-10-10 Time: 21:16
  27. * Updated in $/Lex and Yacc/tply
  28. * Updated for protected mode, windows and Delphi 1.X and 2.X.
  29. }
  30. unit LexTable;
  31. interface
  32. uses LexBase;
  33. const
  34. (* Maximum table sizes: *)
  35. max_keys = 997; (* size of hash symbol table (prime number!) *)
  36. {$IFDEF MsDos}
  37. max_pos = 600; (* maximum number of positions *)
  38. max_states = 300; (* number of DFA states *)
  39. max_trans = 600; (* number of transitions *)
  40. max_start_states = 50; (* maximum number of user-defined start states *)
  41. {$ELSE}
  42. max_pos = 1200; (* maximum number of positions *)
  43. max_states = 600; (* number of DFA states *)
  44. max_trans = 1200; (* number of transitions *)
  45. max_start_states = 100; (* maximum number of user-defined start states *)
  46. {$ENDIF}
  47. var
  48. (* Actual table sizes: *)
  49. n_pos : Integer;
  50. n_states : Integer;
  51. n_trans : Integer;
  52. n_start_states : Integer;
  53. type
  54. (* Table data structures: *)
  55. SymTable = array [1..max_keys] of record
  56. pname : StrPtr;
  57. (* print name; empty entries are denoted by pname=nil *)
  58. case sym_type : ( none_sym, macro_sym, start_state_sym ) of
  59. macro_sym : ( subst : StrPtr );
  60. (* macro substitution *)
  61. start_state_sym : ( start_state : Integer );
  62. (* start state *)
  63. end;
  64. PosTableEntry = record
  65. follow_pos : IntSetPtr;
  66. (* set of follow positions *)
  67. case pos_type : ( char_pos, cclass_pos, mark_pos ) of
  68. char_pos : ( c : Char );
  69. (* character position *)
  70. cclass_pos : ( cc : CClassPtr );
  71. (* character class position *)
  72. mark_pos : ( rule, pos : Integer );
  73. (* mark position *)
  74. end;
  75. PosTable = array [1..max_pos] of PosTableEntry;
  76. FirstPosTable = array [0..2*max_start_states+1] of IntSetPtr;
  77. (* first positions for start states (even states
  78. are entered anywhere on the line, odd states only
  79. at the beginning of the line; states 0 and 1 denote
  80. default, states 2..2*n_start_states+1 user-defined
  81. start states) *)
  82. StartStateExclusive = array[0..max_start_states] of Boolean;
  83. StateTableEntry = record
  84. state_pos : IntSetPtr;
  85. (* positions covered by state *)
  86. final : Boolean;
  87. (* final state? *)
  88. trans_lo,
  89. trans_hi : Integer;
  90. (* transitions *)
  91. end;
  92. StateTable = array [0..max_states-1] of StateTableEntry;
  93. TransTableEntry = record
  94. cc : CClassPtr;
  95. (* characters of transition *)
  96. follow_pos : IntSetPtr;
  97. (* follow positions (positions of next state) *)
  98. next_state : Integer;
  99. (* next state *)
  100. end;
  101. TransTable = array [1..max_trans] of TransTableEntry;
  102. var
  103. verbose : Boolean; (* status of the verbose option *)
  104. optimize : Boolean; (* status of the optimization option *)
  105. sym_table : ^SymTable; (* symbol table *)
  106. pos_table : ^PosTable; (* position table *)
  107. first_pos_table : ^FirstPosTable; (* first positions table *)
  108. start_excl : ^StartStateExclusive; (* user-defined start state type *)
  109. state_table : ^StateTable; (* DFA state table *)
  110. trans_table : ^TransTable; (* DFA transition table *)
  111. (* Operations: *)
  112. (* Hash symbol table:
  113. The following routines are supplied to be used with the generic hash table
  114. routines in LexBase. *)
  115. function lookup(k : Integer) : String;
  116. (* print name of symbol no. k *)
  117. procedure entry(k : Integer; symbol : String);
  118. (* enter symbol into table *)
  119. (* Routines to build the position table: *)
  120. procedure addCharPos(c : Char);
  121. procedure addCClassPos(cc : CClassPtr);
  122. procedure addMarkPos(rule, pos : Integer);
  123. (* Positions are allocated in the order of calls to addCharPos, addCClassPos
  124. and addMarkPos, starting at position 1. These routines also initialize
  125. the corresponding follow sets. *)
  126. (* Routines to build the state table: *)
  127. var act_state : Integer; (* state currently considered *)
  128. function newState(POS : IntSetPtr) : Integer;
  129. (* Add a new state with the given position set; initialize the state's
  130. position set to POS (the offsets into the transition table are
  131. initialized when the state becomes active, see startStateTrans, below).
  132. Returns: the new state number *)
  133. function addState(POS : IntSetPtr) : Integer;
  134. (* add a new state, but only if there is not already a state with the
  135. same position set *)
  136. procedure startStateTrans;
  137. procedure endStateTrans;
  138. (* initializes act_state's first and last offsets into the transition
  139. table *)
  140. function n_state_trans(i : Integer) : Integer;
  141. (* return number of transitions in state i *)
  142. procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
  143. (* adds a transition to the table *)
  144. procedure mergeTrans;
  145. (* sorts transitions w.r.t. next states and merges transitions for the
  146. same next state in the active state *)
  147. procedure sortTrans;
  148. (* sort transitions in act_state lexicographically *)
  149. implementation
  150. uses LexMsgs;
  151. (* Hash table routines: *)
  152. function lookup(k : Integer) : String;
  153. begin
  154. with sym_table^[k] do
  155. if pname=nil then
  156. lookup := ''
  157. else
  158. lookup := copy(pname^, 1, length(pname^))
  159. end(*lookup*);
  160. procedure entry(k : Integer; symbol : String);
  161. begin
  162. with sym_table^[k] do
  163. begin
  164. pname := newStr(symbol);
  165. sym_type := none_sym;
  166. end
  167. end(*entry*);
  168. (* Routines to build the position table: *)
  169. procedure addCharPos(c : Char);
  170. begin
  171. inc(n_pos);
  172. if n_pos>max_pos then fatal(pos_table_overflow);
  173. pos_table^[n_pos].follow_pos := newIntSet;
  174. pos_table^[n_pos].pos_type := char_pos;
  175. pos_table^[n_pos].c := c;
  176. end(*addCharPos*);
  177. procedure addCClassPos(cc : CClassPtr);
  178. begin
  179. inc(n_pos);
  180. if n_pos>max_pos then fatal(pos_table_overflow);
  181. pos_table^[n_pos].follow_pos := newIntSet;
  182. pos_table^[n_pos].pos_type := cclass_pos;
  183. pos_table^[n_pos].cc := cc;
  184. end(*addCClassPos*);
  185. procedure addMarkPos(rule, pos : Integer);
  186. begin
  187. inc(n_pos);
  188. if n_pos>max_pos then fatal(pos_table_overflow);
  189. pos_table^[n_pos].follow_pos := newIntSet;
  190. pos_table^[n_pos].pos_type := mark_pos;
  191. pos_table^[n_pos].rule := rule;
  192. pos_table^[n_pos].pos := pos;
  193. end(*addMarkPos*);
  194. (* Routines to build the state table: *)
  195. function newState(POS : IntSetPtr) : Integer;
  196. begin
  197. if n_states>=max_states then fatal(state_table_overflow);
  198. newState := n_states;
  199. with state_table^[n_states] do
  200. begin
  201. state_pos := POS;
  202. final := false;
  203. end;
  204. inc(n_states);
  205. end(*newState*);
  206. function addState(POS : IntSetPtr) : Integer;
  207. var i : Integer;
  208. begin
  209. for i := 0 to pred(n_states) do
  210. if equal(POS^, state_table^[i].state_pos^) then
  211. begin
  212. addState := i;
  213. exit;
  214. end;
  215. addState := newState(POS);
  216. end(*addState*);
  217. procedure startStateTrans;
  218. begin
  219. state_table^[act_state].trans_lo := succ(n_trans);
  220. end(*startStateTrans*);
  221. procedure endStateTrans;
  222. begin
  223. state_table^[act_state].trans_hi := n_trans;
  224. end(*endStateTrans*);
  225. function n_state_trans(i : Integer) : Integer;
  226. begin
  227. with state_table^[i] do
  228. n_state_trans := trans_hi-trans_lo+1
  229. end(*n_state_trans*);
  230. (* Construction of the transition table:
  231. This implementation here uses a simple optimization which tries to avoid
  232. the construction of different transitions for each individual character
  233. in large character classes by MERGING transitions whenever possible. The
  234. transitions, at any time, will be partitioned into transitions on disjoint
  235. character classes. When adding a new transition on character class cc, we
  236. repartition the transitions as follows:
  237. 1. If the current character class cc equals an existing one, we can
  238. simply add the new follow set to the existing one.
  239. 2. Otherwise, for some existing transition on some character class
  240. cc1 with cc*cc1<>[], we replace the existing transition by a new
  241. transition on cc*cc1 with follow set = cc1's follow set + cc's follow
  242. set, and, if necessary (i.e. if cc1-cc is nonempty), a transition on
  243. cc1-cc with follow set = cc1's follow set. We then remove the elements
  244. of cc1 from cc, and proceed again with step 1.
  245. We may stop this process as soon as cc becomes empty (then all characters
  246. in cc have been distributed among the existing partitions). If cc does
  247. NOT become empty, we have to construct a new transition for the remaining
  248. character class (which then will be disjoint from all other character
  249. classes in the transition table). *)
  250. procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
  251. var
  252. i : Integer;
  253. cc0, cc1, cc2 : CClass;
  254. begin
  255. for i := state_table^[act_state].trans_lo to n_trans do
  256. if trans_table^[i].cc^=cc then
  257. begin
  258. setunion(trans_table^[i].follow_pos^, FOLLOW^);
  259. exit
  260. end
  261. else
  262. begin
  263. cc0 := cc*trans_table^[i].cc^;
  264. if cc0<>[] then
  265. begin
  266. cc1 := trans_table^[i].cc^-cc;
  267. cc2 := cc-trans_table^[i].cc^;
  268. if cc1<>[] then
  269. begin
  270. trans_table^[i].cc^ := cc1;
  271. inc(n_trans);
  272. if n_trans>max_trans then fatal(trans_table_overflow);
  273. trans_table^[n_trans].cc := newCClass(cc0);
  274. trans_table^[n_trans].follow_pos := newIntSet;
  275. trans_table^[n_trans].follow_pos^ :=
  276. trans_table^[i].follow_pos^;
  277. setunion(trans_table^[n_trans].follow_pos^, FOLLOW^);
  278. end
  279. else
  280. begin
  281. trans_table^[i].cc^ := cc0;
  282. setunion(trans_table^[i].follow_pos^, FOLLOW^);
  283. end;
  284. cc := cc2;
  285. if cc=[] then exit;
  286. end
  287. end;
  288. inc(n_trans);
  289. if n_trans>max_trans then fatal(trans_table_overflow);
  290. trans_table^[n_trans].cc := newCClass(cc);
  291. trans_table^[n_trans].follow_pos := newIntSet;
  292. trans_table^[n_trans].follow_pos^ := FOLLOW^;
  293. end(*addCharTrans*);
  294. (* comparison and swap procedures for sorting transitions: *)
  295. {$ifndef fpc}{$F+}{$endif}
  296. function transLessNextState(i, j : Integer) : Boolean;
  297. {$ifndef fpc}{$F-}{$endif}
  298. (* compare transitions based on next states (used in mergeCharTrans) *)
  299. begin
  300. transLessNextState := trans_table^[i].next_state<
  301. trans_table^[j].next_state
  302. end(*transLessNextState*);
  303. {$ifndef fpc}{$F+}{$endif}
  304. function transLess(i, j : Integer) : Boolean;
  305. {$ifndef fpc}{$F-}{$endif}
  306. (* lexical order on transitions *)
  307. var c : Char; xi, xj : Boolean;
  308. begin
  309. for c := #0 to #255 do
  310. begin
  311. xi := c in trans_table^[i].cc^;
  312. xj := c in trans_table^[j].cc^;
  313. if xi<>xj then
  314. begin
  315. transLess := ord(xi)>ord(xj);
  316. exit
  317. end;
  318. end;
  319. transLess := false
  320. end(*transLess*);
  321. {$ifndef fpc}{$F+}{$endif}
  322. procedure transSwap(i, j : Integer);
  323. {$ifndef fpc}{$F-}{$endif}
  324. (* swap transitions i and j *)
  325. var x : TransTableEntry;
  326. begin
  327. x := trans_table^[i];
  328. trans_table^[i] := trans_table^[j];
  329. trans_table^[j] := x;
  330. end(*transSwap*);
  331. procedure mergeTrans;
  332. var
  333. i, j, n_deleted : Integer;
  334. begin
  335. (* sort transitions w.r.t. next states: *)
  336. quicksort(state_table^[act_state].trans_lo,
  337. n_trans,
  338. {$ifdef fpc}@{$endif}transLessNextState,
  339. {$ifdef fpc}@{$endif}transSwap);
  340. (* merge transitions for the same next state: *)
  341. n_deleted := 0;
  342. for i := state_table^[act_state].trans_lo to n_trans do
  343. if trans_table^[i].cc<>nil then
  344. begin
  345. j := succ(i);
  346. while (j<=n_trans) and
  347. (trans_table^[i].next_state =
  348. trans_table^[j].next_state) do
  349. begin
  350. (* merge cclasses of transitions i and j, then mark
  351. transition j as deleted *)
  352. trans_table^[i].cc^ := trans_table^[i].cc^+
  353. trans_table^[j].cc^;
  354. trans_table^[j].cc := nil;
  355. inc(n_deleted);
  356. inc(j);
  357. end;
  358. end;
  359. (* remove deleted transitions: *)
  360. j := state_table^[act_state].trans_lo;
  361. for i := state_table^[act_state].trans_lo to n_trans do
  362. if trans_table^[i].cc<>nil then
  363. if i<>j then
  364. begin
  365. trans_table^[j] := trans_table^[i];
  366. inc(j);
  367. end
  368. else
  369. inc(j);
  370. (* update transition count: *)
  371. dec(n_trans, n_deleted);
  372. end(*mergeTrans*);
  373. procedure sortTrans;
  374. begin
  375. quicksort(state_table^[act_state].trans_lo,
  376. n_trans,
  377. {$ifdef fpc}@{$endif}transLess,
  378. {$ifdef fpc}@{$endif}transSwap);
  379. end(*sortTrans*);
  380. var i : Integer;
  381. begin
  382. verbose := false;
  383. optimize := false;
  384. n_pos := 0;
  385. n_states := 0;
  386. n_trans := 0;
  387. n_start_states := 0;
  388. (* allocate tables: *)
  389. new(sym_table);
  390. new(pos_table);
  391. new(first_pos_table);
  392. new(start_excl);
  393. new(state_table);
  394. new(trans_table);
  395. (* initialize symbol table: *)
  396. for i := 1 to max_keys do sym_table^[i].pname := nil;
  397. end(*LexTables*).