lextable.pas 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. {
  2. This module collects the various tables used by the Lex program:
  3. - the symbol table
  4. - the position table
  5. - the DFA states and transition tables
  6. Note: All tables are allocated dynamically (at initialization time)
  7. because of the 64KB static data limit.
  8. Copyright (c) 1990-92 Albert Graef <[email protected]>
  9. Copyright (C) 1996 Berend de Boer <[email protected]>
  10. This program is free software; you can redistribute it and/or modify
  11. it under the terms of the GNU General Public License as published by
  12. the Free Software Foundation; either version 2 of the License, or
  13. (at your option) any later version.
  14. This program is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program; if not, write to the Free Software
  20. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21. $Revision: 1.3 $
  22. $Modtime: 96-08-01 10:23 $
  23. $History: LEXTABLE.PAS $
  24. *
  25. * ***************** Version 2 *****************
  26. * User: Berend Date: 96-10-10 Time: 21:16
  27. * Updated in $/Lex and Yacc/tply
  28. * Updated for protected mode, windows and Delphi 1.X and 2.X.
  29. }
  30. unit LexTable;
  31. interface
  32. uses LexBase;
  33. const
  34. (* Maximum table sizes: *)
  35. max_keys = 997; (* size of hash symbol table (prime number!) *)
  36. {$IFDEF MsDos}
  37. max_pos = 600; (* maximum number of positions *)
  38. max_states = 300; (* number of DFA states *)
  39. max_trans = 600; (* number of transitions *)
  40. max_start_states = 50; (* maximum number of user-defined start states *)
  41. {$ELSE}
  42. max_pos = 1200; (* maximum number of positions *)
  43. max_states = 600; (* number of DFA states *)
  44. max_trans = 1200; (* number of transitions *)
  45. max_start_states = 100; (* maximum number of user-defined start states *)
  46. {$ENDIF}
  47. var
  48. (* Actual table sizes: *)
  49. n_pos : Integer;
  50. n_states : Integer;
  51. n_trans : Integer;
  52. n_start_states : Integer;
  53. type
  54. (* Table data structures: *)
  55. SymTable = array [1..max_keys] of record
  56. pname : StrPtr;
  57. (* print name; empty entries are denoted by pname=nil *)
  58. case sym_type : ( none_sym, macro_sym, start_state_sym ) of
  59. macro_sym : ( subst : StrPtr );
  60. (* macro substitution *)
  61. start_state_sym : ( start_state : Integer );
  62. (* start state *)
  63. end;
  64. PosTableEntry = record
  65. follow_pos : IntSetPtr;
  66. (* set of follow positions *)
  67. case pos_type : ( char_pos, cclass_pos, mark_pos ) of
  68. char_pos : ( c : Char );
  69. (* character position *)
  70. cclass_pos : ( cc : CClassPtr );
  71. (* character class position *)
  72. mark_pos : ( rule, pos : Integer );
  73. (* mark position *)
  74. end;
  75. PosTable = array [1..max_pos] of PosTableEntry;
  76. FirstPosTable = array [0..2*max_start_states+1] of IntSetPtr;
  77. (* first positions for start states (even states
  78. are entered anywhere on the line, odd states only
  79. at the beginning of the line; states 0 and 1 denote
  80. default, states 2..2*n_start_states+1 user-defined
  81. start states) *)
  82. StateTableEntry = record
  83. state_pos : IntSetPtr;
  84. (* positions covered by state *)
  85. final : Boolean;
  86. (* final state? *)
  87. trans_lo,
  88. trans_hi : Integer;
  89. (* transitions *)
  90. end;
  91. StateTable = array [0..max_states-1] of StateTableEntry;
  92. TransTableEntry = record
  93. cc : CClassPtr;
  94. (* characters of transition *)
  95. follow_pos : IntSetPtr;
  96. (* follow positions (positions of next state) *)
  97. next_state : Integer;
  98. (* next state *)
  99. end;
  100. TransTable = array [1..max_trans] of TransTableEntry;
  101. var
  102. verbose : Boolean; (* status of the verbose option *)
  103. optimize : Boolean; (* status of the optimization option *)
  104. sym_table : ^SymTable; (* symbol table *)
  105. pos_table : ^PosTable; (* position table *)
  106. first_pos_table : ^FirstPosTable; (* first positions table *)
  107. state_table : ^StateTable; (* DFA state table *)
  108. trans_table : ^TransTable; (* DFA transition table *)
  109. (* Operations: *)
  110. (* Hash symbol table:
  111. The following routines are supplied to be used with the generic hash table
  112. routines in LexBase. *)
  113. function lookup(k : Integer) : String;
  114. (* print name of symbol no. k *)
  115. procedure entry(k : Integer; symbol : String);
  116. (* enter symbol into table *)
  117. (* Routines to build the position table: *)
  118. procedure addCharPos(c : Char);
  119. procedure addCClassPos(cc : CClassPtr);
  120. procedure addMarkPos(rule, pos : Integer);
  121. (* Positions are allocated in the order of calls to addCharPos, addCClassPos
  122. and addMarkPos, starting at position 1. These routines also initialize
  123. the corresponding follow sets. *)
  124. (* Routines to build the state table: *)
  125. var act_state : Integer; (* state currently considered *)
  126. function newState(POS : IntSetPtr) : Integer;
  127. (* Add a new state with the given position set; initialize the state's
  128. position set to POS (the offsets into the transition table are
  129. initialized when the state becomes active, see startStateTrans, below).
  130. Returns: the new state number *)
  131. function addState(POS : IntSetPtr) : Integer;
  132. (* add a new state, but only if there is not already a state with the
  133. same position set *)
  134. procedure startStateTrans;
  135. procedure endStateTrans;
  136. (* initializes act_state's first and last offsets into the transition
  137. table *)
  138. function n_state_trans(i : Integer) : Integer;
  139. (* return number of transitions in state i *)
  140. procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
  141. (* adds a transition to the table *)
  142. procedure mergeTrans;
  143. (* sorts transitions w.r.t. next states and merges transitions for the
  144. same next state in the active state *)
  145. procedure sortTrans;
  146. (* sort transitions in act_state lexicographically *)
  147. implementation
  148. uses LexMsgs;
  149. (* Hash table routines: *)
  150. function lookup(k : Integer) : String;
  151. begin
  152. with sym_table^[k] do
  153. if pname=nil then
  154. lookup := ''
  155. else
  156. lookup := copy(pname^, 1, length(pname^))
  157. end(*lookup*);
  158. procedure entry(k : Integer; symbol : String);
  159. begin
  160. with sym_table^[k] do
  161. begin
  162. pname := newStr(symbol);
  163. sym_type := none_sym;
  164. end
  165. end(*entry*);
  166. (* Routines to build the position table: *)
  167. procedure addCharPos(c : Char);
  168. begin
  169. inc(n_pos);
  170. if n_pos>max_pos then fatal(pos_table_overflow);
  171. pos_table^[n_pos].follow_pos := newIntSet;
  172. pos_table^[n_pos].pos_type := char_pos;
  173. pos_table^[n_pos].c := c;
  174. end(*addCharPos*);
  175. procedure addCClassPos(cc : CClassPtr);
  176. begin
  177. inc(n_pos);
  178. if n_pos>max_pos then fatal(pos_table_overflow);
  179. pos_table^[n_pos].follow_pos := newIntSet;
  180. pos_table^[n_pos].pos_type := cclass_pos;
  181. pos_table^[n_pos].cc := cc;
  182. end(*addCClassPos*);
  183. procedure addMarkPos(rule, pos : Integer);
  184. begin
  185. inc(n_pos);
  186. if n_pos>max_pos then fatal(pos_table_overflow);
  187. pos_table^[n_pos].follow_pos := newIntSet;
  188. pos_table^[n_pos].pos_type := mark_pos;
  189. pos_table^[n_pos].rule := rule;
  190. pos_table^[n_pos].pos := pos;
  191. end(*addMarkPos*);
  192. (* Routines to build the state table: *)
  193. function newState(POS : IntSetPtr) : Integer;
  194. begin
  195. if n_states>=max_states then fatal(state_table_overflow);
  196. newState := n_states;
  197. with state_table^[n_states] do
  198. begin
  199. state_pos := POS;
  200. final := false;
  201. end;
  202. inc(n_states);
  203. end(*newState*);
  204. function addState(POS : IntSetPtr) : Integer;
  205. var i : Integer;
  206. begin
  207. for i := 0 to pred(n_states) do
  208. if equal(POS^, state_table^[i].state_pos^) then
  209. begin
  210. addState := i;
  211. exit;
  212. end;
  213. addState := newState(POS);
  214. end(*addState*);
  215. procedure startStateTrans;
  216. begin
  217. state_table^[act_state].trans_lo := succ(n_trans);
  218. end(*startStateTrans*);
  219. procedure endStateTrans;
  220. begin
  221. state_table^[act_state].trans_hi := n_trans;
  222. end(*endStateTrans*);
  223. function n_state_trans(i : Integer) : Integer;
  224. begin
  225. with state_table^[i] do
  226. n_state_trans := trans_hi-trans_lo+1
  227. end(*n_state_trans*);
  228. (* Construction of the transition table:
  229. This implementation here uses a simple optimization which tries to avoid
  230. the construction of different transitions for each individual character
  231. in large character classes by MERGING transitions whenever possible. The
  232. transitions, at any time, will be partitioned into transitions on disjoint
  233. character classes. When adding a new transition on character class cc, we
  234. repartition the transitions as follows:
  235. 1. If the current character class cc equals an existing one, we can
  236. simply add the new follow set to the existing one.
  237. 2. Otherwise, for some existing transition on some character class
  238. cc1 with cc*cc1<>[], we replace the existing transition by a new
  239. transition on cc*cc1 with follow set = cc1's follow set + cc's follow
  240. set, and, if necessary (i.e. if cc1-cc is nonempty), a transition on
  241. cc1-cc with follow set = cc1's follow set. We then remove the elements
  242. of cc1 from cc, and proceed again with step 1.
  243. We may stop this process as soon as cc becomes empty (then all characters
  244. in cc have been distributed among the existing partitions). If cc does
  245. NOT become empty, we have to construct a new transition for the remaining
  246. character class (which then will be disjoint from all other character
  247. classes in the transition table). *)
  248. procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
  249. var
  250. i : Integer;
  251. cc0, cc1, cc2 : CClass;
  252. begin
  253. for i := state_table^[act_state].trans_lo to n_trans do
  254. if trans_table^[i].cc^=cc then
  255. begin
  256. setunion(trans_table^[i].follow_pos^, FOLLOW^);
  257. exit
  258. end
  259. else
  260. begin
  261. cc0 := cc*trans_table^[i].cc^;
  262. if cc0<>[] then
  263. begin
  264. cc1 := trans_table^[i].cc^-cc;
  265. cc2 := cc-trans_table^[i].cc^;
  266. if cc1<>[] then
  267. begin
  268. trans_table^[i].cc^ := cc1;
  269. inc(n_trans);
  270. if n_trans>max_trans then fatal(trans_table_overflow);
  271. trans_table^[n_trans].cc := newCClass(cc0);
  272. trans_table^[n_trans].follow_pos := newIntSet;
  273. trans_table^[n_trans].follow_pos^ :=
  274. trans_table^[i].follow_pos^;
  275. setunion(trans_table^[n_trans].follow_pos^, FOLLOW^);
  276. end
  277. else
  278. begin
  279. trans_table^[i].cc^ := cc0;
  280. setunion(trans_table^[i].follow_pos^, FOLLOW^);
  281. end;
  282. cc := cc2;
  283. if cc=[] then exit;
  284. end
  285. end;
  286. inc(n_trans);
  287. if n_trans>max_trans then fatal(trans_table_overflow);
  288. trans_table^[n_trans].cc := newCClass(cc);
  289. trans_table^[n_trans].follow_pos := newIntSet;
  290. trans_table^[n_trans].follow_pos^ := FOLLOW^;
  291. end(*addCharTrans*);
  292. (* comparison and swap procedures for sorting transitions: *)
  293. {$ifndef fpc}{$F+}{$endif}
  294. function transLessNextState(i, j : Integer) : Boolean;
  295. {$ifndef fpc}{$F-}{$endif}
  296. (* compare transitions based on next states (used in mergeCharTrans) *)
  297. begin
  298. transLessNextState := trans_table^[i].next_state<
  299. trans_table^[j].next_state
  300. end(*transLessNextState*);
  301. {$ifndef fpc}{$F+}{$endif}
  302. function transLess(i, j : Integer) : Boolean;
  303. {$ifndef fpc}{$F-}{$endif}
  304. (* lexical order on transitions *)
  305. var c : Char; xi, xj : Boolean;
  306. begin
  307. for c := #0 to #255 do
  308. begin
  309. xi := c in trans_table^[i].cc^;
  310. xj := c in trans_table^[j].cc^;
  311. if xi<>xj then
  312. begin
  313. transLess := ord(xi)>ord(xj);
  314. exit
  315. end;
  316. end;
  317. transLess := false
  318. end(*transLess*);
  319. {$ifndef fpc}{$F+}{$endif}
  320. procedure transSwap(i, j : Integer);
  321. {$ifndef fpc}{$F-}{$endif}
  322. (* swap transitions i and j *)
  323. var x : TransTableEntry;
  324. begin
  325. x := trans_table^[i];
  326. trans_table^[i] := trans_table^[j];
  327. trans_table^[j] := x;
  328. end(*transSwap*);
  329. procedure mergeTrans;
  330. var
  331. i, j, n_deleted : Integer;
  332. begin
  333. (* sort transitions w.r.t. next states: *)
  334. quicksort(state_table^[act_state].trans_lo,
  335. n_trans,
  336. {$ifdef fpc}@{$endif}transLessNextState,
  337. {$ifdef fpc}@{$endif}transSwap);
  338. (* merge transitions for the same next state: *)
  339. n_deleted := 0;
  340. for i := state_table^[act_state].trans_lo to n_trans do
  341. if trans_table^[i].cc<>nil then
  342. begin
  343. j := succ(i);
  344. while (j<=n_trans) and
  345. (trans_table^[i].next_state =
  346. trans_table^[j].next_state) do
  347. begin
  348. (* merge cclasses of transitions i and j, then mark
  349. transition j as deleted *)
  350. trans_table^[i].cc^ := trans_table^[i].cc^+
  351. trans_table^[j].cc^;
  352. trans_table^[j].cc := nil;
  353. inc(n_deleted);
  354. inc(j);
  355. end;
  356. end;
  357. (* remove deleted transitions: *)
  358. j := state_table^[act_state].trans_lo;
  359. for i := state_table^[act_state].trans_lo to n_trans do
  360. if trans_table^[i].cc<>nil then
  361. if i<>j then
  362. begin
  363. trans_table^[j] := trans_table^[i];
  364. inc(j);
  365. end
  366. else
  367. inc(j);
  368. (* update transition count: *)
  369. dec(n_trans, n_deleted);
  370. end(*mergeTrans*);
  371. procedure sortTrans;
  372. begin
  373. quicksort(state_table^[act_state].trans_lo,
  374. n_trans,
  375. {$ifdef fpc}@{$endif}transLess,
  376. {$ifdef fpc}@{$endif}transSwap);
  377. end(*sortTrans*);
  378. var i : Integer;
  379. begin
  380. verbose := false;
  381. optimize := false;
  382. n_pos := 0;
  383. n_states := 0;
  384. n_trans := 0;
  385. n_start_states := 0;
  386. (* allocate tables: *)
  387. new(sym_table);
  388. new(pos_table);
  389. new(first_pos_table);
  390. new(state_table);
  391. new(trans_table);
  392. (* initialize symbol table: *)
  393. for i := 1 to max_keys do sym_table^[i].pname := nil;
  394. end(*LexTables*).